1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2021 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include <uapi/drm/habanalabs_accel.h>
9#include "habanalabs.h"
10
11#include <linux/uaccess.h>
12#include <linux/slab.h>
13
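/*
 * Mask of the mutually exclusive CS type flags. hl_cs_get_cs_type() maps
 * each of these flags to an hl_cs_type, and hl_cs_sanity_checks() rejects
 * a CS in which more than one of them is set.
 */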
14#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
15 HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
16 HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \
17 HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
18
19
20#define MAX_TS_ITER_NUM 100
21
22/**
23 * enum hl_cs_wait_status - cs wait status
24 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
25 * @CS_WAIT_STATUS_COMPLETED: cs completed
26 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
27 */
28enum hl_cs_wait_status {
29 CS_WAIT_STATUS_BUSY,
30 CS_WAIT_STATUS_COMPLETED,
31 CS_WAIT_STATUS_GONE
32};
33
34/*
35 * Data used while handling wait/timestamp nodes.
36 * The purpose of this struct is to store the needed data for both operations
37 * in one variable instead of passing large number of arguments to functions.
38 */
39struct wait_interrupt_data {
40 struct hl_user_interrupt *interrupt;
41 struct hl_mmap_mem_buf *buf;
42 struct hl_mem_mgr *mmg;
43 struct hl_cb *cq_cb;
44 u64 ts_handle;
45 u64 ts_offset;
46 u64 cq_handle;
47 u64 cq_offset;
48 u64 target_value;
49 u64 intr_timeout_us;
50};
51
52static void job_wq_completion(struct work_struct *work);
53static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
54 enum hl_cs_wait_status *status, s64 *timestamp);
55static void cs_do_release(struct kref *ref);
56
57static void hl_push_cs_outcome(struct hl_device *hdev,
58 struct hl_cs_outcome_store *outcome_store,
59 u64 seq, ktime_t ts, int error)
60{
61 struct hl_cs_outcome *node;
62 unsigned long flags;
63
	/*
	 * CS outcome store supports the following operations:
	 * push outcome - store a recent CS outcome in the store
	 * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store
	 * It uses 2 lists: used list and free list.
	 * It has a pre-allocated amount of nodes, each node stores
	 * a single CS outcome.
	 * Initially, all the nodes are in the free list.
	 * On push outcome, a node (any) is taken from the free list, its
	 * information is filled in, and the node is moved to the used list.
	 * It is possible that there are no nodes left in the free list.
	 * In this case, we will lose some information about old outcomes: we
	 * will pop the OLDEST node from the used list and make it free.
	 * On pop, the node is searched for in the used list (using a search
	 * index).
	 * If found, the node is then removed from the used list and moved
	 * back to the free list. The outcome data that the node contained is
	 * returned back to the user.
	 */
83
84 spin_lock_irqsave(&outcome_store->db_lock, flags);
85
86 if (list_empty(&outcome_store->free_list)) {
87 node = list_last_entry(&outcome_store->used_list,
88 struct hl_cs_outcome, list_link);
89 hash_del(&node->map_link);
90 dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq);
91 } else {
92 node = list_last_entry(&outcome_store->free_list,
93 struct hl_cs_outcome, list_link);
94 }
95
96 list_del_init(&node->list_link);
97
98 node->seq = seq;
99 node->ts = ts;
100 node->error = error;
101
102 list_add(&node->list_link, &outcome_store->used_list);
103 hash_add(outcome_store->outcome_map, &node->map_link, node->seq);
104
105 spin_unlock_irqrestore(&outcome_store->db_lock, flags);
106}
107
108static bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store,
109 u64 seq, ktime_t *ts, int *error)
110{
111 struct hl_cs_outcome *node;
112 unsigned long flags;
113
114 spin_lock_irqsave(&outcome_store->db_lock, flags);
115
116 hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq)
117 if (node->seq == seq) {
118 *ts = node->ts;
119 *error = node->error;
120
121 hash_del(&node->map_link);
122 list_del_init(&node->list_link);
123 list_add(&node->list_link, &outcome_store->free_list);
124
125 spin_unlock_irqrestore(&outcome_store->db_lock, flags);
126
127 return true;
128 }
129
130 spin_unlock_irqrestore(&outcome_store->db_lock, flags);
131
132 return false;
133}
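
/*
 * Illustrative pairing of the two outcome-store helpers above (a sketch of
 * existing usage, not additional logic): cs_do_release() pushes the outcome
 * of a timestamped CS with
 *
 *	hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
 *			   cs->fence->timestamp, cs->fence->error);
 *
 * and a waiter that finds the fence already gone is then expected to fetch
 * the result by sequence, e.g.
 *
 *	if (hl_pop_cs_outcome(&ctx->outcome_store, seq, &ts, &error))
 *		... report ts/error to the user ...
 *
 * (the pop caller itself lives in the wait path, outside this section).
 */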
134
135static void hl_sob_reset(struct kref *ref)
136{
137 struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
138 kref);
139 struct hl_device *hdev = hw_sob->hdev;
140
141 dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);
142
143 hdev->asic_funcs->reset_sob(hdev, hw_sob);
144
145 hw_sob->need_reset = false;
146}
147
148void hl_sob_reset_error(struct kref *ref)
149{
150 struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
151 kref);
152 struct hl_device *hdev = hw_sob->hdev;
153
154 dev_crit(hdev->dev,
155 "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
156 hw_sob->q_idx, hw_sob->sob_id);
157}
158
159void hw_sob_put(struct hl_hw_sob *hw_sob)
160{
161 if (hw_sob)
162 kref_put(&hw_sob->kref, hl_sob_reset);
163}
164
165static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
166{
167 if (hw_sob)
168 kref_put(&hw_sob->kref, hl_sob_reset_error);
169}
170
171void hw_sob_get(struct hl_hw_sob *hw_sob)
172{
173 if (hw_sob)
174 kref_get(&hw_sob->kref);
175}
176
177/**
178 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
179 * @sob_base: sob base id
180 * @sob_mask: sob user mask, each bit represents a sob offset from sob base
181 * @mask: generated mask
182 *
183 * Return: 0 if given parameters are valid
184 */
185int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
186{
187 int i;
188
189 if (sob_mask == 0)
190 return -EINVAL;
191
192 if (sob_mask == 0x1) {
193 *mask = ~(1 << (sob_base & 0x7));
194 } else {
195 /* find msb in order to verify sob range is valid */
196 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
197 if (BIT(i) & sob_mask)
198 break;
199
200 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
201 return -EINVAL;
202
203 *mask = ~sob_mask;
204 }
205
206 return 0;
207}
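
/*
 * Worked example for hl_gen_sob_mask() (illustrative values, and assuming
 * HL_MAX_SOBS_PER_MONITOR is 8):
 * - sob_base = 5, sob_mask = 0x1: *mask = ~(1 << 5) = 0xdf (as a u8)
 * - sob_base = 8, sob_mask = 0x7: msb is bit 2, which fits in the monitor
 *   window, so *mask = ~0x7 = 0xf8
 * - sob_base = 6, sob_mask = 0xf: msb is bit 3 but only offsets 6-7 remain
 *   in the window, so -EINVAL is returned
 */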
208
209static void hl_fence_release(struct kref *kref)
210{
211 struct hl_fence *fence =
212 container_of(kref, struct hl_fence, refcount);
213 struct hl_cs_compl *hl_cs_cmpl =
214 container_of(fence, struct hl_cs_compl, base_fence);
215
216 kfree(hl_cs_cmpl);
217}
218
219void hl_fence_put(struct hl_fence *fence)
220{
221 if (IS_ERR_OR_NULL(fence))
222 return;
223 kref_put(&fence->refcount, hl_fence_release);
224}
225
226void hl_fences_put(struct hl_fence **fence, int len)
227{
228 int i;
229
230 for (i = 0; i < len; i++, fence++)
231 hl_fence_put(*fence);
232}
233
234void hl_fence_get(struct hl_fence *fence)
235{
236 if (fence)
237 kref_get(&fence->refcount);
238}
239
240static void hl_fence_init(struct hl_fence *fence, u64 sequence)
241{
242 kref_init(&fence->refcount);
243 fence->cs_sequence = sequence;
244 fence->error = 0;
245 fence->timestamp = ktime_set(0, 0);
246 fence->mcs_handling_done = false;
247 init_completion(&fence->completion);
248}
249
250void cs_get(struct hl_cs *cs)
251{
252 kref_get(&cs->refcount);
253}
254
255static int cs_get_unless_zero(struct hl_cs *cs)
256{
257 return kref_get_unless_zero(&cs->refcount);
258}
259
260static void cs_put(struct hl_cs *cs)
261{
262 kref_put(&cs->refcount, cs_do_release);
263}
264
265static void cs_job_do_release(struct kref *ref)
266{
267 struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
268
269 kfree(job);
270}
271
272static void hl_cs_job_put(struct hl_cs_job *job)
273{
274 kref_put(&job->refcount, cs_job_do_release);
275}
276
277bool cs_needs_completion(struct hl_cs *cs)
278{
	/* In case this is a staged CS, only the last CS in the sequence should
	 * get a completion; any non-staged CS will always get a completion.
	 */
282 if (cs->staged_cs && !cs->staged_last)
283 return false;
284
285 return true;
286}
287
288bool cs_needs_timeout(struct hl_cs *cs)
289{
	/* In case this is a staged CS, only the first CS in the sequence should
	 * get a timeout; any non-staged CS will always get a timeout.
	 */
293 if (cs->staged_cs && !cs->staged_first)
294 return false;
295
296 return true;
297}
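
/*
 * In a staged submission the two helpers above therefore split the roles:
 * the first CS in the sequence carries the TDR timeout (consulted by
 * cs_handle_tdr()) while the last CS carries the completion (consulted by
 * hl_complete_job() and cs_do_release()); a non-staged CS carries both.
 */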
298
299static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
300{
301 /* Patched CB is created for external queues jobs */
302 return (job->queue_type == QUEUE_TYPE_EXT);
303}
304
/*
 * cs_parser - parse the user command submission
 *
 * @hpriv: pointer to the private data of the fd
 * @job: pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers.
 */
316static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
317{
318 struct hl_device *hdev = hpriv->hdev;
319 struct hl_cs_parser parser;
320 int rc;
321
322 parser.ctx_id = job->cs->ctx->asid;
323 parser.cs_sequence = job->cs->sequence;
324 parser.job_id = job->id;
325
326 parser.hw_queue_id = job->hw_queue_id;
327 parser.job_userptr_list = &job->userptr_list;
328 parser.patched_cb = NULL;
329 parser.user_cb = job->user_cb;
330 parser.user_cb_size = job->user_cb_size;
331 parser.queue_type = job->queue_type;
332 parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
333 job->patched_cb = NULL;
334 parser.completion = cs_needs_completion(job->cs);
335
336 rc = hdev->asic_funcs->cs_parser(hdev, &parser);
337
338 if (is_cb_patched(hdev, job)) {
339 if (!rc) {
340 job->patched_cb = parser.patched_cb;
341 job->job_cb_size = parser.patched_cb_size;
342 job->contains_dma_pkt = parser.contains_dma_pkt;
343 atomic_inc(&job->patched_cb->cs_cnt);
344 }
345
346 /*
347 * Whether the parsing worked or not, we don't need the
348 * original CB anymore because it was already parsed and
349 * won't be accessed again for this CS
350 */
351 atomic_dec(&job->user_cb->cs_cnt);
352 hl_cb_put(job->user_cb);
353 job->user_cb = NULL;
354 } else if (!rc) {
355 job->job_cb_size = job->user_cb_size;
356 }
357
358 return rc;
359}
360
361static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
362{
363 struct hl_cs *cs = job->cs;
364
365 if (is_cb_patched(hdev, job)) {
366 hl_userptr_delete_list(hdev, &job->userptr_list);
367
		/*
		 * We might arrive here from rollback and the patched CB wasn't
		 * created, so we need to check that it's not NULL.
		 */
372 if (job->patched_cb) {
373 atomic_dec(&job->patched_cb->cs_cnt);
374 hl_cb_put(job->patched_cb);
375 }
376 }
377
	/* For H/W queue jobs, if a user CB was allocated by the driver,
	 * the user CB isn't released in cs_parser() and thus should be
	 * released here. This is also true for INT queue jobs which were
	 * allocated by the driver.
	 */
383 if (job->is_kernel_allocated_cb &&
384 (job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) {
385 atomic_dec(&job->user_cb->cs_cnt);
386 hl_cb_put(job->user_cb);
387 }
388
389 /*
390 * This is the only place where there can be multiple threads
391 * modifying the list at the same time
392 */
393 spin_lock(&cs->job_lock);
394 list_del(&job->cs_node);
395 spin_unlock(&cs->job_lock);
396
397 hl_debugfs_remove_job(hdev, job);
398
	/* We decrement the reference only for a CS that gets a completion,
	 * because the reference was incremented only for this kind of CS
	 * right before it was scheduled.
	 *
	 * In a staged submission, only the last CS marked as 'staged_last'
	 * gets a completion, hence its release function will be called from here.
	 * As for the rest of the CS's in the staged submission, which do not get
	 * a completion, their CS reference will be decremented by the
	 * 'staged_last' CS during the CS release flow.
	 * All relevant PQ CI counters will be incremented during the CS release
	 * flow by calling 'hl_hw_queue_update_ci'.
	 */
411 if (cs_needs_completion(cs) &&
412 (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) {
413
414 /* In CS based completions, the timestamp is already available,
415 * so no need to extract it from job
416 */
417 if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB)
418 cs->completion_timestamp = job->timestamp;
419
420 cs_put(cs);
421 }
422
423 hl_cs_job_put(job);
424}
425
426/*
427 * hl_staged_cs_find_first - locate the first CS in this staged submission
428 *
429 * @hdev: pointer to device structure
430 * @cs_seq: staged submission sequence number
431 *
432 * @note: This function must be called under 'hdev->cs_mirror_lock'
433 *
434 * Find and return a CS pointer with the given sequence
435 */
436struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
437{
438 struct hl_cs *cs;
439
440 list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
441 if (cs->staged_cs && cs->staged_first &&
442 cs->sequence == cs_seq)
443 return cs;
444
445 return NULL;
446}
447
448/*
449 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
450 *
451 * @hdev: pointer to device structure
452 * @cs: staged submission member
453 *
454 */
455bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
456{
457 struct hl_cs *last_entry;
458
459 last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
460 staged_cs_node);
461
462 if (last_entry->staged_last)
463 return true;
464
465 return false;
466}
467
/*
 * staged_cs_get - get CS reference if this CS is a part of a staged CS
 *
 * @hdev: pointer to device structure
 * @cs: current CS
 *
 * Increment CS reference for every CS in this staged submission except for
 * the CS which gets a completion.
 */
478static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
479{
480 /* Only the last CS in this staged submission will get a completion.
481 * We must increment the reference for all other CS's in this
482 * staged submission.
483 * Once we get a completion we will release the whole staged submission.
484 */
485 if (!cs->staged_last)
486 cs_get(cs);
487}
488
489/*
490 * staged_cs_put - put a CS in case it is part of staged submission
491 *
492 * @hdev: pointer to device structure
493 * @cs: CS to put
494 *
495 * This function decrements a CS reference (for a non completion CS)
496 */
497static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
498{
	/* We release all CS's in a staged submission except the last
	 * CS, whose reference we never incremented.
	 */
502 if (!cs_needs_completion(cs))
503 cs_put(cs);
504}
505
506static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
507{
508 struct hl_cs *next = NULL, *iter, *first_cs;
509
510 if (!cs_needs_timeout(cs))
511 return;
512
513 spin_lock(&hdev->cs_mirror_lock);
514
	/* We need to handle TDR only once for the complete staged submission.
	 * Hence, we choose the CS that reaches this function first, which is
	 * the CS marked as 'staged_last'.
	 * In case a single staged CS was submitted which has both first and last
	 * indications, then "cs_find_first" below will return NULL, since we
	 * removed the CS node from the list before getting here.
	 * In such cases just continue with the CS to cancel its TDR work.
	 */
523 if (cs->staged_cs && cs->staged_last) {
524 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
525 if (first_cs)
526 cs = first_cs;
527 }
528
529 spin_unlock(&hdev->cs_mirror_lock);
530
	/* Don't cancel TDR in case this CS timed out, because we might be
	 * running from the TDR context.
	 */
534 if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
535 return;
536
537 if (cs->tdr_active)
538 cancel_delayed_work_sync(&cs->work_tdr);
539
540 spin_lock(&hdev->cs_mirror_lock);
541
542 /* queue TDR for next CS */
543 list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node)
544 if (cs_needs_timeout(iter)) {
545 next = iter;
546 break;
547 }
548
549 if (next && !next->tdr_active) {
550 next->tdr_active = true;
551 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
552 }
553
554 spin_unlock(&hdev->cs_mirror_lock);
555}
556
557/*
558 * force_complete_multi_cs - complete all contexts that wait on multi-CS
559 *
560 * @hdev: pointer to habanalabs device structure
561 */
562static void force_complete_multi_cs(struct hl_device *hdev)
563{
564 int i;
565
566 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
567 struct multi_cs_completion *mcs_compl;
568
569 mcs_compl = &hdev->multi_cs_completion[i];
570
571 spin_lock(&mcs_compl->lock);
572
573 if (!mcs_compl->used) {
574 spin_unlock(&mcs_compl->lock);
575 continue;
576 }
577
		/* When calling force complete, no context should be waiting on
		 * multi-CS.
		 * We call the function as a protection for such a case, to free
		 * any pending context and print an error message.
		 */
583 dev_err(hdev->dev,
584 "multi-CS completion context %d still waiting when calling force completion\n",
585 i);
586 complete_all(&mcs_compl->completion);
587 spin_unlock(&mcs_compl->lock);
588 }
589}
590
591/*
592 * complete_multi_cs - complete all waiting entities on multi-CS
593 *
594 * @hdev: pointer to habanalabs device structure
595 * @cs: CS structure
596 * The function signals a waiting entity that has an overlapping stream masters
597 * with the completed CS.
598 * For example:
599 * - a completed CS worked on stream master QID 4, multi CS completion
600 * is actively waiting on stream master QIDs 3, 5. don't send signal as no
601 * common stream master QID
602 * - a completed CS worked on stream master QID 4, multi CS completion
603 * is actively waiting on stream master QIDs 3, 4. send signal as stream
604 * master QID 4 is common
605 */
606static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
607{
608 struct hl_fence *fence = cs->fence;
609 int i;
610
611 /* in case of multi CS check for completion only for the first CS */
612 if (cs->staged_cs && !cs->staged_first)
613 return;
614
615 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
616 struct multi_cs_completion *mcs_compl;
617
618 mcs_compl = &hdev->multi_cs_completion[i];
619 if (!mcs_compl->used)
620 continue;
621
622 spin_lock(&mcs_compl->lock);
623
624 /*
625 * complete if:
626 * 1. still waiting for completion
627 * 2. the completed CS has at least one overlapping stream
628 * master with the stream masters in the completion
629 */
630 if (mcs_compl->used &&
631 (fence->stream_master_qid_map &
632 mcs_compl->stream_master_qid_map)) {
633 /* extract the timestamp only of first completed CS */
634 if (!mcs_compl->timestamp)
635 mcs_compl->timestamp = ktime_to_ns(fence->timestamp);
636
637 complete_all(&mcs_compl->completion);
638
			/*
			 * Setting mcs_handling_done inside the lock ensures
			 * at least one fence has mcs_handling_done set to
			 * true before the wait for mcs finishes. This ensures
			 * at least one CS will be set as completed when
			 * polling mcs fences.
			 */
646 fence->mcs_handling_done = true;
647 }
648
649 spin_unlock(&mcs_compl->lock);
650 }
651 /* In case CS completed without mcs completion initialized */
652 fence->mcs_handling_done = true;
653}
654
655static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
656 struct hl_cs *cs,
657 struct hl_cs_compl *hl_cs_cmpl)
658{
	/* Skip this handler if the CS wasn't submitted, to avoid putting
	 * the hw_sob twice, since that case was already handled at this point.
	 * Also skip if the hw_sob pointer wasn't set.
	 */
663 if (!hl_cs_cmpl->hw_sob || !cs->submitted)
664 return;
665
666 spin_lock(&hl_cs_cmpl->lock);
667
668 /*
669 * we get refcount upon reservation of signals or signal/wait cs for the
670 * hw_sob object, and need to put it when the first staged cs
671 * (which contains the encaps signals) or cs signal/wait is completed.
672 */
673 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
674 (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
675 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
676 (!!hl_cs_cmpl->encaps_signals)) {
677 dev_dbg(hdev->dev,
678 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
679 hl_cs_cmpl->cs_seq,
680 hl_cs_cmpl->type,
681 hl_cs_cmpl->hw_sob->sob_id,
682 hl_cs_cmpl->sob_val);
683
684 hw_sob_put(hl_cs_cmpl->hw_sob);
685
686 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
687 hdev->asic_funcs->reset_sob_group(hdev,
688 hl_cs_cmpl->sob_group);
689 }
690
691 spin_unlock(&hl_cs_cmpl->lock);
692}
693
694static void cs_do_release(struct kref *ref)
695{
696 struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
697 struct hl_device *hdev = cs->ctx->hdev;
698 struct hl_cs_job *job, *tmp;
699 struct hl_cs_compl *hl_cs_cmpl =
700 container_of(cs->fence, struct hl_cs_compl, base_fence);
701
702 cs->completed = true;
703
	/*
	 * Although reaching here means that all external jobs have finished
	 * (each one of them took a refcount on the CS), we still need to go
	 * over the internal jobs and complete them. Otherwise, we will have
	 * leaked memory and, what's worse, the CS object (and potentially the
	 * CTX object) could be released while the JOB still holds a pointer
	 * to them (but no reference).
	 */
712 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
713 hl_complete_job(hdev, job);
714
715 if (!cs->submitted) {
716 /*
717 * In case the wait for signal CS was submitted, the fence put
718 * occurs in init_signal_wait_cs() or collective_wait_init_cs()
719 * right before hanging on the PQ.
720 */
721 if (cs->type == CS_TYPE_WAIT ||
722 cs->type == CS_TYPE_COLLECTIVE_WAIT)
723 hl_fence_put(cs->signal_fence);
724
725 goto out;
726 }
727
	/* Need to update CI for all queue jobs that do not get a completion */
729 hl_hw_queue_update_ci(cs);
730
731 /* remove CS from CS mirror list */
732 spin_lock(&hdev->cs_mirror_lock);
733 list_del_init(&cs->mirror_node);
734 spin_unlock(&hdev->cs_mirror_lock);
735
736 cs_handle_tdr(hdev, cs);
737
738 if (cs->staged_cs) {
739 /* the completion CS decrements reference for the entire
740 * staged submission
741 */
742 if (cs->staged_last) {
743 struct hl_cs *staged_cs, *tmp_cs;
744
745 list_for_each_entry_safe(staged_cs, tmp_cs,
746 &cs->staged_cs_node, staged_cs_node)
747 staged_cs_put(hdev, staged_cs);
748 }
749
		/* A staged CS will be a member in the list only after it
		 * was submitted. We used 'cs_mirror_lock' when inserting
		 * it into the list, so we use it again when removing it.
		 */
754 if (cs->submitted) {
755 spin_lock(&hdev->cs_mirror_lock);
756 list_del(&cs->staged_cs_node);
757 spin_unlock(&hdev->cs_mirror_lock);
758 }
759
760 /* decrement refcount to handle when first staged cs
761 * with encaps signals is completed.
762 */
763 if (hl_cs_cmpl->encaps_signals)
764 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
765 hl_encaps_release_handle_and_put_ctx);
766 }
767
768 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals)
769 kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
770
771out:
772 /* Must be called before hl_ctx_put because inside we use ctx to get
773 * the device
774 */
775 hl_debugfs_remove_cs(cs);
776
777 hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL;
778
779 /* We need to mark an error for not submitted because in that case
780 * the hl fence release flow is different. Mainly, we don't need
781 * to handle hw_sob for signal/wait
782 */
783 if (cs->timedout)
784 cs->fence->error = -ETIMEDOUT;
785 else if (cs->aborted)
786 cs->fence->error = -EIO;
787 else if (!cs->submitted)
788 cs->fence->error = -EBUSY;
789
790 if (unlikely(cs->skip_reset_on_timeout)) {
791 dev_err(hdev->dev,
792 "Command submission %llu completed after %llu (s)\n",
793 cs->sequence,
794 div_u64(jiffies - cs->submission_time_jiffies, HZ));
795 }
796
797 if (cs->timestamp) {
798 cs->fence->timestamp = cs->completion_timestamp;
799 hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
800 cs->fence->timestamp, cs->fence->error);
801 }
802
803 hl_ctx_put(cs->ctx);
804
805 complete_all(&cs->fence->completion);
806 complete_multi_cs(hdev, cs);
807
808 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
809
810 hl_fence_put(cs->fence);
811
812 kfree(cs->jobs_in_queue_cnt);
813 kfree(cs);
814}
815
816static void cs_timedout(struct work_struct *work)
817{
818 struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work);
819 bool skip_reset_on_timeout, device_reset = false;
820 struct hl_device *hdev;
821 u64 event_mask = 0x0;
822 uint timeout_sec;
823 int rc;
824
825 skip_reset_on_timeout = cs->skip_reset_on_timeout;
826
827 rc = cs_get_unless_zero(cs);
828 if (!rc)
829 return;
830
831 if ((!cs->submitted) || (cs->completed)) {
832 cs_put(cs);
833 return;
834 }
835
836 hdev = cs->ctx->hdev;
837
838 if (likely(!skip_reset_on_timeout)) {
839 if (hdev->reset_on_lockup)
840 device_reset = true;
841 else
842 hdev->reset_info.needs_reset = true;
843
		/* Mark the CS as timed out so we won't try to cancel its TDR */
845 cs->timedout = true;
846 }
847
848 /* Save only the first CS timeout parameters */
849 rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
850 if (rc) {
851 hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
852 hdev->captured_err_info.cs_timeout.seq = cs->sequence;
853 event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT;
854 }
855
856 timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000;
857
858 switch (cs->type) {
859 case CS_TYPE_SIGNAL:
860 dev_err(hdev->dev,
861 "Signal command submission %llu has not finished in %u seconds!\n",
862 cs->sequence, timeout_sec);
863 break;
864
865 case CS_TYPE_WAIT:
866 dev_err(hdev->dev,
867 "Wait command submission %llu has not finished in %u seconds!\n",
868 cs->sequence, timeout_sec);
869 break;
870
871 case CS_TYPE_COLLECTIVE_WAIT:
872 dev_err(hdev->dev,
873 "Collective Wait command submission %llu has not finished in %u seconds!\n",
874 cs->sequence, timeout_sec);
875 break;
876
877 default:
878 dev_err(hdev->dev,
879 "Command submission %llu has not finished in %u seconds!\n",
880 cs->sequence, timeout_sec);
881 break;
882 }
883
884 rc = hl_state_dump(hdev);
885 if (rc)
886 dev_err(hdev->dev, "Error during system state dump %d\n", rc);
887
888 cs_put(cs);
889
890 if (device_reset) {
891 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
892 hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask);
893 } else if (event_mask) {
894 hl_notifier_event_send_all(hdev, event_mask);
895 }
896}
897
898static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
899 enum hl_cs_type cs_type, u64 user_sequence,
900 struct hl_cs **cs_new, u32 flags, u32 timeout)
901{
902 struct hl_cs_counters_atomic *cntr;
903 struct hl_fence *other = NULL;
904 struct hl_cs_compl *cs_cmpl;
905 struct hl_cs *cs;
906 int rc;
907
908 cntr = &hdev->aggregated_cs_counters;
909
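	/*
	 * The allocations below first try GFP_ATOMIC for the fast path and
	 * fall back to a sleeping GFP_KERNEL allocation only if the atomic
	 * attempt fails.
	 */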
910 cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
911 if (!cs)
912 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
913
914 if (!cs) {
915 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
916 atomic64_inc(&cntr->out_of_mem_drop_cnt);
917 return -ENOMEM;
918 }
919
920 /* increment refcnt for context */
921 hl_ctx_get(ctx);
922
923 cs->ctx = ctx;
924 cs->submitted = false;
925 cs->completed = false;
926 cs->type = cs_type;
927 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
928 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
929 cs->timeout_jiffies = timeout;
930 cs->skip_reset_on_timeout =
931 hdev->reset_info.skip_reset_on_timeout ||
932 !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
933 cs->submission_time_jiffies = jiffies;
934 INIT_LIST_HEAD(&cs->job_list);
935 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
936 kref_init(&cs->refcount);
937 spin_lock_init(&cs->job_lock);
938
939 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
940 if (!cs_cmpl)
941 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);
942
943 if (!cs_cmpl) {
944 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
945 atomic64_inc(&cntr->out_of_mem_drop_cnt);
946 rc = -ENOMEM;
947 goto free_cs;
948 }
949
950 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
951 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
952 if (!cs->jobs_in_queue_cnt)
953 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
954 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
955
956 if (!cs->jobs_in_queue_cnt) {
957 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
958 atomic64_inc(&cntr->out_of_mem_drop_cnt);
959 rc = -ENOMEM;
960 goto free_cs_cmpl;
961 }
962
963 cs_cmpl->hdev = hdev;
964 cs_cmpl->type = cs->type;
965 spin_lock_init(&cs_cmpl->lock);
966 cs->fence = &cs_cmpl->base_fence;
967
968 spin_lock(&ctx->cs_lock);
969
970 cs_cmpl->cs_seq = ctx->cs_sequence;
971 other = ctx->cs_pending[cs_cmpl->cs_seq &
972 (hdev->asic_prop.max_pending_cs - 1)];
973
974 if (other && !completion_done(&other->completion)) {
975 /* If the following statement is true, it means we have reached
976 * a point in which only part of the staged submission was
977 * submitted and we don't have enough room in the 'cs_pending'
978 * array for the rest of the submission.
979 * This causes a deadlock because this CS will never be
980 * completed as it depends on future CS's for completion.
981 */
982 if (other->cs_sequence == user_sequence)
983 dev_crit_ratelimited(hdev->dev,
984 "Staged CS %llu deadlock due to lack of resources",
985 user_sequence);
986
987 dev_dbg_ratelimited(hdev->dev,
988 "Rejecting CS because of too many in-flights CS\n");
989 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
990 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
991 rc = -EAGAIN;
992 goto free_fence;
993 }
994
995 /* init hl_fence */
996 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
997
998 cs->sequence = cs_cmpl->cs_seq;
999
1000 ctx->cs_pending[cs_cmpl->cs_seq &
1001 (hdev->asic_prop.max_pending_cs - 1)] =
1002 &cs_cmpl->base_fence;
1003 ctx->cs_sequence++;
1004
1005 hl_fence_get(&cs_cmpl->base_fence);
1006
1007 hl_fence_put(other);
1008
1009 spin_unlock(&ctx->cs_lock);
1010
1011 *cs_new = cs;
1012
1013 return 0;
1014
1015free_fence:
1016 spin_unlock(&ctx->cs_lock);
1017 kfree(cs->jobs_in_queue_cnt);
1018free_cs_cmpl:
1019 kfree(cs_cmpl);
1020free_cs:
1021 kfree(cs);
1022 hl_ctx_put(ctx);
1023 return rc;
1024}
1025
1026static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
1027{
1028 struct hl_cs_job *job, *tmp;
1029
1030 staged_cs_put(hdev, cs);
1031
1032 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
1033 hl_complete_job(hdev, job);
1034}
1035
/*
 * release_reserved_encaps_signals() - release reserved encapsulated signals.
 * @hdev: pointer to habanalabs device structure
 *
 * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
 * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
 * For these signals we also need to put the refcount of the H/W SOB which was taken at
 * reservation.
 */
1045static void release_reserved_encaps_signals(struct hl_device *hdev)
1046{
1047 struct hl_ctx *ctx = hl_get_compute_ctx(hdev);
1048 struct hl_cs_encaps_sig_handle *handle;
1049 struct hl_encaps_signals_mgr *mgr;
1050 u32 id;
1051
1052 if (!ctx)
1053 return;
1054
1055 mgr = &ctx->sig_mgr;
1056
1057 idr_for_each_entry(&mgr->handles, handle, id)
1058 if (handle->cs_seq == ULLONG_MAX)
1059 kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx);
1060
1061 hl_ctx_put(ctx);
1062}
1063
1064void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
1065{
1066 int i;
1067 struct hl_cs *cs, *tmp;
1068
1069 if (!skip_wq_flush) {
1070 flush_workqueue(hdev->ts_free_obj_wq);
1071
1072 /* flush all completions before iterating over the CS mirror list in
1073 * order to avoid a race with the release functions
1074 */
1075 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1076 flush_workqueue(hdev->cq_wq[i]);
1077
1078 flush_workqueue(hdev->cs_cmplt_wq);
1079 }
1080
1081 /* Make sure we don't have leftovers in the CS mirror list */
1082 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
1083 cs_get(cs);
1084 cs->aborted = true;
1085 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
1086 cs->ctx->asid, cs->sequence);
1087 cs_rollback(hdev, cs);
1088 cs_put(cs);
1089 }
1090
1091 force_complete_multi_cs(hdev);
1092
1093 release_reserved_encaps_signals(hdev);
1094}
1095
1096static void
1097wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
1098{
1099 struct hl_user_pending_interrupt *pend, *temp;
1100 unsigned long flags;
1101
1102 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
1103 list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
1104 pend->fence.error = -EIO;
1105 complete_all(&pend->fence.completion);
1106 }
1107 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
1108
1109 spin_lock_irqsave(&interrupt->ts_list_lock, flags);
1110 list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
1111 list_del(&pend->list_node);
1112 hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
1113 hl_cb_put(pend->ts_reg_info.cq_cb);
1114 }
1115 spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
1116}
1117
1118void hl_release_pending_user_interrupts(struct hl_device *hdev)
1119{
1120 struct asic_fixed_properties *prop = &hdev->asic_prop;
1121 struct hl_user_interrupt *interrupt;
1122 int i;
1123
1124 if (!prop->user_interrupt_count)
1125 return;
1126
	/* We iterate through the user interrupt requests and wake up all
	 * user threads waiting for interrupt completion. We iterate the
	 * list under a lock; this is why all user threads, once awake,
	 * will wait on the same lock and will release the waiting object upon
	 * unlock.
	 */
1133
1134 for (i = 0 ; i < prop->user_interrupt_count ; i++) {
1135 interrupt = &hdev->user_interrupt[i];
1136 wake_pending_user_interrupt_threads(interrupt);
1137 }
1138
1139 interrupt = &hdev->common_user_cq_interrupt;
1140 wake_pending_user_interrupt_threads(interrupt);
1141
1142 interrupt = &hdev->common_decoder_interrupt;
1143 wake_pending_user_interrupt_threads(interrupt);
1144}
1145
1146static void force_complete_cs(struct hl_device *hdev)
1147{
1148 struct hl_cs *cs;
1149
1150 spin_lock(&hdev->cs_mirror_lock);
1151
1152 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) {
1153 cs->fence->error = -EIO;
1154 complete_all(&cs->fence->completion);
1155 }
1156
1157 spin_unlock(&hdev->cs_mirror_lock);
1158}
1159
1160void hl_abort_waiting_for_cs_completions(struct hl_device *hdev)
1161{
1162 force_complete_cs(hdev);
1163 force_complete_multi_cs(hdev);
1164}
1165
1166static void job_wq_completion(struct work_struct *work)
1167{
1168 struct hl_cs_job *job = container_of(work, struct hl_cs_job,
1169 finish_work);
1170 struct hl_cs *cs = job->cs;
1171 struct hl_device *hdev = cs->ctx->hdev;
1172
1173 /* job is no longer needed */
1174 hl_complete_job(hdev, job);
1175}
1176
1177static void cs_completion(struct work_struct *work)
1178{
1179 struct hl_cs *cs = container_of(work, struct hl_cs, finish_work);
1180 struct hl_device *hdev = cs->ctx->hdev;
1181 struct hl_cs_job *job, *tmp;
1182
1183 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
1184 hl_complete_job(hdev, job);
1185}
1186
1187u32 hl_get_active_cs_num(struct hl_device *hdev)
1188{
1189 u32 active_cs_num = 0;
1190 struct hl_cs *cs;
1191
1192 spin_lock(&hdev->cs_mirror_lock);
1193
1194 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
1195 if (!cs->completed)
1196 active_cs_num++;
1197
1198 spin_unlock(&hdev->cs_mirror_lock);
1199
1200 return active_cs_num;
1201}
1202
1203static int validate_queue_index(struct hl_device *hdev,
1204 struct hl_cs_chunk *chunk,
1205 enum hl_queue_type *queue_type,
1206 bool *is_kernel_allocated_cb)
1207{
1208 struct asic_fixed_properties *asic = &hdev->asic_prop;
1209 struct hw_queue_properties *hw_queue_prop;
1210
1211 /* This must be checked here to prevent out-of-bounds access to
1212 * hw_queues_props array
1213 */
1214 if (chunk->queue_index >= asic->max_queues) {
1215 dev_err(hdev->dev, "Queue index %d is invalid\n",
1216 chunk->queue_index);
1217 return -EINVAL;
1218 }
1219
1220 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
1221
1222 if (hw_queue_prop->type == QUEUE_TYPE_NA) {
1223 dev_err(hdev->dev, "Queue index %d is not applicable\n",
1224 chunk->queue_index);
1225 return -EINVAL;
1226 }
1227
1228 if (hw_queue_prop->binned) {
1229 dev_err(hdev->dev, "Queue index %d is binned out\n",
1230 chunk->queue_index);
1231 return -EINVAL;
1232 }
1233
1234 if (hw_queue_prop->driver_only) {
1235 dev_err(hdev->dev,
1236 "Queue index %d is restricted for the kernel driver\n",
1237 chunk->queue_index);
1238 return -EINVAL;
1239 }
1240
	/* When the hw queue type isn't QUEUE_TYPE_HW,
	 * the USER_ALLOC_CB flag shall be treated as "don't care".
	 */
1244 if (hw_queue_prop->type == QUEUE_TYPE_HW) {
1245 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
1246 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
1247 dev_err(hdev->dev,
1248 "Queue index %d doesn't support user CB\n",
1249 chunk->queue_index);
1250 return -EINVAL;
1251 }
1252
1253 *is_kernel_allocated_cb = false;
1254 } else {
1255 if (!(hw_queue_prop->cb_alloc_flags &
1256 CB_ALLOC_KERNEL)) {
1257 dev_err(hdev->dev,
1258 "Queue index %d doesn't support kernel CB\n",
1259 chunk->queue_index);
1260 return -EINVAL;
1261 }
1262
1263 *is_kernel_allocated_cb = true;
1264 }
1265 } else {
1266 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
1267 & CB_ALLOC_KERNEL);
1268 }
1269
1270 *queue_type = hw_queue_prop->type;
1271 return 0;
1272}
1273
1274static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
1275 struct hl_mem_mgr *mmg,
1276 struct hl_cs_chunk *chunk)
1277{
1278 struct hl_cb *cb;
1279
1280 cb = hl_cb_get(mmg, chunk->cb_handle);
1281 if (!cb) {
1282 dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle);
1283 return NULL;
1284 }
1285
1286 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
1287 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
1288 goto release_cb;
1289 }
1290
1291 atomic_inc(&cb->cs_cnt);
1292
1293 return cb;
1294
1295release_cb:
1296 hl_cb_put(cb);
1297 return NULL;
1298}
1299
1300struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
1301 enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
1302{
1303 struct hl_cs_job *job;
1304
1305 job = kzalloc(sizeof(*job), GFP_ATOMIC);
1306 if (!job)
1307 job = kzalloc(sizeof(*job), GFP_KERNEL);
1308
1309 if (!job)
1310 return NULL;
1311
1312 kref_init(&job->refcount);
1313 job->queue_type = queue_type;
1314 job->is_kernel_allocated_cb = is_kernel_allocated_cb;
1315
1316 if (is_cb_patched(hdev, job))
1317 INIT_LIST_HEAD(&job->userptr_list);
1318
1319 if (job->queue_type == QUEUE_TYPE_EXT)
1320 INIT_WORK(&job->finish_work, job_wq_completion);
1321
1322 return job;
1323}
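
/*
 * Condensed sketch of how this file uses the helper above (taken from
 * cs_ioctl_default(); shown here only for illustration):
 *
 *	job = hl_cs_allocate_job(hdev, queue_type, is_kernel_allocated_cb);
 *	if (!job)
 *		return -ENOMEM;
 *	job->cs = cs;
 *	job->user_cb = cb;
 *	job->user_cb_size = chunk->cb_size;
 *	job->hw_queue_id = chunk->queue_index;
 *	list_add_tail(&job->cs_node, &cs->job_list);
 *
 * The job is later released through hl_cs_job_put() once hl_complete_job()
 * runs for it.
 */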
1324
1325static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
1326{
1327 if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
1328 return CS_TYPE_SIGNAL;
1329 else if (cs_type_flags & HL_CS_FLAGS_WAIT)
1330 return CS_TYPE_WAIT;
1331 else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
1332 return CS_TYPE_COLLECTIVE_WAIT;
1333 else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
1334 return CS_RESERVE_SIGNALS;
1335 else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
1336 return CS_UNRESERVE_SIGNALS;
1337 else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
1338 return CS_TYPE_ENGINE_CORE;
1339 else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND)
1340 return CS_TYPE_ENGINES;
1341 else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
1342 return CS_TYPE_FLUSH_PCI_HBW_WRITES;
1343 else
1344 return CS_TYPE_DEFAULT;
1345}
1346
1347static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
1348{
1349 struct hl_device *hdev = hpriv->hdev;
1350 struct hl_ctx *ctx = hpriv->ctx;
1351 u32 cs_type_flags, num_chunks;
1352 enum hl_device_status status;
1353 enum hl_cs_type cs_type;
1354 bool is_sync_stream;
1355 int i;
1356
1357 for (i = 0 ; i < sizeof(args->in.pad) ; i++)
1358 if (args->in.pad[i]) {
1359 dev_dbg(hdev->dev, "Padding bytes must be 0\n");
1360 return -EINVAL;
1361 }
1362
1363 if (!hl_device_operational(hdev, &status)) {
1364 return -EBUSY;
1365 }
1366
1367 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1368 !hdev->supports_staged_submission) {
1369 dev_err(hdev->dev, "staged submission not supported");
1370 return -EPERM;
1371 }
1372
1373 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
1374
1375 if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
1376 dev_err(hdev->dev,
1377 "CS type flags are mutually exclusive, context %d\n",
1378 ctx->asid);
1379 return -EINVAL;
1380 }
1381
1382 cs_type = hl_cs_get_cs_type(cs_type_flags);
1383 num_chunks = args->in.num_chunks_execute;
1384
1385 is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
1386 cs_type == CS_TYPE_COLLECTIVE_WAIT);
1387
1388 if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
1389 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1390 return -EINVAL;
1391 }
1392
1393 if (cs_type == CS_TYPE_DEFAULT) {
1394 if (!num_chunks) {
1395 dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
1396 return -EINVAL;
1397 }
1398 } else if (is_sync_stream && num_chunks != 1) {
1399 dev_err(hdev->dev,
1400 "Sync stream CS mandates one chunk only, context %d\n",
1401 ctx->asid);
1402 return -EINVAL;
1403 }
1404
1405 return 0;
1406}
1407
1408static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1409 struct hl_cs_chunk **cs_chunk_array,
1410 void __user *chunks, u32 num_chunks,
1411 struct hl_ctx *ctx)
1412{
1413 u32 size_to_copy;
1414
1415 if (num_chunks > HL_MAX_JOBS_PER_CS) {
1416 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1417 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1418 dev_err(hdev->dev,
1419 "Number of chunks can NOT be larger than %d\n",
1420 HL_MAX_JOBS_PER_CS);
1421 return -EINVAL;
1422 }
1423
1424 *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1425 GFP_ATOMIC);
1426 if (!*cs_chunk_array)
1427 *cs_chunk_array = kmalloc_array(num_chunks,
1428 sizeof(**cs_chunk_array), GFP_KERNEL);
1429 if (!*cs_chunk_array) {
1430 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1431 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1432 return -ENOMEM;
1433 }
1434
1435 size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1436 if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1437 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1438 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1439 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1440 kfree(*cs_chunk_array);
1441 return -EFAULT;
1442 }
1443
1444 return 0;
1445}
1446
1447static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1448 u64 sequence, u32 flags,
1449 u32 encaps_signal_handle)
1450{
1451 if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1452 return 0;
1453
1454 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1455 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1456
1457 if (cs->staged_first) {
1458 /* Staged CS sequence is the first CS sequence */
1459 INIT_LIST_HEAD(&cs->staged_cs_node);
1460 cs->staged_sequence = cs->sequence;
1461
1462 if (cs->encaps_signals)
1463 cs->encaps_sig_hdl_id = encaps_signal_handle;
1464 } else {
1465 /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1466 * under the cs_mirror_lock
1467 */
1468 cs->staged_sequence = sequence;
1469 }
1470
1471 /* Increment CS reference if needed */
1472 staged_cs_get(hdev, cs);
1473
1474 cs->staged_cs = true;
1475
1476 return 0;
1477}
1478
1479static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
1480{
1481 int i;
1482
1483 for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
1484 if (qid == hdev->stream_master_qid_arr[i])
1485 return BIT(i);
1486
1487 return 0;
1488}
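
/*
 * Illustrative example (hypothetical array contents): if
 * hdev->stream_master_qid_arr were {4, 8, 12, 16}, then
 * get_stream_master_qid_mask(hdev, 8) would return BIT(1) == 0x2, while any
 * QID not present in the array maps to 0.
 */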
1489
1490static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1491 u32 num_chunks, u64 *cs_seq, u32 flags,
1492 u32 encaps_signals_handle, u32 timeout,
1493 u16 *signal_initial_sob_count)
1494{
1495 bool staged_mid, int_queues_only = true, using_hw_queues = false;
1496 struct hl_device *hdev = hpriv->hdev;
1497 struct hl_cs_chunk *cs_chunk_array;
1498 struct hl_cs_counters_atomic *cntr;
1499 struct hl_ctx *ctx = hpriv->ctx;
1500 struct hl_cs_job *job;
1501 struct hl_cs *cs;
1502 struct hl_cb *cb;
1503 u64 user_sequence;
1504 u8 stream_master_qid_map = 0;
1505 int rc, i;
1506
1507 cntr = &hdev->aggregated_cs_counters;
1508 user_sequence = *cs_seq;
1509 *cs_seq = ULLONG_MAX;
1510
1511 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1512 hpriv->ctx);
1513 if (rc)
1514 goto out;
1515
1516 if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1517 !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1518 staged_mid = true;
1519 else
1520 staged_mid = false;
1521
1522 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1523 staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1524 timeout);
1525 if (rc)
1526 goto free_cs_chunk_array;
1527
1528 *cs_seq = cs->sequence;
1529
1530 hl_debugfs_add_cs(cs);
1531
1532 rc = cs_staged_submission(hdev, cs, user_sequence, flags,
1533 encaps_signals_handle);
1534 if (rc)
1535 goto free_cs_object;
1536
1537 /* If this is a staged submission we must return the staged sequence
1538 * rather than the internal CS sequence
1539 */
1540 if (cs->staged_cs)
1541 *cs_seq = cs->staged_sequence;
1542
1543 /* Validate ALL the CS chunks before submitting the CS */
1544 for (i = 0 ; i < num_chunks ; i++) {
1545 struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1546 enum hl_queue_type queue_type;
1547 bool is_kernel_allocated_cb;
1548
1549 rc = validate_queue_index(hdev, chunk, &queue_type,
1550 &is_kernel_allocated_cb);
1551 if (rc) {
1552 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1553 atomic64_inc(&cntr->validation_drop_cnt);
1554 goto free_cs_object;
1555 }
1556
1557 if (is_kernel_allocated_cb) {
1558 cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk);
1559 if (!cb) {
1560 atomic64_inc(
1561 &ctx->cs_counters.validation_drop_cnt);
1562 atomic64_inc(&cntr->validation_drop_cnt);
1563 rc = -EINVAL;
1564 goto free_cs_object;
1565 }
1566 } else {
1567 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1568 }
1569
1570 if (queue_type == QUEUE_TYPE_EXT ||
1571 queue_type == QUEUE_TYPE_HW) {
1572 int_queues_only = false;
1573
			/*
			 * store which streams are being used for external/HW
			 * queues of this CS
			 */
1578 if (hdev->supports_wait_for_multi_cs)
1579 stream_master_qid_map |=
1580 get_stream_master_qid_mask(hdev,
1581 chunk->queue_index);
1582 }
1583
1584 if (queue_type == QUEUE_TYPE_HW)
1585 using_hw_queues = true;
1586
1587 job = hl_cs_allocate_job(hdev, queue_type,
1588 is_kernel_allocated_cb);
1589 if (!job) {
1590 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1591 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1592 dev_err(hdev->dev, "Failed to allocate a new job\n");
1593 rc = -ENOMEM;
1594 if (is_kernel_allocated_cb)
1595 goto release_cb;
1596
1597 goto free_cs_object;
1598 }
1599
1600 job->id = i + 1;
1601 job->cs = cs;
1602 job->user_cb = cb;
1603 job->user_cb_size = chunk->cb_size;
1604 job->hw_queue_id = chunk->queue_index;
1605
1606 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1607 cs->jobs_cnt++;
1608
1609 list_add_tail(&job->cs_node, &cs->job_list);
1610
		/*
		 * Increment CS reference. When the CS reference is 0, the CS is
		 * done and can be signaled to the user and all its resources freed.
		 * Only increment for JOBs on external or H/W queues, because
		 * only for those JOBs we get a completion.
		 */
1617 if (cs_needs_completion(cs) &&
1618 (job->queue_type == QUEUE_TYPE_EXT ||
1619 job->queue_type == QUEUE_TYPE_HW))
1620 cs_get(cs);
1621
1622 hl_debugfs_add_job(hdev, job);
1623
1624 rc = cs_parser(hpriv, job);
1625 if (rc) {
1626 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1627 atomic64_inc(&cntr->parsing_drop_cnt);
1628 dev_err(hdev->dev,
1629 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1630 cs->ctx->asid, cs->sequence, job->id, rc);
1631 goto free_cs_object;
1632 }
1633 }
1634
1635 /* We allow a CS with any queue type combination as long as it does
1636 * not get a completion
1637 */
1638 if (int_queues_only && cs_needs_completion(cs)) {
1639 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1640 atomic64_inc(&cntr->validation_drop_cnt);
1641 dev_err(hdev->dev,
1642 "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1643 cs->ctx->asid, cs->sequence);
1644 rc = -EINVAL;
1645 goto free_cs_object;
1646 }
1647
1648 if (using_hw_queues)
1649 INIT_WORK(&cs->finish_work, cs_completion);
1650
1651 /*
1652 * store the (external/HW queues) streams used by the CS in the
1653 * fence object for multi-CS completion
1654 */
1655 if (hdev->supports_wait_for_multi_cs)
1656 cs->fence->stream_master_qid_map = stream_master_qid_map;
1657
1658 rc = hl_hw_queue_schedule_cs(cs);
1659 if (rc) {
1660 if (rc != -EAGAIN)
1661 dev_err(hdev->dev,
1662 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
1663 cs->ctx->asid, cs->sequence, rc);
1664 goto free_cs_object;
1665 }
1666
1667 *signal_initial_sob_count = cs->initial_sob_count;
1668
1669 rc = HL_CS_STATUS_SUCCESS;
1670 goto put_cs;
1671
1672release_cb:
1673 atomic_dec(&cb->cs_cnt);
1674 hl_cb_put(cb);
1675free_cs_object:
1676 cs_rollback(hdev, cs);
1677 *cs_seq = ULLONG_MAX;
1678 /* The path below is both for good and erroneous exits */
1679put_cs:
1680 /* We finished with the CS in this function, so put the ref */
1681 cs_put(cs);
1682free_cs_chunk_array:
1683 kfree(cs_chunk_array);
1684out:
1685 return rc;
1686}
1687
1688static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1689 u64 *cs_seq)
1690{
1691 struct hl_device *hdev = hpriv->hdev;
1692 struct hl_ctx *ctx = hpriv->ctx;
1693 bool need_soft_reset = false;
1694 int rc = 0, do_ctx_switch = 0;
1695 void __user *chunks;
1696 u32 num_chunks, tmp;
1697 u16 sob_count;
1698 int ret;
1699
1700 if (hdev->supports_ctx_switch)
1701 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1702
1703 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1704 mutex_lock(&hpriv->restore_phase_mutex);
1705
1706 if (do_ctx_switch) {
1707 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1708 if (rc) {
1709 dev_err_ratelimited(hdev->dev,
1710 "Failed to switch to context %d, rejecting CS! %d\n",
1711 ctx->asid, rc);
				/*
				 * If we timed out, or if the device is not IDLE
				 * while we want to do a context switch (-EBUSY),
				 * we need to soft-reset because the QMAN is
				 * probably stuck. However, we can't call reset
				 * here directly because of a deadlock, so we
				 * need to do it at the very end of this
				 * function.
				 */
1721 if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1722 need_soft_reset = true;
1723 mutex_unlock(&hpriv->restore_phase_mutex);
1724 goto out;
1725 }
1726 }
1727
1728 hdev->asic_funcs->restore_phase_topology(hdev);
1729
1730 chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1731 num_chunks = args->in.num_chunks_restore;
1732
1733 if (!num_chunks) {
1734 dev_dbg(hdev->dev,
1735 "Need to run restore phase but restore CS is empty\n");
1736 rc = 0;
1737 } else {
1738 rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1739 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
1740 }
1741
1742 mutex_unlock(&hpriv->restore_phase_mutex);
1743
1744 if (rc) {
1745 dev_err(hdev->dev,
1746 "Failed to submit restore CS for context %d (%d)\n",
1747 ctx->asid, rc);
1748 goto out;
1749 }
1750
1751 /* Need to wait for restore completion before execution phase */
1752 if (num_chunks) {
1753 enum hl_cs_wait_status status;
1754
1755 ret = _hl_cs_wait_ioctl(hdev, ctx,
1756 jiffies_to_usecs(hdev->timeout_jiffies),
1757 *cs_seq, &status, NULL);
1758 if (ret) {
1759 dev_err(hdev->dev,
1760 "Restore CS for context %d failed to complete %d\n",
1761 ctx->asid, ret);
1762 rc = -ENOEXEC;
1763 goto out;
1764 }
1765 }
1766
1767 if (hdev->supports_ctx_switch)
1768 ctx->thread_ctx_switch_wait_token = 1;
1769
1770 } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
1771 rc = hl_poll_timeout_memory(hdev,
1772 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1773 100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1774
1775 if (rc == -ETIMEDOUT) {
1776 dev_err(hdev->dev,
1777 "context switch phase timeout (%d)\n", tmp);
1778 goto out;
1779 }
1780 }
1781
1782out:
1783 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1784 hl_device_reset(hdev, 0);
1785
1786 return rc;
1787}
1788
/*
 * hl_cs_signal_sob_wraparound_handler: handle SOB value wraparound case.
 * If the SOB value reaches the max value, move to the other SOB reserved
 * for the queue.
 * @hdev: pointer to device structure
 * @q_idx: stream queue index
 * @hw_sob: the H/W SOB used in this signal CS.
 * @count: signals count
 * @encaps_sig: tells whether it's reservation for encaps signals or not.
 *
 * Note that this function must be called while hw_queues_lock is taken.
 */
1801int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
1802 struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
1803
1804{
1805 struct hl_sync_stream_properties *prop;
1806 struct hl_hw_sob *sob = *hw_sob, *other_sob;
1807 u8 other_sob_offset;
1808
1809 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1810
1811 hw_sob_get(sob);
1812
1813 /* check for wraparound */
1814 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
1815 /*
1816 * Decrement as we reached the max value.
1817 * The release function won't be called here as we've
1818 * just incremented the refcount right before calling this
1819 * function.
1820 */
1821 hw_sob_put_err(sob);
1822
		/*
		 * Check the other SOB value; if it is still in use then fail,
		 * otherwise make the switch.
		 */
1827 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
1828 other_sob = &prop->hw_sob[other_sob_offset];
1829
1830 if (kref_read(&other_sob->kref) != 1) {
1831 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1832 q_idx);
1833 return -EINVAL;
1834 }
1835
1836 /*
1837 * next_sob_val always points to the next available signal
1838 * in the sob, so in encaps signals it will be the next one
1839 * after reserving the required amount.
1840 */
1841 if (encaps_sig)
1842 prop->next_sob_val = count + 1;
1843 else
1844 prop->next_sob_val = count;
1845
1846 /* only two SOBs are currently in use */
1847 prop->curr_sob_offset = other_sob_offset;
1848 *hw_sob = other_sob;
1849
		/*
		 * Check if other_sob needs a reset, then do it before using it
		 * for the reservation or the next signal CS.
		 * We do it here, for both encaps and regular signal CS cases,
		 * in order to avoid possible races of two kref_put of the SOB
		 * which can occur at the same time if we move the SOB
		 * reset (kref_put) to the cs_do_release function.
		 * In addition, if we have a combination of CS signal and
		 * encaps, and at the point we need to reset the SOB there are
		 * no more reservations and only signal CS's keep coming,
		 * in such a case we need the signal CS to put the refcount and
		 * reset the SOB.
		 */
1863 if (other_sob->need_reset)
1864 hw_sob_put(other_sob);
1865
1866 if (encaps_sig) {
1867 /* set reset indication for the sob */
1868 sob->need_reset = true;
1869 hw_sob_get(other_sob);
1870 }
1871
1872 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1873 prop->curr_sob_offset, q_idx);
1874 } else {
1875 prop->next_sob_val += count;
1876 }
1877
1878 return 0;
1879}
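
/*
 * Worked example for the wraparound case (illustrative numbers): with
 * prop->next_sob_val at HL_MAX_SOB_VAL - 2 and a signal CS of count 4, the
 * sum crosses HL_MAX_SOB_VAL, so the handler switches to the other reserved
 * SOB (prop->curr_sob_offset flips to the other HL_RSVD_SOBS entry) and
 * restarts the counter at next_sob_val = 4, or 5 when reserving encaps
 * signals.
 */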
1880
1881static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1882 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
1883 bool encaps_signals)
1884{
1885 u64 *signal_seq_arr = NULL;
1886 u32 size_to_copy, signal_seq_arr_len;
1887 int rc = 0;
1888
1889 if (encaps_signals) {
1890 *signal_seq = chunk->encaps_signal_seq;
1891 return 0;
1892 }
1893
1894 signal_seq_arr_len = chunk->num_signal_seq_arr;
1895
1896 /* currently only one signal seq is supported */
1897 if (signal_seq_arr_len != 1) {
1898 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1899 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1900 dev_err(hdev->dev,
1901 "Wait for signal CS supports only one signal CS seq\n");
1902 return -EINVAL;
1903 }
1904
1905 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1906 sizeof(*signal_seq_arr),
1907 GFP_ATOMIC);
1908 if (!signal_seq_arr)
1909 signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1910 sizeof(*signal_seq_arr),
1911 GFP_KERNEL);
1912 if (!signal_seq_arr) {
1913 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1914 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1915 return -ENOMEM;
1916 }
1917
1918 size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
1919 if (copy_from_user(signal_seq_arr,
1920 u64_to_user_ptr(chunk->signal_seq_arr),
1921 size_to_copy)) {
1922 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1923 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1924 dev_err(hdev->dev,
1925 "Failed to copy signal seq array from user\n");
1926 rc = -EFAULT;
1927 goto out;
1928 }
1929
1930 /* currently it is guaranteed to have only one signal seq */
1931 *signal_seq = signal_seq_arr[0];
1932
1933out:
1934 kfree(signal_seq_arr);
1935
1936 return rc;
1937}
1938
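/*
 * cs_ioctl_signal_wait_create_jobs() - create the single kernel job of a
 * signal/wait CS.
 *
 * A kernel-owned CB is allocated with the ASIC-specific signal/wait packet
 * size and attached to the job as an already-patched CB, so no parsing is
 * needed later in the submission flow.
 */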
1939static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1940 struct hl_ctx *ctx, struct hl_cs *cs,
1941 enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
1942{
1943 struct hl_cs_counters_atomic *cntr;
1944 struct hl_cs_job *job;
1945 struct hl_cb *cb;
1946 u32 cb_size;
1947
1948 cntr = &hdev->aggregated_cs_counters;
1949
1950 job = hl_cs_allocate_job(hdev, q_type, true);
1951 if (!job) {
1952 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1953 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1954 dev_err(hdev->dev, "Failed to allocate a new job\n");
1955 return -ENOMEM;
1956 }
1957
1958 if (cs->type == CS_TYPE_WAIT)
1959 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1960 else
1961 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1962
1963 cb = hl_cb_kernel_create(hdev, cb_size, q_type == QUEUE_TYPE_HW);
1964 if (!cb) {
1965 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1966 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1967 kfree(job);
1968 return -EFAULT;
1969 }
1970
1971 job->id = 0;
1972 job->cs = cs;
1973 job->user_cb = cb;
1974 atomic_inc(&job->user_cb->cs_cnt);
1975 job->user_cb_size = cb_size;
1976 job->hw_queue_id = q_idx;
1977
1978 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
1979 && cs->encaps_signals)
1980 job->encaps_sig_wait_offset = encaps_signal_offset;
1981 /*
1982 * No need for parsing, the user CB is already the patched CB.
1983 * We call hl_cb_destroy() for two reasons: we don't need the CB in
1984 * the CB idr anymore, and to decrement its refcount as it was
1985 * incremented inside hl_cb_kernel_create().
1986 */
1987 job->patched_cb = job->user_cb;
1988 job->job_cb_size = job->user_cb_size;
1989 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1990
1991 /* increment refcount as for external queues we get completion */
1992 cs_get(cs);
1993
1994 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1995 cs->jobs_cnt++;
1996
1997 list_add_tail(&job->cs_node, &cs->job_list);
1998
1999 hl_debugfs_add_job(hdev, job);
2000
2001 return 0;
2002}
2003
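/*
 * cs_ioctl_reserve_signals() - reserve encapsulated signals on a sync-stream
 * queue.
 *
 * The reservation advances next_sob_val by 'count' (switching SOB on
 * wraparound) and returns a handle ID, the SOB address and the SOB value
 * after the reservation.
 *
 * A rough, illustrative user-space flow (see the uapi header for the exact
 * structures):
 *   1. CS_RESERVE_SIGNALS -> handle_id, sob_base_addr_offset, count
 *   2. submit the staged CS that signals the reserved SOB, passing
 *      encaps_sig_handle_id
 *   3. CS_TYPE_WAIT with HL_CS_FLAGS_ENCAP_SIGNALS, using encaps_signal_seq
 *      and encaps_signal_offset to wait on part of the reservation
 *   4. CS_UNRESERVE_SIGNALS if the reservation was not consumed
 */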
2004static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
2005 u32 q_idx, u32 count,
2006 u32 *handle_id, u32 *sob_addr,
2007 u32 *signals_count)
2008{
2009 struct hw_queue_properties *hw_queue_prop;
2010 struct hl_sync_stream_properties *prop;
2011 struct hl_device *hdev = hpriv->hdev;
2012 struct hl_cs_encaps_sig_handle *handle;
2013 struct hl_encaps_signals_mgr *mgr;
2014 struct hl_hw_sob *hw_sob;
2015 int hdl_id;
2016 int rc = 0;
2017
2018 if (count >= HL_MAX_SOB_VAL) {
2019 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
2020 count);
2021 rc = -EINVAL;
2022 goto out;
2023 }
2024
2025 if (q_idx >= hdev->asic_prop.max_queues) {
2026 dev_err(hdev->dev, "Queue index %d is invalid\n",
2027 q_idx);
2028 rc = -EINVAL;
2029 goto out;
2030 }
2031
2032 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2033
2034 if (!hw_queue_prop->supports_sync_stream) {
2035 dev_err(hdev->dev,
2036 "Queue index %d does not support sync stream operations\n",
2037 q_idx);
2038 rc = -EINVAL;
2039 goto out;
2040 }
2041
2042 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
2043
2044 handle = kzalloc(sizeof(*handle), GFP_KERNEL);
2045 if (!handle) {
2046 rc = -ENOMEM;
2047 goto out;
2048 }
2049
2050 handle->count = count;
2051
2052 hl_ctx_get(hpriv->ctx);
2053 handle->ctx = hpriv->ctx;
2054 mgr = &hpriv->ctx->sig_mgr;
2055
2056 spin_lock(&mgr->lock);
2057 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
2058 spin_unlock(&mgr->lock);
2059
2060 if (hdl_id < 0) {
2061 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
2062 rc = -EINVAL;
2063 goto put_ctx;
2064 }
2065
2066 handle->id = hdl_id;
2067 handle->q_idx = q_idx;
2068 handle->hdev = hdev;
2069 kref_init(&handle->refcount);
2070
2071 hdev->asic_funcs->hw_queues_lock(hdev);
2072
2073 hw_sob = &prop->hw_sob[prop->curr_sob_offset];
2074
2075 /*
2076 * Increment the SOB value by the user-requested count in order
2077 * to reserve those signals.
2078 * If the amount of signals to reserve would exceed the max SOB
2079 * value, switch to the other SOB.
2080 */
2081 rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
2082 true);
2083 if (rc) {
2084 dev_err(hdev->dev, "Failed to switch SOB\n");
2085 hdev->asic_funcs->hw_queues_unlock(hdev);
2086 rc = -EINVAL;
2087 goto remove_idr;
2088 }
2089 /* Store the hw_sob in the handle only after calling the SOB wraparound
2090 * handler, since the SOB could have changed.
2091 */
2092 handle->hw_sob = hw_sob;
2093
2094 /* store the pre-reservation SOB value for the unreserve validity
2095 * check and for signal offset support
2096 */
2097 handle->pre_sob_val = prop->next_sob_val - handle->count;
2098
2099 handle->cs_seq = ULLONG_MAX;
2100
2101 *signals_count = prop->next_sob_val;
2102 hdev->asic_funcs->hw_queues_unlock(hdev);
2103
2104 *sob_addr = handle->hw_sob->sob_addr;
2105 *handle_id = hdl_id;
2106
2107 dev_dbg(hdev->dev,
2108 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
2109 hw_sob->sob_id, handle->hw_sob->sob_addr,
2110 prop->next_sob_val - 1, q_idx, hdl_id);
2111 goto out;
2112
2113remove_idr:
2114 spin_lock(&mgr->lock);
2115 idr_remove(&mgr->handles, hdl_id);
2116 spin_unlock(&mgr->lock);
2117
2118put_ctx:
2119 hl_ctx_put(handle->ctx);
2120 kfree(handle);
2121
2122out:
2123 return rc;
2124}
2125
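/*
 * cs_ioctl_unreserve_signals() - release a previously reserved encapsulated
 * signals handle.
 *
 * The reservation can only be undone if no other signal submissions (or a
 * SOB switch) changed the SOB value since the reserve call; otherwise the
 * request is rejected with -EINVAL.
 */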
2126static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
2127{
2128 struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
2129 struct hl_sync_stream_properties *prop;
2130 struct hl_device *hdev = hpriv->hdev;
2131 struct hl_encaps_signals_mgr *mgr;
2132 struct hl_hw_sob *hw_sob;
2133 u32 q_idx, sob_addr;
2134 int rc = 0;
2135
2136 mgr = &hpriv->ctx->sig_mgr;
2137
2138 spin_lock(&mgr->lock);
2139 encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
2140 if (encaps_sig_hdl) {
2141 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
2142 handle_id, encaps_sig_hdl->hw_sob->sob_addr,
2143 encaps_sig_hdl->count);
2144
2145 hdev->asic_funcs->hw_queues_lock(hdev);
2146
2147 q_idx = encaps_sig_hdl->q_idx;
2148 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
2149 hw_sob = &prop->hw_sob[prop->curr_sob_offset];
2150 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
2151
2152 /* Check if sob_val got out of sync due to other
2153 * signal submission requests that were handled
2154 * between the reserve and unreserve calls, or due to a SOB
2155 * switch upon reaching the max SOB value.
2156 */
2157 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
2158 != prop->next_sob_val ||
2159 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
2160 dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
2161 encaps_sig_hdl->pre_sob_val,
2162 (prop->next_sob_val - encaps_sig_hdl->count));
2163
2164 hdev->asic_funcs->hw_queues_unlock(hdev);
2165 rc = -EINVAL;
2166 goto out_unlock;
2167 }
2168
2169 /*
2170 * Decrement the SOB value by the user-requested count in order
2171 * to unreserve those signals
2172 */
2173 prop->next_sob_val -= encaps_sig_hdl->count;
2174
2175 hdev->asic_funcs->hw_queues_unlock(hdev);
2176
2177 hw_sob_put(hw_sob);
2178
2179 /* Release the id and free allocated memory of the handle */
2180 idr_remove(&mgr->handles, handle_id);
2181
2182 /* unlock before calling ctx_put, where we might sleep */
2183 spin_unlock(&mgr->lock);
2184 hl_ctx_put(encaps_sig_hdl->ctx);
2185 kfree(encaps_sig_hdl);
2186 goto out;
2187 } else {
2188 rc = -EINVAL;
2189 dev_err(hdev->dev, "failed to unreserve signals, cannot find handle\n");
2190 }
2191
2192out_unlock:
2193 spin_unlock(&mgr->lock);
2194
2195out:
2196 return rc;
2197}
2198
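/*
 * cs_ioctl_signal_wait() - handle the SIGNAL/WAIT/COLLECTIVE_WAIT CS types.
 *
 * For wait CSs the signal fence (or the encapsulated signals handle) is
 * looked up first; if the signal CS already completed, the ioctl returns
 * success without submitting anything. Otherwise a CS is allocated, its
 * kernel job(s) are created and it is scheduled on the requested queue.
 */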
2199static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
2200 void __user *chunks, u32 num_chunks,
2201 u64 *cs_seq, u32 flags, u32 timeout,
2202 u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
2203{
2204 struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
2205 bool handle_found = false, is_wait_cs = false,
2206 wait_cs_submitted = false,
2207 cs_encaps_signals = false;
2208 struct hl_cs_chunk *cs_chunk_array, *chunk;
2209 bool staged_cs_with_encaps_signals = false;
2210 struct hw_queue_properties *hw_queue_prop;
2211 struct hl_device *hdev = hpriv->hdev;
2212 struct hl_cs_compl *sig_waitcs_cmpl;
2213 u32 q_idx, collective_engine_id = 0;
2214 struct hl_cs_counters_atomic *cntr;
2215 struct hl_fence *sig_fence = NULL;
2216 struct hl_ctx *ctx = hpriv->ctx;
2217 enum hl_queue_type q_type;
2218 struct hl_cs *cs;
2219 u64 signal_seq;
2220 int rc;
2221
2222 cntr = &hdev->aggregated_cs_counters;
2223 *cs_seq = ULLONG_MAX;
2224
2225 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
2226 ctx);
2227 if (rc)
2228 goto out;
2229
2230 /* currently it is guaranteed to have only one chunk */
2231 chunk = &cs_chunk_array[0];
2232
2233 if (chunk->queue_index >= hdev->asic_prop.max_queues) {
2234 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2235 atomic64_inc(&cntr->validation_drop_cnt);
2236 dev_err(hdev->dev, "Queue index %d is invalid\n",
2237 chunk->queue_index);
2238 rc = -EINVAL;
2239 goto free_cs_chunk_array;
2240 }
2241
2242 q_idx = chunk->queue_index;
2243 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2244 q_type = hw_queue_prop->type;
2245
2246 if (!hw_queue_prop->supports_sync_stream) {
2247 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2248 atomic64_inc(&cntr->validation_drop_cnt);
2249 dev_err(hdev->dev,
2250 "Queue index %d does not support sync stream operations\n",
2251 q_idx);
2252 rc = -EINVAL;
2253 goto free_cs_chunk_array;
2254 }
2255
2256 if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
2257 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
2258 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2259 atomic64_inc(&cntr->validation_drop_cnt);
2260 dev_err(hdev->dev,
2261 "Queue index %d is invalid\n", q_idx);
2262 rc = -EINVAL;
2263 goto free_cs_chunk_array;
2264 }
2265
2266 if (!hdev->nic_ports_mask) {
2267 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2268 atomic64_inc(&cntr->validation_drop_cnt);
2269 dev_err(hdev->dev,
2270 "Collective operations not supported when NIC ports are disabled");
2271 rc = -EINVAL;
2272 goto free_cs_chunk_array;
2273 }
2274
2275 collective_engine_id = chunk->collective_engine_id;
2276 }
2277
2278 is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
2279 cs_type == CS_TYPE_COLLECTIVE_WAIT);
2280
2281 cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
2282
2283 if (is_wait_cs) {
2284 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
2285 ctx, cs_encaps_signals);
2286 if (rc)
2287 goto free_cs_chunk_array;
2288
2289 if (cs_encaps_signals) {
2290 /* check if the CS sequence has an encapsulated
2291 * signals handle
2292 */
2293 struct idr *idp;
2294 u32 id;
2295
2296 spin_lock(&ctx->sig_mgr.lock);
2297 idp = &ctx->sig_mgr.handles;
2298 idr_for_each_entry(idp, encaps_sig_hdl, id) {
2299 if (encaps_sig_hdl->cs_seq == signal_seq) {
2300 /* Get a refcount to protect this handle from being removed
2301 * from the idr; this is needed when multiple wait CSs use an
2302 * offset to wait on reserved encaps signals.
2303 * Since kref_put of this handle is executed outside the
2304 * current lock, it is possible that the handle refcount
2305 * is 0 but it has yet to be removed from the list. In this
2306 * case we need to consider the handle as not valid.
2307 */
2308 if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
2309 handle_found = true;
2310 break;
2311 }
2312 }
2313 spin_unlock(&ctx->sig_mgr.lock);
2314
2315 if (!handle_found) {
2316 /* treat as signal CS already finished */
2317 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
2318 signal_seq);
2319 rc = 0;
2320 goto free_cs_chunk_array;
2321 }
2322
2323 /* validate also the signal offset value */
2324 if (chunk->encaps_signal_offset >
2325 encaps_sig_hdl->count) {
2326 dev_err(hdev->dev, "offset(%u) value exceeds max reserved signals count(%u)!\n",
2327 chunk->encaps_signal_offset,
2328 encaps_sig_hdl->count);
2329 rc = -EINVAL;
2330 goto free_cs_chunk_array;
2331 }
2332 }
2333
2334 sig_fence = hl_ctx_get_fence(ctx, signal_seq);
2335 if (IS_ERR(sig_fence)) {
2336 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2337 atomic64_inc(&cntr->validation_drop_cnt);
2338 dev_err(hdev->dev,
2339 "Failed to get signal CS with seq 0x%llx\n",
2340 signal_seq);
2341 rc = PTR_ERR(sig_fence);
2342 goto free_cs_chunk_array;
2343 }
2344
2345 if (!sig_fence) {
2346 /* signal CS already finished */
2347 rc = 0;
2348 goto free_cs_chunk_array;
2349 }
2350
2351 sig_waitcs_cmpl =
2352 container_of(sig_fence, struct hl_cs_compl, base_fence);
2353
2354 staged_cs_with_encaps_signals = !!
2355 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
2356 (flags & HL_CS_FLAGS_ENCAP_SIGNALS));
2357
2358 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
2359 !staged_cs_with_encaps_signals) {
2360 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2361 atomic64_inc(&cntr->validation_drop_cnt);
2362 dev_err(hdev->dev,
2363 "CS seq 0x%llx is not of a signal/encaps-signal CS\n",
2364 signal_seq);
2365 hl_fence_put(sig_fence);
2366 rc = -EINVAL;
2367 goto free_cs_chunk_array;
2368 }
2369
2370 if (completion_done(&sig_fence->completion)) {
2371 /* signal CS already finished */
2372 hl_fence_put(sig_fence);
2373 rc = 0;
2374 goto free_cs_chunk_array;
2375 }
2376 }
2377
2378 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
2379 if (rc) {
2380 if (is_wait_cs)
2381 hl_fence_put(sig_fence);
2382
2383 goto free_cs_chunk_array;
2384 }
2385
2386 /*
2387 * Save the signal CS fence for later initialization right before
2388 * hanging the wait CS on the queue.
2389 * For the encaps signals case, we save the CS sequence and the handle
2390 * pointer for later initialization.
2391 */
2392 if (is_wait_cs) {
2393 cs->signal_fence = sig_fence;
2394 /* store the handle pointer, so we don't have to
2395 * look for it again later in the flow,
2396 * when we need to set the SOB info in hw_queue.
2397 */
2398 if (cs->encaps_signals)
2399 cs->encaps_sig_hdl = encaps_sig_hdl;
2400 }
2401
2402 hl_debugfs_add_cs(cs);
2403
2404 *cs_seq = cs->sequence;
2405
2406 if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
2407 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
2408 q_idx, chunk->encaps_signal_offset);
2409 else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
2410 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
2411 cs, q_idx, collective_engine_id,
2412 chunk->encaps_signal_offset);
2413 else {
2414 atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2415 atomic64_inc(&cntr->validation_drop_cnt);
2416 rc = -EINVAL;
2417 }
2418
2419 if (rc)
2420 goto free_cs_object;
2421
2422 if (q_type == QUEUE_TYPE_HW)
2423 INIT_WORK(&cs->finish_work, cs_completion);
2424
2425 rc = hl_hw_queue_schedule_cs(cs);
2426 if (rc) {
2427 /* In case a wait CS failed here, it means the signal CS
2428 * already completed. We want to free all of its related objects,
2429 * but we don't want to fail the ioctl.
2430 */
2431 if (is_wait_cs)
2432 rc = 0;
2433 else if (rc != -EAGAIN)
2434 dev_err(hdev->dev,
2435 "Failed to submit CS %d.%llu to H/W queues, error %d\n",
2436 ctx->asid, cs->sequence, rc);
2437 goto free_cs_object;
2438 }
2439
2440 *signal_sob_addr_offset = cs->sob_addr_offset;
2441 *signal_initial_sob_count = cs->initial_sob_count;
2442
2443 rc = HL_CS_STATUS_SUCCESS;
2444 if (is_wait_cs)
2445 wait_cs_submitted = true;
2446 goto put_cs;
2447
2448free_cs_object:
2449 cs_rollback(hdev, cs);
2450 *cs_seq = ULLONG_MAX;
2451 /* The path below is both for good and erroneous exits */
2452put_cs:
2453 /* We finished with the CS in this function, so put the ref */
2454 cs_put(cs);
2455free_cs_chunk_array:
2456 if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs)
2457 kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
2458 kfree(cs_chunk_array);
2459out:
2460 return rc;
2461}
2462
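/*
 * cs_ioctl_engine_cores() - send a run/halt command to a set of engine cores.
 *
 * The core-ID array is copied from user-space and passed to the ASIC-specific
 * set_engine_cores() callback.
 */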
2463static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
2464 u32 num_engine_cores, u32 core_command)
2465{
2466 struct hl_device *hdev = hpriv->hdev;
2467 void __user *engine_cores_arr;
2468 u32 *cores;
2469 int rc;
2470
2471 if (!hdev->asic_prop.supports_engine_modes)
2472 return -EPERM;
2473
2474 if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
2475 dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
2476 return -EINVAL;
2477 }
2478
2479 if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
2480 dev_err(hdev->dev, "Engine core command is invalid\n");
2481 return -EINVAL;
2482 }
2483
2484 engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
2485 cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
2486 if (!cores)
2487 return -ENOMEM;
2488
2489 if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
2490 dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
2491 kfree(cores);
2492 return -EFAULT;
2493 }
2494
2495 rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
2496 kfree(cores);
2497
2498 return rc;
2499}
2500
2501static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr,
2502 u32 num_engines, enum hl_engine_command command)
2503{
2504 struct hl_device *hdev = hpriv->hdev;
2505 u32 *engines, max_num_of_engines;
2506 void __user *engines_arr;
2507 int rc;
2508
2509 if (!hdev->asic_prop.supports_engine_modes)
2510 return -EPERM;
2511
2512 if (command >= HL_ENGINE_COMMAND_MAX) {
2513 dev_err(hdev->dev, "Engine command is invalid\n");
2514 return -EINVAL;
2515 }
2516
2517 max_num_of_engines = hdev->asic_prop.max_num_of_engines;
2518 if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT)
2519 max_num_of_engines = hdev->asic_prop.num_engine_cores;
2520
2521 if (!num_engines || num_engines > max_num_of_engines) {
2522 dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines);
2523 return -EINVAL;
2524 }
2525
2526 engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr;
2527 engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL);
2528 if (!engines)
2529 return -ENOMEM;
2530
2531 if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) {
2532 dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
2533 kfree(engines);
2534 return -EFAULT;
2535 }
2536
2537 rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);
2538 kfree(engines);
2539
2540 return rc;
2541}
2542
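/*
 * cs_ioctl_flush_pci_hbw_writes() - flush pending PCI high-bandwidth writes
 * by reading from the ASIC-specific flush register.
 */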
2543static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
2544{
2545 struct hl_device *hdev = hpriv->hdev;
2546 struct asic_fixed_properties *prop = &hdev->asic_prop;
2547
2548 if (!prop->hbw_flush_reg) {
2549 dev_dbg(hdev->dev, "HBW flush is not supported\n");
2550 return -EOPNOTSUPP;
2551 }
2552
2553 RREG32(prop->hbw_flush_reg);
2554
2555 return 0;
2556}
2557
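/*
 * hl_cs_ioctl() - DRM entry point for command submission.
 *
 * After sanity checks and an optional context-switch CS, the request is
 * dispatched by CS type. The output arguments are filled for any return
 * code other than -EAGAIN, which is returned without touching them (e.g.
 * when the submission should simply be retried).
 */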
2558int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
2559{
2560 struct hl_fpriv *hpriv = file_priv->driver_priv;
2561 union hl_cs_args *args = data;
2562 enum hl_cs_type cs_type = 0;
2563 u64 cs_seq = ULLONG_MAX;
2564 void __user *chunks;
2565 u32 num_chunks, flags, timeout,
2566 signals_count = 0, sob_addr = 0, handle_id = 0;
2567 u16 sob_initial_count = 0;
2568 int rc;
2569
2570 rc = hl_cs_sanity_checks(hpriv, args);
2571 if (rc)
2572 goto out;
2573
2574 rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
2575 if (rc)
2576 goto out;
2577
2578 cs_type = hl_cs_get_cs_type(args->in.cs_flags &
2579 ~HL_CS_FLAGS_FORCE_RESTORE);
2580 chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
2581 num_chunks = args->in.num_chunks_execute;
2582 flags = args->in.cs_flags;
2583
2584 /* In case this is a staged CS, user should supply the CS sequence */
2585 if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
2586 !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
2587 cs_seq = args->in.seq;
2588
2589 timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
2590 ? msecs_to_jiffies(args->in.timeout * 1000)
2591 : hpriv->hdev->timeout_jiffies;
2592
2593 switch (cs_type) {
2594 case CS_TYPE_SIGNAL:
2595 case CS_TYPE_WAIT:
2596 case CS_TYPE_COLLECTIVE_WAIT:
2597 rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
2598 &cs_seq, args->in.cs_flags, timeout,
2599 &sob_addr, &sob_initial_count);
2600 break;
2601 case CS_RESERVE_SIGNALS:
2602 rc = cs_ioctl_reserve_signals(hpriv,
2603 args->in.encaps_signals_q_idx,
2604 args->in.encaps_signals_count,
2605 &handle_id, &sob_addr, &signals_count);
2606 break;
2607 case CS_UNRESERVE_SIGNALS:
2608 rc = cs_ioctl_unreserve_signals(hpriv,
2609 args->in.encaps_sig_handle_id);
2610 break;
2611 case CS_TYPE_ENGINE_CORE:
2612 rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
2613 args->in.num_engine_cores, args->in.core_command);
2614 break;
2615 case CS_TYPE_ENGINES:
2616 rc = cs_ioctl_engines(hpriv, args->in.engines,
2617 args->in.num_engines, args->in.engine_command);
2618 break;
2619 case CS_TYPE_FLUSH_PCI_HBW_WRITES:
2620 rc = cs_ioctl_flush_pci_hbw_writes(hpriv);
2621 break;
2622 default:
2623 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
2624 args->in.cs_flags,
2625 args->in.encaps_sig_handle_id,
2626 timeout, &sob_initial_count);
2627 break;
2628 }
2629out:
2630 if (rc != -EAGAIN) {
2631 memset(args, 0, sizeof(*args));
2632
2633 switch (cs_type) {
2634 case CS_RESERVE_SIGNALS:
2635 args->out.handle_id = handle_id;
2636 args->out.sob_base_addr_offset = sob_addr;
2637 args->out.count = signals_count;
2638 break;
2639 case CS_TYPE_SIGNAL:
2640 args->out.sob_base_addr_offset = sob_addr;
2641 args->out.sob_count_before_submission = sob_initial_count;
2642 args->out.seq = cs_seq;
2643 break;
2644 case CS_TYPE_DEFAULT:
2645 args->out.sob_count_before_submission = sob_initial_count;
2646 args->out.seq = cs_seq;
2647 break;
2648 default:
2649 args->out.seq = cs_seq;
2650 break;
2651 }
2652
2653 args->out.status = rc;
2654 }
2655
2656 return rc;
2657}
2658
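/*
 * hl_wait_for_fence() - wait for (or poll) a single CS fence.
 *
 * A NULL fence means the CS already completed and was removed; in that case
 * the outcome store is consulted for its timestamp and error. With timeout_us
 * set to 0 the fence is only polled and the function does not sleep.
 */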
2659static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
2660 enum hl_cs_wait_status *status, u64 timeout_us, s64 *timestamp)
2661{
2662 struct hl_device *hdev = ctx->hdev;
2663 ktime_t timestamp_kt;
2664 long completion_rc;
2665 int rc = 0, error;
2666
2667 if (IS_ERR(fence)) {
2668 rc = PTR_ERR(fence);
2669 if (rc == -EINVAL)
2670 dev_notice_ratelimited(hdev->dev,
2671 "Can't wait on CS %llu because current CS is at seq %llu\n",
2672 seq, ctx->cs_sequence);
2673 return rc;
2674 }
2675
2676 if (!fence) {
2677 if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, &timestamp_kt, &error)) {
2678 dev_dbg(hdev->dev,
2679 "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
2680 seq, ctx->cs_sequence);
2681 *status = CS_WAIT_STATUS_GONE;
2682 return 0;
2683 }
2684
2685 completion_rc = 1;
2686 goto report_results;
2687 }
2688
2689 if (!timeout_us) {
2690 completion_rc = completion_done(&fence->completion);
2691 } else {
2692 unsigned long timeout;
2693
2694 timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
2695 timeout_us : usecs_to_jiffies(timeout_us);
2696 completion_rc =
2697 wait_for_completion_interruptible_timeout(
2698 &fence->completion, timeout);
2699 }
2700
2701 error = fence->error;
2702 timestamp_kt = fence->timestamp;
2703
2704report_results:
2705 if (completion_rc > 0) {
2706 *status = CS_WAIT_STATUS_COMPLETED;
2707 if (timestamp)
2708 *timestamp = ktime_to_ns(timestamp_kt);
2709 } else {
2710 *status = CS_WAIT_STATUS_BUSY;
2711 }
2712
2713 if (completion_rc == -ERESTARTSYS)
2714 rc = completion_rc;
2715 else if (error == -ETIMEDOUT || error == -EIO)
2716 rc = error;
2717
2718 return rc;
2719}
2720
2721/*
2722 * hl_cs_poll_fences - iterate CS fences to check for CS completion
2723 *
2724 * @mcs_data: multi-CS internal data
2725 * @mcs_compl: multi-CS completion structure
2726 *
2727 * @return 0 on success, otherwise non 0 error code
2728 *
2729 * The function iterates over all CS sequences in the list and sets a bit in
2730 * completion_bitmap for each completed CS.
2731 * While iterating, the function adds the stream master QID map of each fence
2732 * in the fence array to the completion QID map, which is used by CSs to
2733 * signal completion to the multi-CS context.
2734 * This function shall be called after taking context ref
2735 */
2736static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
2737{
2738 struct hl_fence **fence_ptr = mcs_data->fence_arr;
2739 struct hl_device *hdev = mcs_data->ctx->hdev;
2740 int i, rc, arr_len = mcs_data->arr_len;
2741 u64 *seq_arr = mcs_data->seq_arr;
2742 ktime_t max_ktime, first_cs_time;
2743 enum hl_cs_wait_status status;
2744
2745 memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *));
2746
2747 /* get all fences under the same lock */
2748 rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
2749 if (rc)
2750 return rc;
2751
2752 /*
2753 * Re-initialize the completion here to handle 2 possible cases:
2754 * 1. The CS completes the multi-CS prior to clearing the completion, in
2755 * which case the fence iteration is guaranteed to catch the CS completion.
2756 * 2. The completion occurs after the re-init of the completion,
2757 * in which case we will wake up immediately in wait_for_completion.
2758 */
2759 reinit_completion(&mcs_compl->completion);
2760
2761 /*
2762 * set to the maximum time to verify the timestamp is valid: if at the end
2763 * this value is maintained, no timestamp was updated
2764 */
2765 max_ktime = ktime_set(KTIME_SEC_MAX, 0);
2766 first_cs_time = max_ktime;
2767
2768 for (i = 0; i < arr_len; i++, fence_ptr++) {
2769 struct hl_fence *fence = *fence_ptr;
2770
2771 /*
2772 * In order to prevent the case where we wait until timeout even though a CS
2773 * associated with the multi-CS actually completed, we do things in the below order:
2774 * 1. For each fence, set its QID map in the multi-CS completion QID map. This way
2775 * any CS can, potentially, complete the multi-CS for the specific QID (note
2776 * that once the completion is initialized, calling complete* and then waiting on
2777 * the completion will cause it to return at once).
2778 * 2. Only after allowing multi-CS completion for the specific QID do we check
2779 * whether the specific CS already completed (and thus the wait-for-completion
2780 * part will be skipped). If the CS has not completed, it is guaranteed that the
2781 * completing CS will wake up the completion.
2782 */
2783 if (fence)
2784 mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map;
2785
2786 /*
2787 * function won't sleep as it is called with timeout 0 (i.e.
2788 * poll the fence)
2789 */
2790 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL);
2791 if (rc) {
2792 dev_err(hdev->dev,
2793 "wait_for_fence error :%d for CS seq %llu\n",
2794 rc, seq_arr[i]);
2795 break;
2796 }
2797
2798 switch (status) {
2799 case CS_WAIT_STATUS_BUSY:
2800 /* CS has not finished, QID to wait on already stored */
2801 break;
2802 case CS_WAIT_STATUS_COMPLETED:
2803 /*
2804 * Use mcs_handling_done to avoid returning to the user with an
2805 * indication that the CS completed before it finished all of its
2806 * mcs handling, which would cause a race the next time the
2807 * user waits for mcs.
2808 * Note: when reaching this case the fence is definitely not NULL,
2809 * but the NULL check was added to satisfy static analysis.
2810 */
2811 if (fence && !fence->mcs_handling_done) {
2812 /*
2813 * In case the multi-CS is completed but the MCS handling is not done,
2814 * we "complete" the multi-CS to prevent it from waiting
2815 * until time-out, and the "multi-CS handling done" will get
2816 * another chance at the next iteration
2817 */
2818 complete_all(&mcs_compl->completion);
2819 break;
2820 }
2821
2822 mcs_data->completion_bitmap |= BIT(i);
2823 /*
2824 * For all completed CSs we take the earliest timestamp.
2825 * For this we have to validate that the timestamp is
2826 * earliest of all timestamps so far.
2827 */
2828 if (fence && mcs_data->update_ts &&
2829 (ktime_compare(fence->timestamp, first_cs_time) < 0))
2830 first_cs_time = fence->timestamp;
2831 break;
2832 case CS_WAIT_STATUS_GONE:
2833 mcs_data->update_ts = false;
2834 mcs_data->gone_cs = true;
2835 /*
2836 * It is possible to get old sequence numbers from the user
2837 * which relate to already completed CSs whose fences are
2838 * already gone. In this case, the CS is set as completed but
2839 * there is no need to consider its QID for mcs completion.
2840 */
2841 mcs_data->completion_bitmap |= BIT(i);
2842 break;
2843 default:
2844 dev_err(hdev->dev, "Invalid fence status\n");
2845 rc = -EINVAL;
2846 break;
2847 }
2848
2849 }
2850
2851 hl_fences_put(mcs_data->fence_arr, arr_len);
2852
2853 if (mcs_data->update_ts &&
2854 (ktime_compare(first_cs_time, max_ktime) != 0))
2855 mcs_data->timestamp = ktime_to_ns(first_cs_time);
2856
2857 return rc;
2858}
2859
2860static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
2861 enum hl_cs_wait_status *status, s64 *timestamp)
2862{
2863 struct hl_fence *fence;
2864 int rc = 0;
2865
2866 if (timestamp)
2867 *timestamp = 0;
2868
2869 hl_ctx_get(ctx);
2870
2871 fence = hl_ctx_get_fence(ctx, seq);
2872
2873 rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
2874 hl_fence_put(fence);
2875 hl_ctx_put(ctx);
2876
2877 return rc;
2878}
2879
2880static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
2881{
2882 if (usecs <= U32_MAX)
2883 return usecs_to_jiffies(usecs);
2884
2885 /*
2886 * If converting the value to nanoseconds would overflow 64 bits, use the
2887 * largest 64-bit value.
2888 */
2889 if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
2890 return nsecs_to_jiffies(U64_MAX);
2891
2892 return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
2893}
2894
2895/*
2896 * hl_wait_multi_cs_completion_init - init completion structure
2897 *
2898 * @hdev: pointer to habanalabs device structure
2901 *
2902 * @return valid completion struct pointer on success, otherwise error pointer
2903 *
2904 * Up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver.
2905 * The function gets the first available completion (by marking it "used")
2906 * and initializes its values.
2907 */
2908static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
2909{
2910 struct multi_cs_completion *mcs_compl;
2911 int i;
2912
2913 /* find free multi_cs completion structure */
2914 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
2915 mcs_compl = &hdev->multi_cs_completion[i];
2916 spin_lock(&mcs_compl->lock);
2917 if (!mcs_compl->used) {
2918 mcs_compl->used = 1;
2919 mcs_compl->timestamp = 0;
2920 /*
2921 * init QID map to 0 to avoid completion by CSs. The actual QID map
2922 * of the multi-CS CSs will be set incrementally at a later stage
2923 */
2924 mcs_compl->stream_master_qid_map = 0;
2925 spin_unlock(&mcs_compl->lock);
2926 break;
2927 }
2928 spin_unlock(&mcs_compl->lock);
2929 }
2930
2931 if (i == MULTI_CS_MAX_USER_CTX) {
2932 dev_err(hdev->dev, "no available multi-CS completion structure\n");
2933 return ERR_PTR(-ENOMEM);
2934 }
2935 return mcs_compl;
2936}
2937
2938/*
2939 * hl_wait_multi_cs_completion_fini - return completion structure and set as
2940 * unused
2941 *
2942 * @mcs_compl: pointer to the completion structure
2943 */
2944static void hl_wait_multi_cs_completion_fini(
2945 struct multi_cs_completion *mcs_compl)
2946{
2947 /*
2948 * free completion structure, do it under lock to be in-sync with the
2949 * thread that signals completion
2950 */
2951 spin_lock(&mcs_compl->lock);
2952 mcs_compl->used = 0;
2953 spin_unlock(&mcs_compl->lock);
2954}
2955
2956/*
2957 * hl_wait_multi_cs_completion - wait for first CS to complete
2958 *
2959 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
2960 *
2961 * @return 0 on success, otherwise non 0 error code
2962 */
2963static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
2964 struct multi_cs_completion *mcs_compl)
2965{
2966 long completion_rc;
2967
2968 completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
2969 mcs_data->timeout_jiffies);
2970
2971 /* update timestamp */
2972 if (completion_rc > 0)
2973 mcs_data->timestamp = mcs_compl->timestamp;
2974
2975 if (completion_rc == -ERESTARTSYS)
2976 return completion_rc;
2977
2978 mcs_data->wait_status = completion_rc;
2979
2980 return 0;
2981}
2982
2983/*
2984 * hl_multi_cs_completion_init - init array of multi-CS completion structures
2985 *
2986 * @hdev: pointer to habanalabs device structure
2987 */
2988void hl_multi_cs_completion_init(struct hl_device *hdev)
2989{
2990 struct multi_cs_completion *mcs_cmpl;
2991 int i;
2992
2993 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
2994 mcs_cmpl = &hdev->multi_cs_completion[i];
2995 mcs_cmpl->used = 0;
2996 spin_lock_init(&mcs_cmpl->lock);
2997 init_completion(&mcs_cmpl->completion);
2998 }
2999}
3000
3001/*
3002 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
3003 *
3004 * @hpriv: pointer to the private data of the fd
3005 * @data: pointer to multi-CS wait ioctl in/out args
3006 *
3007 */
3008static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3009{
3010 struct multi_cs_completion *mcs_compl;
3011 struct hl_device *hdev = hpriv->hdev;
3012 struct multi_cs_data mcs_data = {};
3013 union hl_wait_cs_args *args = data;
3014 struct hl_ctx *ctx = hpriv->ctx;
3015 struct hl_fence **fence_arr;
3016 void __user *seq_arr;
3017 u32 size_to_copy;
3018 u64 *cs_seq_arr;
3019 u8 seq_arr_len;
3020 int rc, i;
3021
3022 for (i = 0 ; i < sizeof(args->in.pad) ; i++)
3023 if (args->in.pad[i]) {
3024 dev_dbg(hdev->dev, "Padding bytes must be 0\n");
3025 return -EINVAL;
3026 }
3027
3028 if (!hdev->supports_wait_for_multi_cs) {
3029 dev_err(hdev->dev, "Wait for multi CS is not supported\n");
3030 return -EPERM;
3031 }
3032
3033 seq_arr_len = args->in.seq_arr_len;
3034
3035 if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
3036 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
3037 HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
3038 return -EINVAL;
3039 }
3040
3041 /* allocate memory for sequence array */
3042 cs_seq_arr =
3043 kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
3044 if (!cs_seq_arr)
3045 return -ENOMEM;
3046
3047 /* copy CS sequence array from user */
3048 seq_arr = (void __user *) (uintptr_t) args->in.seq;
3049 size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
3050 if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
3051 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
3052 rc = -EFAULT;
3053 goto free_seq_arr;
3054 }
3055
3056 /* allocate array for the fences */
3057 fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL);
3058 if (!fence_arr) {
3059 rc = -ENOMEM;
3060 goto free_seq_arr;
3061 }
3062
3063 /* initialize the multi-CS internal data */
3064 mcs_data.ctx = ctx;
3065 mcs_data.seq_arr = cs_seq_arr;
3066 mcs_data.fence_arr = fence_arr;
3067 mcs_data.arr_len = seq_arr_len;
3068
3069 hl_ctx_get(ctx);
3070
3071 /* wait (with timeout) for the first CS to be completed */
3072 mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
3073 mcs_compl = hl_wait_multi_cs_completion_init(hdev);
3074 if (IS_ERR(mcs_compl)) {
3075 rc = PTR_ERR(mcs_compl);
3076 goto put_ctx;
3077 }
3078
3079 /* poll all CS fences, extract timestamp */
3080 mcs_data.update_ts = true;
3081 rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
3082 /*
3083 * skip wait for CS completion when one of the below is true:
3084 * - an error on the poll function
3085 * - one or more CS in the list completed
3086 * - the user called ioctl with timeout 0
3087 */
3088 if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
3089 goto completion_fini;
3090
3091 while (true) {
3092 rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
3093 if (rc || (mcs_data.wait_status == 0))
3094 break;
3095
3096 /*
3097 * poll fences once again to update the CS map.
3098 * no timestamp should be updated this time.
3099 */
3100 mcs_data.update_ts = false;
3101 rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
3102
3103 if (rc || mcs_data.completion_bitmap)
3104 break;
3105
3106 /*
3107 * If hl_wait_multi_cs_completion returned before timeout (i.e.
3108 * it got a completion), it was either completed by a CS in the multi-CS list
3109 * (in which case the indication will be a non-empty completion_bitmap), or it
3110 * was completed by a CS submitted to one of the shared stream masters but
3111 * not in the multi-CS list (in which case we should wait again, but modify
3112 * the timeout and set the timestamp to zero to let a CS related to the current
3113 * multi-CS set a new, relevant, timestamp)
3114 */
3115 mcs_data.timeout_jiffies = mcs_data.wait_status;
3116 mcs_compl->timestamp = 0;
3117 }
3118
3119completion_fini:
3120 hl_wait_multi_cs_completion_fini(mcs_compl);
3121
3122put_ctx:
3123 hl_ctx_put(ctx);
3124 kfree(fence_arr);
3125
3126free_seq_arr:
3127 kfree(cs_seq_arr);
3128
3129 if (rc == -ERESTARTSYS) {
3130 dev_err_ratelimited(hdev->dev,
3131 "user process got signal while waiting for Multi-CS\n");
3132 rc = -EINTR;
3133 }
3134
3135 if (rc)
3136 return rc;
3137
3138 /* update output args */
3139 memset(args, 0, sizeof(*args));
3140
3141 if (mcs_data.completion_bitmap) {
3142 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
3143 args->out.cs_completion_map = mcs_data.completion_bitmap;
3144
3145 /* if the timestamp is not 0, it's valid */
3146 if (mcs_data.timestamp) {
3147 args->out.timestamp_nsec = mcs_data.timestamp;
3148 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3149 }
3150
3151 /* update if some CS was gone */
3152 if (!mcs_data.timestamp)
3153 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
3154 } else {
3155 args->out.status = HL_WAIT_CS_STATUS_BUSY;
3156 }
3157
3158 return 0;
3159}
3160
3161static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3162{
3163 struct hl_device *hdev = hpriv->hdev;
3164 union hl_wait_cs_args *args = data;
3165 enum hl_cs_wait_status status;
3166 u64 seq = args->in.seq;
3167 s64 timestamp;
3168 int rc;
3169
3170 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, &timestamp);
3171
3172 if (rc == -ERESTARTSYS) {
3173 dev_err_ratelimited(hdev->dev,
3174 "user process got signal while waiting for CS handle %llu\n",
3175 seq);
3176 return -EINTR;
3177 }
3178
3179 memset(args, 0, sizeof(*args));
3180
3181 if (rc) {
3182 if (rc == -ETIMEDOUT) {
3183 dev_err_ratelimited(hdev->dev,
3184 "CS %llu has timed-out while user process is waiting for it\n",
3185 seq);
3186 args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
3187 } else if (rc == -EIO) {
3188 dev_err_ratelimited(hdev->dev,
3189 "CS %llu has been aborted while user process is waiting for it\n",
3190 seq);
3191 args->out.status = HL_WAIT_CS_STATUS_ABORTED;
3192 }
3193 return rc;
3194 }
3195
3196 if (timestamp) {
3197 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3198 args->out.timestamp_nsec = timestamp;
3199 }
3200
3201 switch (status) {
3202 case CS_WAIT_STATUS_GONE:
3203 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
3204 fallthrough;
3205 case CS_WAIT_STATUS_COMPLETED:
3206 args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
3207 break;
3208 case CS_WAIT_STATUS_BUSY:
3209 default:
3210 args->out.status = HL_WAIT_CS_STATUS_BUSY;
3211 break;
3212 }
3213
3214 return 0;
3215}
3216
3217static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
3218 struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
3219{
3220 record->ts_reg_info.cq_cb = cq_cb;
3221 record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
3222 record->cq_target_value = target_value;
3223}
3224
3225static int validate_and_get_ts_record(struct device *dev,
3226 struct hl_ts_buff *ts_buff, u64 ts_offset,
3227 struct hl_user_pending_interrupt **req_event_record)
3228{
3229 struct hl_user_pending_interrupt *ts_cb_last;
3230
3231 *req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
3232 ts_offset;
3233 ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
3234 (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
3235
3236 /* Validate ts_offset not exceeding last max */
3237 if (*req_event_record >= ts_cb_last) {
3238 dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n",
3239 ts_offset, (u64)(uintptr_t)ts_cb_last);
3240 return -EINVAL;
3241 }
3242
3243 return 0;
3244}
3245
3246static void unregister_timestamp_node(struct hl_device *hdev,
3247 struct hl_user_pending_interrupt *record, bool need_lock)
3248{
3249 struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
3250 bool ts_rec_found = false;
3251 unsigned long flags;
3252
3253 if (need_lock)
3254 spin_lock_irqsave(&interrupt->ts_list_lock, flags);
3255
3256 if (record->ts_reg_info.in_use) {
3257 record->ts_reg_info.in_use = false;
3258 list_del(&record->list_node);
3259 ts_rec_found = true;
3260 }
3261
3262 if (need_lock)
3263 spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
3264
3265 /* Put refcounts that were taken when we registered the event */
3266 if (ts_rec_found) {
3267 hl_mmap_mem_buf_put(record->ts_reg_info.buf);
3268 hl_cb_put(record->ts_reg_info.cq_cb);
3269 }
3270}
3271
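/*
 * ts_get_and_handle_kernel_record() - resolve the timestamp record for the
 * requested offset and prepare it for registration.
 *
 * If the record is already registered (possibly on a different interrupt),
 * it is unregistered first; the interrupt list lock is dropped and re-taken
 * around that step when the record belongs to another interrupt.
 */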
3272static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
3273 struct wait_interrupt_data *data, unsigned long *flags,
3274 struct hl_user_pending_interrupt **pend)
3275{
3276 struct hl_user_pending_interrupt *req_offset_record;
3277 struct hl_ts_buff *ts_buff = data->buf->private;
3278 bool need_lock = false;
3279 int rc;
3280
3281 rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
3282 &req_offset_record);
3283 if (rc)
3284 return rc;
3285
3286 /* In case the node is already registered, unregister it first and then re-use it */
3287 if (req_offset_record->ts_reg_info.in_use) {
3288 dev_dbg(data->buf->mmg->dev,
3289 "Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n",
3290 req_offset_record,
3291 req_offset_record->ts_reg_info.interrupt->interrupt_id,
3292 req_offset_record->ts_reg_info.timestamp_kernel_addr,
3293 data->interrupt->interrupt_id);
3294 /*
3295 * Since the interrupt here can be different from the one the node is currently
3296 * registered on, and we don't want to lock two lists while doing the unregister,
3297 * unlock the new interrupt wait list here and acquire the lock again once done
3298 */
3299 if (data->interrupt->interrupt_id !=
3300 req_offset_record->ts_reg_info.interrupt->interrupt_id) {
3301
3302 need_lock = true;
3303 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
3304 }
3305
3306 unregister_timestamp_node(hdev, req_offset_record, need_lock);
3307
3308 if (need_lock)
3309 spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
3310 }
3311
3312 /* Fill up the new registration node info and add it to the list */
3313 req_offset_record->ts_reg_info.in_use = true;
3314 req_offset_record->ts_reg_info.buf = data->buf;
3315 req_offset_record->ts_reg_info.timestamp_kernel_addr =
3316 (u64 *) ts_buff->user_buff_address + data->ts_offset;
3317 req_offset_record->ts_reg_info.interrupt = data->interrupt;
3318 set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
3319 data->target_value);
3320
3321 *pend = req_offset_record;
3322
3323 return rc;
3324}
3325
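/*
 * _hl_interrupt_ts_reg_ioctl() - register a timestamp record on a user
 * interrupt.
 *
 * The record is armed only if the CQ counter has not yet reached the target
 * value; otherwise the timestamp is written immediately and the record is
 * released.
 */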
3326static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3327 struct wait_interrupt_data *data,
3328 u32 *status, u64 *timestamp)
3329{
3330 struct hl_user_pending_interrupt *pend;
3331 unsigned long flags;
3332 int rc = 0;
3333
3334 hl_ctx_get(ctx);
3335
3336 data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
3337 if (!data->cq_cb) {
3338 rc = -EINVAL;
3339 goto put_ctx;
3340 }
3341
3342 /* Validate the cq offset */
3343 if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
3344 ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
3345 rc = -EINVAL;
3346 goto put_cq_cb;
3347 }
3348
3349 dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
3350 data->interrupt->interrupt_id, data->ts_handle,
3351 data->ts_offset, data->cq_offset);
3352
3353 data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
3354 if (!data->buf) {
3355 rc = -EINVAL;
3356 goto put_cq_cb;
3357 }
3358
3359 spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
3360
3361 /* get ts buffer record */
3362 rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
3363 if (rc) {
3364 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3365 goto put_ts_buff;
3366 }
3367
3368 /* We check for completion value as interrupt could have been received
3369 * before we add the timestamp node to the ts list.
3370 */
3371 if (*pend->cq_kernel_addr >= data->target_value) {
3372 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3373
3374 dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
3375 pend, data->ts_offset, data->interrupt->interrupt_id);
3376
3377 pend->ts_reg_info.in_use = 0;
3378 *status = HL_WAIT_CS_STATUS_COMPLETED;
3379 *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
3380
3381 goto put_ts_buff;
3382 }
3383
3384 list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
3385 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3386
3387 rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
3388
3389 hl_ctx_put(ctx);
3390
3391 return rc;
3392
3393put_ts_buff:
3394 hl_mmap_mem_buf_put(data->buf);
3395put_cq_cb:
3396 hl_cb_put(data->cq_cb);
3397put_ctx:
3398 hl_ctx_put(ctx);
3399
3400 return rc;
3401}
3402
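/*
 * _hl_interrupt_wait_ioctl() - wait for a user interrupt using a CQ counter
 * buffer allocated through the driver.
 *
 * If the target value was already reached, or the timeout is 0, the call
 * returns immediately without queueing a wait node.
 */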
3403static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3404 struct wait_interrupt_data *data,
3405 u32 *status, u64 *timestamp)
3406{
3407 struct hl_user_pending_interrupt *pend;
3408 unsigned long timeout, flags;
3409 long completion_rc;
3410 int rc = 0;
3411
3412 timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
3413
3414 hl_ctx_get(ctx);
3415
3416 data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
3417 if (!data->cq_cb) {
3418 rc = -EINVAL;
3419 goto put_ctx;
3420 }
3421
3422 /* Validate the cq offset */
3423 if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
3424 ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
3425 rc = -EINVAL;
3426 goto put_cq_cb;
3427 }
3428
3429 pend = kzalloc(sizeof(*pend), GFP_KERNEL);
3430 if (!pend) {
3431 rc = -ENOMEM;
3432 goto put_cq_cb;
3433 }
3434
3435 hl_fence_init(&pend->fence, ULONG_MAX);
3436 pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
3437 pend->cq_target_value = data->target_value;
3438 spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
3439
3440
3441 /* We check for completion value as interrupt could have been received
3442 * before we add the wait node to the wait list.
3443 */
3444 if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
3445 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3446
3447 if (*pend->cq_kernel_addr >= data->target_value)
3448 *status = HL_WAIT_CS_STATUS_COMPLETED;
3449 else
3450 *status = HL_WAIT_CS_STATUS_BUSY;
3451
3452 pend->fence.timestamp = ktime_get();
3453 goto set_timestamp;
3454 }
3455
3456 /* Add pending user interrupt to relevant list for the interrupt
3457 * handler to monitor.
3458 * Note that we cannot keep the list sorted by target value
3459 * (to shorten the list traversal), since the
3460 * same list could have nodes for different CQ counter handles.
3461 */
3462 list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
3463 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3464
3465 /* Wait for interrupt handler to signal completion */
3466 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
3467 timeout);
3468 if (completion_rc > 0) {
3469 if (pend->fence.error == -EIO) {
3470 dev_err_ratelimited(hdev->dev,
3471 "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
3472 pend->fence.error);
3473 rc = -EIO;
3474 *status = HL_WAIT_CS_STATUS_ABORTED;
3475 } else {
3476 *status = HL_WAIT_CS_STATUS_COMPLETED;
3477 }
3478 } else {
3479 if (completion_rc == -ERESTARTSYS) {
3480 dev_err_ratelimited(hdev->dev,
3481 "user process got signal while waiting for interrupt ID %d\n",
3482 data->interrupt->interrupt_id);
3483 rc = -EINTR;
3484 *status = HL_WAIT_CS_STATUS_ABORTED;
3485 } else {
3486 /* The wait has timed-out. We don't know anything beyond that
3487 * because the workload was not submitted through the driver.
3488 * Therefore, from driver's perspective, the workload is still
3489 * executing.
3490 */
3491 rc = 0;
3492 *status = HL_WAIT_CS_STATUS_BUSY;
3493 }
3494 }
3495
3496 /*
3497 * We keep the removal of the node from the list here, and not in the irq
3498 * handler, for the completion timeout case. If it's a registration
3499 * for a ts record, the node will be deleted in the irq handler after
3500 * we reach the target value.
3501 */
3502 spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
3503 list_del(&pend->list_node);
3504 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3505
3506set_timestamp:
3507 *timestamp = ktime_to_ns(pend->fence.timestamp);
3508 kfree(pend);
3509 hl_cb_put(data->cq_cb);
3510 hl_ctx_put(ctx);
3511
3512 return rc;
3513
3514put_cq_cb:
3515 hl_cb_put(data->cq_cb);
3516put_ctx:
3517 hl_ctx_put(ctx);
3518
3519 return rc;
3520}
3521
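/*
 * _hl_interrupt_wait_ioctl_user_addr() - wait for a user interrupt by polling
 * a 64-bit completion value at a user-space address.
 *
 * After each completion the value is re-read; if the target was not reached
 * the wait is repeated with the remaining timeout.
 */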
3522static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
3523 u64 timeout_us, u64 user_address,
3524 u64 target_value, struct hl_user_interrupt *interrupt,
3525 u32 *status,
3526 u64 *timestamp)
3527{
3528 struct hl_user_pending_interrupt *pend;
3529 unsigned long timeout, flags;
3530 u64 completion_value;
3531 long completion_rc;
3532 int rc = 0;
3533
3534 timeout = hl_usecs64_to_jiffies(timeout_us);
3535
3536 hl_ctx_get(ctx);
3537
3538 pend = kzalloc(sizeof(*pend), GFP_KERNEL);
3539 if (!pend) {
3540 hl_ctx_put(ctx);
3541 return -ENOMEM;
3542 }
3543
3544 hl_fence_init(&pend->fence, ULONG_MAX);
3545
3546 /* Add pending user interrupt to relevant list for the interrupt
3547 * handler to monitor
3548 */
3549 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3550 list_add_tail(&pend->list_node, &interrupt->wait_list_head);
3551 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3552
3553 /* We check for completion value as interrupt could have been received
3554 * before we added the node to the wait list
3555 */
3556 if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
3557 dev_err(hdev->dev, "Failed to copy completion value from user\n");
3558 rc = -EFAULT;
3559 goto remove_pending_user_interrupt;
3560 }
3561
3562 if (completion_value >= target_value) {
3563 *status = HL_WAIT_CS_STATUS_COMPLETED;
3564 /* There was no interrupt, we assume the completion is now. */
3565 pend->fence.timestamp = ktime_get();
3566 } else {
3567 *status = HL_WAIT_CS_STATUS_BUSY;
3568 }
3569
3570 if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
3571 goto remove_pending_user_interrupt;
3572
3573wait_again:
3574 /* Wait for interrupt handler to signal completion */
3575 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
3576 timeout);
3577
3578 /* If timeout did not expire we need to perform the comparison.
3579 * If comparison fails, keep waiting until timeout expires
3580 */
3581 if (completion_rc > 0) {
3582 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3583 /* reinit_completion must be called before we check for user
3584 * completion value, otherwise, if interrupt is received after
3585 * the comparison and before the next wait_for_completion,
3586 * we will reach timeout and fail
3587 */
3588 reinit_completion(&pend->fence.completion);
3589 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3590
3591 if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
3592 dev_err(hdev->dev, "Failed to copy completion value from user\n");
3593 rc = -EFAULT;
3594
3595 goto remove_pending_user_interrupt;
3596 }
3597
3598 if (completion_value >= target_value) {
3599 *status = HL_WAIT_CS_STATUS_COMPLETED;
3600 } else if (pend->fence.error) {
3601 dev_err_ratelimited(hdev->dev,
3602 "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
3603 pend->fence.error);
3604 /* set the command completion status as ABORTED */
3605 *status = HL_WAIT_CS_STATUS_ABORTED;
3606 } else {
3607 timeout = completion_rc;
3608 goto wait_again;
3609 }
3610 } else if (completion_rc == -ERESTARTSYS) {
3611 dev_err_ratelimited(hdev->dev,
3612 "user process got signal while waiting for interrupt ID %d\n",
3613 interrupt->interrupt_id);
3614 rc = -EINTR;
3615 } else {
3616 /* The wait has timed-out. We don't know anything beyond that
3617 * because the workload wasn't submitted through the driver.
3618 * Therefore, from driver's perspective, the workload is still
3619 * executing.
3620 */
3621 rc = 0;
3622 *status = HL_WAIT_CS_STATUS_BUSY;
3623 }
3624
3625remove_pending_user_interrupt:
3626 spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3627 list_del(&pend->list_node);
3628 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3629
3630 *timestamp = ktime_to_ns(pend->fence.timestamp);
3631
3632 kfree(pend);
3633 hl_ctx_put(ctx);
3634
3635 return rc;
3636}
3637
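/*
 * hl_interrupt_wait_ioctl() - resolve the target interrupt object (decoder,
 * per-user or common interrupt) from the ioctl flags and dispatch to the
 * timestamp-registration, kernel-CQ wait or user-address wait flavor.
 */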
3638static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3639{
3640 u16 interrupt_id, first_interrupt, last_interrupt;
3641 struct hl_device *hdev = hpriv->hdev;
3642 struct asic_fixed_properties *prop;
3643 struct hl_user_interrupt *interrupt;
3644 union hl_wait_cs_args *args = data;
3645 u32 status = HL_WAIT_CS_STATUS_BUSY;
3646 u64 timestamp = 0;
3647 int rc, int_idx;
3648
3649 prop = &hdev->asic_prop;
3650
3651 if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) {
3652 dev_err(hdev->dev, "no user interrupts allowed");
3653 return -EPERM;
3654 }
3655
3656 interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
3657
3658 first_interrupt = prop->first_available_user_interrupt;
3659 last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1;
3660
3661 if (interrupt_id < prop->user_dec_intr_count) {
3662
3663 /* Check if the requested core is enabled */
3664 if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) {
3665 dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed",
3666 interrupt_id);
3667 return -EINVAL;
3668 }
3669
3670 interrupt = &hdev->user_interrupt[interrupt_id];
3671
3672 } else if (interrupt_id >= first_interrupt && interrupt_id <= last_interrupt) {
3673
3674 int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count;
3675 interrupt = &hdev->user_interrupt[int_idx];
3676
3677 } else if (interrupt_id == HL_COMMON_USER_CQ_INTERRUPT_ID) {
3678 interrupt = &hdev->common_user_cq_interrupt;
3679 } else if (interrupt_id == HL_COMMON_DEC_INTERRUPT_ID) {
3680 interrupt = &hdev->common_decoder_interrupt;
3681 } else {
3682 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
3683 return -EINVAL;
3684 }
3685
3686 if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
3687 struct wait_interrupt_data wait_intr_data = {0};
3688
3689 wait_intr_data.interrupt = interrupt;
3690 wait_intr_data.mmg = &hpriv->mem_mgr;
3691 wait_intr_data.cq_handle = args->in.cq_counters_handle;
3692 wait_intr_data.cq_offset = args->in.cq_counters_offset;
3693 wait_intr_data.ts_handle = args->in.timestamp_handle;
3694 wait_intr_data.ts_offset = args->in.timestamp_offset;
3695 wait_intr_data.target_value = args->in.target;
3696 wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
3697
3698 if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
3699 /*
3700 * Allow only one registration at a time. This is needed in order to prevent
3701 * issues while handling the flow of re-using the same offset.
3702 * Since the registration flow is protected only by the interrupt lock,
3703 * the re-use flow might request to move a ts node to another interrupt list,
3704 * and in such a case we're not protected.
3705 */
3706 mutex_lock(&hpriv->ctx->ts_reg_lock);
3707
3708 rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3709 &status, &timestamp);
3710
3711 mutex_unlock(&hpriv->ctx->ts_reg_lock);
3712 } else
3713 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3714 &status, &timestamp);
3715 } else {
3716 rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
3717 args->in.interrupt_timeout_us, args->in.addr,
3718 args->in.target, interrupt, &status,
3719 &timestamp);
3720 }
3721
3722 if (rc)
3723 return rc;
3724
3725 memset(args, 0, sizeof(*args));
3726 args->out.status = status;
3727
3728 if (timestamp) {
3729 args->out.timestamp_nsec = timestamp;
3730 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3731 }
3732
3733 return 0;
3734}
3735
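/*
 * hl_wait_ioctl() - DRM entry point for all wait flavors: user interrupts,
 * multi-CS and single CS waits. Waiting is rejected while the device is not
 * operational.
 */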
3736int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
3737{
3738 struct hl_fpriv *hpriv = file_priv->driver_priv;
3739 struct hl_device *hdev = hpriv->hdev;
3740 union hl_wait_cs_args *args = data;
3741 u32 flags = args->in.flags;
3742 int rc;
3743
3744 /* If the device is not operational, or if an error has happened and the user should release the
3745 * device, there is no point in waiting for any command submission or user interrupt.
3746 */
3747 if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active)
3748 return -EBUSY;
3749
3750 if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
3751 rc = hl_interrupt_wait_ioctl(hpriv, data);
3752 else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS)
3753 rc = hl_multi_cs_wait_ioctl(hpriv, data);
3754 else
3755 rc = hl_cs_wait_ioctl(hpriv, data);
3756
3757 return rc;
3758}