amdgpu_gfx.c - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c - Linux source code v3.1

Note: File does not exist in v3.1.
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 */
  25
  26#include <linux/firmware.h>
  27#include <linux/pm_runtime.h>
  28
  29#include "amdgpu.h"
  30#include "amdgpu_gfx.h"
  31#include "amdgpu_rlc.h"
  32#include "amdgpu_ras.h"
  33#include "amdgpu_reset.h"
  34#include "amdgpu_xcp.h"
  35#include "amdgpu_xgmi.h"
  36
  37/* Delay, in jiffies (100 ms), used when scheduling the delayed work that enables GFXOFF. */
  38#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
  39/* Zero delay value; not referenced in the part of the file visible here. */
  40#define GFX_OFF_NO_DELAY 0
  41
  42/*
  43 * GPU GFX IP block helper functions.
  44 */
  45
  46int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
  47				int pipe, int queue)
  48{
  49	int bit = 0;
  50
  51	bit += mec * adev->gfx.mec.num_pipe_per_mec
  52		* adev->gfx.mec.num_queue_per_pipe;
  53	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
  54	bit += queue;
  55
  56	return bit;
  57}
  58
  59void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
  60				 int *mec, int *pipe, int *queue)
  61{
  62	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
  63	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
  64		% adev->gfx.mec.num_pipe_per_mec;
  65	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
  66	       / adev->gfx.mec.num_pipe_per_mec;
  67
  68}
  69
  70bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
  71				     int xcc_id, int mec, int pipe, int queue)
  72{
  73	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
  74			adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
  75}
  76
  77int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
  78			       int me, int pipe, int queue)
  79{
  80	int bit = 0;
  81
  82	bit += me * adev->gfx.me.num_pipe_per_me
  83		* adev->gfx.me.num_queue_per_pipe;
  84	bit += pipe * adev->gfx.me.num_queue_per_pipe;
  85	bit += queue;
  86
  87	return bit;
  88}
  89
  90bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
  91				    int me, int pipe, int queue)
  92{
  93	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
  94			adev->gfx.me.queue_bitmap);
  95}
  96
  97/**
  98 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
  99 *
 100 * @mask: array in which the per-shader array disable masks will be stored
 101 * @max_se: number of SEs
 102 * @max_sh: number of SHs
 103 *
 104 * The bitmask of CUs to be disabled in the shader array determined by se and
 105 * sh is stored in mask[se * max_sh + sh].
 106 */
 107void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
 108{
 109	unsigned int se, sh, cu;
 110	const char *p;
 111
 112	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
 113
 114	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
 115		return;
 116
 117	p = amdgpu_disable_cu;
 118	for (;;) {
 119		char *next;
 120		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
 121
 122		if (ret < 3) {
 123			DRM_ERROR("amdgpu: could not parse disable_cu\n");
 124			return;
 125		}
 126
 127		if (se < max_se && sh < max_sh && cu < 16) {
 128			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
 129			mask[se * max_sh + sh] |= 1u << cu;
 130		} else {
 131			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
 132				  se, sh, cu);
 133		}
 134
 135		next = strchr(p, ',');
 136		if (!next)
 137			break;
 138		p = next + 1;
 139	}
 140}
 141
 142static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
 143{
 144	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
 145}
 146
 147static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
 148{
 149	if (amdgpu_compute_multipipe != -1) {
 150		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
 151			 amdgpu_compute_multipipe);
 152		return amdgpu_compute_multipipe == 1;
 153	}
 154
 155	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
 156		return true;
 157
 158	/* FIXME: spreading the queues across pipes causes perf regressions
 159	 * on POLARIS11 compute workloads */
 160	if (adev->asic_type == CHIP_POLARIS11)
 161		return false;
 162
 163	return adev->gfx.mec.num_mec > 1;
 164}
 165
 166bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
 167						struct amdgpu_ring *ring)
 168{
 169	int queue = ring->queue;
 170	int pipe = ring->pipe;
 171
 172	/* Policy: use pipe1 queue0 as high priority graphics queue if we
 173	 * have more than one gfx pipe.
 174	 */
 175	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
 176	    adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
 177		int me = ring->me;
 178		int bit;
 179
 180		bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
 181		if (ring == &adev->gfx.gfx_ring[bit])
 182			return true;
 183	}
 184
 185	return false;
 186}
 187
 188bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 189					       struct amdgpu_ring *ring)
 190{
 191	/* Policy: use 1st queue as high priority compute queue if we
 192	 * have more than one compute queue.
 193	 */
 194	if (adev->gfx.num_compute_rings > 1 &&
 195	    ring == &adev->gfx.compute_ring[0])
 196		return true;
 197
 198	return false;
 199}
 200
 201void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 202{
 203	int i, j, queue, pipe;
 204	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
 205	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
 206				     adev->gfx.mec.num_queue_per_pipe,
 207				     adev->gfx.num_compute_rings);
 208	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
 209
 210	if (multipipe_policy) {
 211		/* policy: make queues evenly cross all pipes on MEC1 only
 212		 * for multiple xcc, just use the original policy for simplicity */
 213		for (j = 0; j < num_xcc; j++) {
 214			for (i = 0; i < max_queues_per_mec; i++) {
 215				pipe = i % adev->gfx.mec.num_pipe_per_mec;
 216				queue = (i / adev->gfx.mec.num_pipe_per_mec) %
 217					 adev->gfx.mec.num_queue_per_pipe;
 218
 219				set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
 220					adev->gfx.mec_bitmap[j].queue_bitmap);
 221			}
 222		}
 223	} else {
 224		/* policy: amdgpu owns all queues in the given pipe */
 225		for (j = 0; j < num_xcc; j++) {
 226			for (i = 0; i < max_queues_per_mec; ++i)
 227				set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
 228		}
 229	}
 230
 231	for (j = 0; j < num_xcc; j++) {
 232		dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
 233			bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
 234	}
 235}
 236
 237void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
 238{
 239	int i, queue, pipe;
 240	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
 241	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
 242					adev->gfx.me.num_queue_per_pipe;
 243
 244	if (multipipe_policy) {
 245		/* policy: amdgpu owns the first queue per pipe at this stage
 246		 * will extend to mulitple queues per pipe later */
 247		for (i = 0; i < max_queues_per_me; i++) {
 248			pipe = i % adev->gfx.me.num_pipe_per_me;
 249			queue = (i / adev->gfx.me.num_pipe_per_me) %
 250				adev->gfx.me.num_queue_per_pipe;
 251
 252			set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
 253				adev->gfx.me.queue_bitmap);
 254		}
 255	} else {
 256		for (i = 0; i < max_queues_per_me; ++i)
 257			set_bit(i, adev->gfx.me.queue_bitmap);
 258	}
 259
 260	/* update the number of active graphics rings */
 261	adev->gfx.num_gfx_rings =
 262		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
 263}
 264
 265static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
 266				  struct amdgpu_ring *ring, int xcc_id)
 267{
 268	int queue_bit;
 269	int mec, pipe, queue;
 270
 271	queue_bit = adev->gfx.mec.num_mec
 272		    * adev->gfx.mec.num_pipe_per_mec
 273		    * adev->gfx.mec.num_queue_per_pipe;
 274
 275	while (--queue_bit >= 0) {
 276		if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
 277			continue;
 278
 279		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
 280
 281		/*
 282		 * 1. Using pipes 2/3 from MEC 2 seems cause problems.
 283		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
 284		 * only can be issued on queue 0.
 285		 */
 286		if ((mec == 1 && pipe > 1) || queue != 0)
 287			continue;
 288
 289		ring->me = mec + 1;
 290		ring->pipe = pipe;
 291		ring->queue = queue;
 292
 293		return 0;
 294	}
 295
 296	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
 297	return -EINVAL;
 298}
 299
 300int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
 301{
 302	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 303	struct amdgpu_irq_src *irq = &kiq->irq;
 304	struct amdgpu_ring *ring = &kiq->ring;
 305	int r = 0;
 306
 307	spin_lock_init(&kiq->ring_lock);
 308
 309	ring->adev = NULL;
 310	ring->ring_obj = NULL;
 311	ring->use_doorbell = true;
 312	ring->xcc_id = xcc_id;
 313	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
 314	ring->doorbell_index =
 315		(adev->doorbell_index.kiq +
 316		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
 317		<< 1;
 318
 319	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
 320	if (r)
 321		return r;
 322
 323	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 324	ring->no_scheduler = true;
 325	snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
 326		 (unsigned char)xcc_id, (unsigned char)ring->me,
 327		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
 328	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
 329			     AMDGPU_RING_PRIO_DEFAULT, NULL);
 330	if (r)
 331		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
 332
 333	return r;
 334}
 335
 /**
  * amdgpu_gfx_kiq_free_ring - tear down a KIQ ring
  *
  * @ring: ring to finalise
  *
  * Thin wrapper that forwards to amdgpu_ring_fini().
  */
 void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
 {
 	amdgpu_ring_fini(ring);
 }
 340
 341void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
 342{
 343	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 344
 345	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
 346}
 347
 348int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
 349			unsigned int hpd_size, int xcc_id)
 350{
 351	int r;
 352	u32 *hpd;
 353	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 354
 355	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
 356				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
 357				    &kiq->eop_gpu_addr, (void **)&hpd);
 358	if (r) {
 359		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
 360		return r;
 361	}
 362
 363	memset(hpd, 0, hpd_size);
 364
 365	r = amdgpu_bo_reserve(kiq->eop_obj, true);
 366	if (unlikely(r != 0))
 367		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
 368	amdgpu_bo_kunmap(kiq->eop_obj);
 369	amdgpu_bo_unreserve(kiq->eop_obj);
 370
 371	return 0;
 372}
 373
 374/* create MQD for each compute/gfx queue */
 375int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 376			   unsigned int mqd_size, int xcc_id)
 377{
 378	int r, i, j;
 379	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 380	struct amdgpu_ring *ring = &kiq->ring;
 381	u32 domain = AMDGPU_GEM_DOMAIN_GTT;
 382
 383#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
 384	/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
 385	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
 386		domain |= AMDGPU_GEM_DOMAIN_VRAM;
 387#endif
 388
 389	/* create MQD for KIQ */
 390	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
 391		/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
 392		 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
 393		 * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
 394		 * KIQ MQD no matter SRIOV or Bare-metal
 395		 */
 396		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
 397					    AMDGPU_GEM_DOMAIN_VRAM |
 398					    AMDGPU_GEM_DOMAIN_GTT,
 399					    &ring->mqd_obj,
 400					    &ring->mqd_gpu_addr,
 401					    &ring->mqd_ptr);
 402		if (r) {
 403			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
 404			return r;
 405		}
 406
 407		/* prepare MQD backup */
 408		kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
 409		if (!kiq->mqd_backup) {
 410			dev_warn(adev->dev,
 411				 "no memory to create MQD backup for ring %s\n", ring->name);
 412			return -ENOMEM;
 413		}
 414	}
 415
 416	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
 417		/* create MQD for each KGQ */
 418		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 419			ring = &adev->gfx.gfx_ring[i];
 420			if (!ring->mqd_obj) {
 421				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
 422							    domain, &ring->mqd_obj,
 423							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
 424				if (r) {
 425					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
 426					return r;
 427				}
 428
 429				ring->mqd_size = mqd_size;
 430				/* prepare MQD backup */
 431				adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
 432				if (!adev->gfx.me.mqd_backup[i]) {
 433					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
 434					return -ENOMEM;
 435				}
 436			}
 437		}
 438	}
 439
 440	/* create MQD for each KCQ */
 441	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 442		j = i + xcc_id * adev->gfx.num_compute_rings;
 443		ring = &adev->gfx.compute_ring[j];
 444		if (!ring->mqd_obj) {
 445			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
 446						    domain, &ring->mqd_obj,
 447						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
 448			if (r) {
 449				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
 450				return r;
 451			}
 452
 453			ring->mqd_size = mqd_size;
 454			/* prepare MQD backup */
 455			adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
 456			if (!adev->gfx.mec.mqd_backup[j]) {
 457				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
 458				return -ENOMEM;
 459			}
 460		}
 461	}
 462
 463	return 0;
 464}
 465
 466void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
 467{
 468	struct amdgpu_ring *ring = NULL;
 469	int i, j;
 470	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 471
 472	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
 473		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 474			ring = &adev->gfx.gfx_ring[i];
 475			kfree(adev->gfx.me.mqd_backup[i]);
 476			amdgpu_bo_free_kernel(&ring->mqd_obj,
 477					      &ring->mqd_gpu_addr,
 478					      &ring->mqd_ptr);
 479		}
 480	}
 481
 482	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 483		j = i + xcc_id * adev->gfx.num_compute_rings;
 484		ring = &adev->gfx.compute_ring[j];
 485		kfree(adev->gfx.mec.mqd_backup[j]);
 486		amdgpu_bo_free_kernel(&ring->mqd_obj,
 487				      &ring->mqd_gpu_addr,
 488				      &ring->mqd_ptr);
 489	}
 490
 491	ring = &kiq->ring;
 492	kfree(kiq->mqd_backup);
 493	amdgpu_bo_free_kernel(&ring->mqd_obj,
 494			      &ring->mqd_gpu_addr,
 495			      &ring->mqd_ptr);
 496}
 497
 498int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
 499{
 500	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 501	struct amdgpu_ring *kiq_ring = &kiq->ring;
 502	int i, r = 0;
 503	int j;
 504
 505	if (adev->enable_mes) {
 506		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 507			j = i + xcc_id * adev->gfx.num_compute_rings;
 508			amdgpu_mes_unmap_legacy_queue(adev,
 509						   &adev->gfx.compute_ring[j],
 510						   RESET_QUEUES, 0, 0);
 511		}
 512		return 0;
 513	}
 514
 515	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
 516		return -EINVAL;
 517
 518	if (!kiq_ring->sched.ready || adev->job_hang || amdgpu_in_reset(adev))
 519		return 0;
 520
 521	spin_lock(&kiq->ring_lock);
 522	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
 523					adev->gfx.num_compute_rings)) {
 524		spin_unlock(&kiq->ring_lock);
 525		return -ENOMEM;
 526	}
 527
 528	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 529		j = i + xcc_id * adev->gfx.num_compute_rings;
 530		kiq->pmf->kiq_unmap_queues(kiq_ring,
 531					   &adev->gfx.compute_ring[j],
 532					   RESET_QUEUES, 0, 0);
 533	}
 534	/* Submit unmap queue packet */
 535	amdgpu_ring_commit(kiq_ring);
 536	/*
 537	 * Ring test will do a basic scratch register change check. Just run
 538	 * this to ensure that unmap queues that is submitted before got
 539	 * processed successfully before returning.
 540	 */
 541	r = amdgpu_ring_test_helper(kiq_ring);
 542
 543	spin_unlock(&kiq->ring_lock);
 544
 545	return r;
 546}
 547
 548int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
 549{
 550	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 551	struct amdgpu_ring *kiq_ring = &kiq->ring;
 552	int i, r = 0;
 553	int j;
 554
 555	if (adev->enable_mes) {
 556		if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
 557			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 558				j = i + xcc_id * adev->gfx.num_gfx_rings;
 559				amdgpu_mes_unmap_legacy_queue(adev,
 560						      &adev->gfx.gfx_ring[j],
 561						      PREEMPT_QUEUES, 0, 0);
 562			}
 563		}
 564		return 0;
 565	}
 566
 567	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
 568		return -EINVAL;
 569
 570	if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang)
 571		return 0;
 572
 573	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
 574		spin_lock(&kiq->ring_lock);
 575		if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
 576						adev->gfx.num_gfx_rings)) {
 577			spin_unlock(&kiq->ring_lock);
 578			return -ENOMEM;
 579		}
 580
 581		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 582			j = i + xcc_id * adev->gfx.num_gfx_rings;
 583			kiq->pmf->kiq_unmap_queues(kiq_ring,
 584						   &adev->gfx.gfx_ring[j],
 585						   PREEMPT_QUEUES, 0, 0);
 586		}
 587		/* Submit unmap queue packet */
 588		amdgpu_ring_commit(kiq_ring);
 589
 590		/*
 591		 * Ring test will do a basic scratch register change check.
 592		 * Just run this to ensure that unmap queues that is submitted
 593		 * before got processed successfully before returning.
 594		 */
 595		r = amdgpu_ring_test_helper(kiq_ring);
 596		spin_unlock(&kiq->ring_lock);
 597	}
 598
 599	return r;
 600}
 601
 /**
  * amdgpu_queue_mask_bit_to_set_resource_bit - convert a queue bitmap bit to a set-resources bit
  *
  * @adev: amdgpu device
  * @queue_bit: bit index in the MEC queue bitmap
  *
  * The queue bit is split into mec/pipe/queue and re-packed with a fixed
  * layout of 4 pipes per MEC and 8 queues per pipe.
  *
  * Returns mec * 4 * 8 + pipe * 8 + queue.
  */
 int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
 					int queue_bit)
 {
 	int mec, pipe, queue;

 	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

 	return mec * 4 * 8 + pipe * 8 + queue;
 }
 614
 615static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
 616{
 617	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 618	struct amdgpu_ring *kiq_ring = &kiq->ring;
 619	uint64_t queue_mask = ~0ULL;
 620	int r, i, j;
 621
 622	amdgpu_device_flush_hdp(adev, NULL);
 623
 624	if (!adev->enable_uni_mes) {
 625		spin_lock(&kiq->ring_lock);
 626		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
 627		if (r) {
 628			dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
 629			spin_unlock(&kiq->ring_lock);
 630			return r;
 631		}
 632
 633		kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
 634		r = amdgpu_ring_test_helper(kiq_ring);
 635		spin_unlock(&kiq->ring_lock);
 636		if (r)
 637			dev_err(adev->dev, "KIQ failed to set resources\n");
 638	}
 639
 640	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 641		j = i + xcc_id * adev->gfx.num_compute_rings;
 642		r = amdgpu_mes_map_legacy_queue(adev,
 643						&adev->gfx.compute_ring[j]);
 644		if (r) {
 645			dev_err(adev->dev, "failed to map compute queue\n");
 646			return r;
 647		}
 648	}
 649
 650	return 0;
 651}
 652
 653int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
 654{
 655	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 656	struct amdgpu_ring *kiq_ring = &kiq->ring;
 657	uint64_t queue_mask = 0;
 658	int r, i, j;
 659
 660	if (adev->mes.enable_legacy_queue_map)
 661		return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
 662
 663	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
 664		return -EINVAL;
 665
 666	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
 667		if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
 668			continue;
 669
 670		/* This situation may be hit in the future if a new HW
 671		 * generation exposes more than 64 queues. If so, the
 672		 * definition of queue_mask needs updating */
 673		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
 674			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
 675			break;
 676		}
 677
 678		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
 679	}
 680
 681	amdgpu_device_flush_hdp(adev, NULL);
 682
 683	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
 684		 kiq_ring->queue);
 685
 686	spin_lock(&kiq->ring_lock);
 687	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
 688					adev->gfx.num_compute_rings +
 689					kiq->pmf->set_resources_size);
 690	if (r) {
 691		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 692		spin_unlock(&kiq->ring_lock);
 693		return r;
 694	}
 695
 696	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
 697	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 698		j = i + xcc_id * adev->gfx.num_compute_rings;
 699		kiq->pmf->kiq_map_queues(kiq_ring,
 700					 &adev->gfx.compute_ring[j]);
 701	}
 702	/* Submit map queue packet */
 703	amdgpu_ring_commit(kiq_ring);
 704	/*
 705	 * Ring test will do a basic scratch register change check. Just run
 706	 * this to ensure that map queues that is submitted before got
 707	 * processed successfully before returning.
 708	 */
 709	r = amdgpu_ring_test_helper(kiq_ring);
 710	spin_unlock(&kiq->ring_lock);
 711	if (r)
 712		DRM_ERROR("KCQ enable failed\n");
 713
 714	return r;
 715}
 716
 717int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
 718{
 719	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 720	struct amdgpu_ring *kiq_ring = &kiq->ring;
 721	int r, i, j;
 722
 723	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
 724		return -EINVAL;
 725
 726	amdgpu_device_flush_hdp(adev, NULL);
 727
 728	if (adev->mes.enable_legacy_queue_map) {
 729		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 730			j = i + xcc_id * adev->gfx.num_gfx_rings;
 731			r = amdgpu_mes_map_legacy_queue(adev,
 732							&adev->gfx.gfx_ring[j]);
 733			if (r) {
 734				DRM_ERROR("failed to map gfx queue\n");
 735				return r;
 736			}
 737		}
 738
 739		return 0;
 740	}
 741
 742	spin_lock(&kiq->ring_lock);
 743	/* No need to map kcq on the slave */
 744	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
 745		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
 746						adev->gfx.num_gfx_rings);
 747		if (r) {
 748			DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 749			spin_unlock(&kiq->ring_lock);
 750			return r;
 751		}
 752
 753		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 754			j = i + xcc_id * adev->gfx.num_gfx_rings;
 755			kiq->pmf->kiq_map_queues(kiq_ring,
 756						 &adev->gfx.gfx_ring[j]);
 757		}
 758	}
 759	/* Submit map queue packet */
 760	amdgpu_ring_commit(kiq_ring);
 761	/*
 762	 * Ring test will do a basic scratch register change check. Just run
 763	 * this to ensure that map queues that is submitted before got
 764	 * processed successfully before returning.
 765	 */
 766	r = amdgpu_ring_test_helper(kiq_ring);
 767	spin_unlock(&kiq->ring_lock);
 768	if (r)
 769		DRM_ERROR("KGQ enable failed\n");
 770
 771	return r;
 772}
 773
 774/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
 775 *
 776 * @adev: amdgpu_device pointer
 777 * @bool enable true: enable gfx off feature, false: disable gfx off feature
 778 *
 779 * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
 780 * 2. other client can send request to disable gfx off feature, the request should be honored.
 781 * 3. other client can cancel their request of disable gfx off feature
 782 * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
 783 */
 784
 785void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 786{
 787	unsigned long delay = GFX_OFF_DELAY_ENABLE;
 788
 789	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
 790		return;
 791
 792	mutex_lock(&adev->gfx.gfx_off_mutex);
 793
 794	if (enable) {
 795		/* If the count is already 0, it means there's an imbalance bug somewhere.
 796		 * Note that the bug may be in a different caller than the one which triggers the
 797		 * WARN_ON_ONCE.
 798		 */
 799		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
 800			goto unlock;
 801
 802		adev->gfx.gfx_off_req_count--;
 803
 804		if (adev->gfx.gfx_off_req_count == 0 &&
 805		    !adev->gfx.gfx_off_state) {
 806			/* If going to s2idle, no need to wait */
 807			if (adev->in_s0ix) {
 808				if (!amdgpu_dpm_set_powergating_by_smu(adev,
 809						AMD_IP_BLOCK_TYPE_GFX, true))
 810					adev->gfx.gfx_off_state = true;
 811			} else {
 812				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
 813					      delay);
 814			}
 815		}
 816	} else {
 817		if (adev->gfx.gfx_off_req_count == 0) {
 818			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
 819
 820			if (adev->gfx.gfx_off_state &&
 821			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
 822				adev->gfx.gfx_off_state = false;
 823
 824				if (adev->gfx.funcs->init_spm_golden) {
 825					dev_dbg(adev->dev,
 826						"GFXOFF is disabled, re-init SPM golden settings\n");
 827					amdgpu_gfx_init_spm_golden(adev);
 828				}
 829			}
 830		}
 831
 832		adev->gfx.gfx_off_req_count++;
 833	}
 834
 835unlock:
 836	mutex_unlock(&adev->gfx.gfx_off_mutex);
 837}
 838
 839int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
 840{
 841	int r = 0;
 842
 843	mutex_lock(&adev->gfx.gfx_off_mutex);
 844
 845	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
 846
 847	mutex_unlock(&adev->gfx.gfx_off_mutex);
 848
 849	return r;
 850}
 851
 852int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
 853{
 854	int r = 0;
 855
 856	mutex_lock(&adev->gfx.gfx_off_mutex);
 857
 858	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
 859
 860	mutex_unlock(&adev->gfx.gfx_off_mutex);
 861
 862	return r;
 863}
 864
 865int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
 866{
 867	int r = 0;
 868
 869	mutex_lock(&adev->gfx.gfx_off_mutex);
 870
 871	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
 872
 873	mutex_unlock(&adev->gfx.gfx_off_mutex);
 874
 875	return r;
 876}
 877
 878int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
 879{
 880
 881	int r = 0;
 882
 883	mutex_lock(&adev->gfx.gfx_off_mutex);
 884
 885	r = amdgpu_dpm_get_status_gfxoff(adev, value);
 886
 887	mutex_unlock(&adev->gfx.gfx_off_mutex);
 888
 889	return r;
 890}
 891
 892int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
 893{
 894	int r;
 895
 896	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
 897		if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
 898			r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
 899			if (r)
 900				return r;
 901		}
 902
 903		r = amdgpu_ras_block_late_init(adev, ras_block);
 904		if (r)
 905			return r;
 906
 907		if (amdgpu_sriov_vf(adev))
 908			return r;
 909
 910		if (adev->gfx.cp_ecc_error_irq.funcs) {
 911			r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
 912			if (r)
 913				goto late_fini;
 914		}
 915	} else {
 916		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
 917	}
 918
 919	return 0;
 920late_fini:
 921	amdgpu_ras_block_late_fini(adev, ras_block);
 922	return r;
 923}
 924
 925int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
 926{
 927	int err = 0;
 928	struct amdgpu_gfx_ras *ras = NULL;
 929
 930	/* adev->gfx.ras is NULL, which means gfx does not
 931	 * support ras function, then do nothing here.
 932	 */
 933	if (!adev->gfx.ras)
 934		return 0;
 935
 936	ras = adev->gfx.ras;
 937
 938	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
 939	if (err) {
 940		dev_err(adev->dev, "Failed to register gfx ras block!\n");
 941		return err;
 942	}
 943
 944	strcpy(ras->ras_block.ras_comm.name, "gfx");
 945	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
 946	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
 947	adev->gfx.ras_if = &ras->ras_block.ras_comm;
 948
 949	/* If not define special ras_late_init function, use gfx default ras_late_init */
 950	if (!ras->ras_block.ras_late_init)
 951		ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
 952
 953	/* If not defined special ras_cb function, use default ras_cb */
 954	if (!ras->ras_block.ras_cb)
 955		ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
 956
 957	return 0;
 958}
 959
 960int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
 961						struct amdgpu_iv_entry *entry)
 962{
 963	if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
 964		return adev->gfx.ras->poison_consumption_handler(adev, entry);
 965
 966	return 0;
 967}
 968
 969int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
 970		void *err_data,
 971		struct amdgpu_iv_entry *entry)
 972{
 973	/* TODO ue will trigger an interrupt.
 974	 *
 975	 * When “Full RAS” is enabled, the per-IP interrupt sources should
 976	 * be disabled and the driver should only look for the aggregated
 977	 * interrupt via sync flood
 978	 */
 979	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
 980		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
 981		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
 982		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
 983			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
 984		amdgpu_ras_reset_gpu(adev);
 985	}
 986	return AMDGPU_RAS_SUCCESS;
 987}
 988
 989int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
 990				  struct amdgpu_irq_src *source,
 991				  struct amdgpu_iv_entry *entry)
 992{
 993	struct ras_common_if *ras_if = adev->gfx.ras_if;
 994	struct ras_dispatch_if ih_data = {
 995		.entry = entry,
 996	};
 997
 998	if (!ras_if)
 999		return 0;
1000
1001	ih_data.head = *ras_if;
1002
1003	DRM_ERROR("CP ECC ERROR IRQ\n");
1004	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
1005	return 0;
1006}
1007
1008void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
1009		void *ras_error_status,
1010		void (*func)(struct amdgpu_device *adev, void *ras_error_status,
1011				int xcc_id))
1012{
1013	int i;
1014	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
1015	uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
1016	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
1017
1018	if (err_data) {
1019		err_data->ue_count = 0;
1020		err_data->ce_count = 0;
1021	}
1022
1023	for_each_inst(i, xcc_mask)
1024		func(adev, ras_error_status, i);
1025}
1026
/*
 * amdgpu_kiq_rreg - read a register through the KIQ ring
 * @adev: amdgpu device
 * @reg: register offset to read
 * @xcc_id: index into adev->gfx.kiq[] selecting which KIQ to use
 *
 * Emits a register-read packet plus a polling fence on the KIQ ring, waits
 * for the fence and then fetches the result from the writeback slot that was
 * reserved for it. If the MES ring 0 scheduler is ready the read is routed
 * through amdgpu_mes_rreg() instead.
 *
 * Returns the register value, 0 when hardware access is being skipped, or
 * ~0 when the read could not be issued or did not complete.
 */
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0, value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (adev->mes.ring[0].sched.ready)
		return amdgpu_mes_rreg(adev, reg);

	BUG_ON(!ring->funcs->emit_rreg);

	/* ring_lock covers slot reservation and ring emission up to the commit */
	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	r = amdgpu_ring_alloc(ring, 32);
	if (r)
		goto failed_unlock;

	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for gpu reset case because this way may
	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which cause
	 * gpu_recover() hang there.
	 *
	 * also don't wait anymore for IRQ context
	 * */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_read;

	/* Retry with sleeps in between; only legal because we may sleep here */
	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	/* cnt only exceeds MAX_KIQ_REG_TRY when every retry above timed out */
	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_read;

	/* Barrier before reading the writeback slot filled in by the ring */
	mb();
	value = adev->wb.wb[reg_val_offs];
	amdgpu_device_wb_free(adev, reg_val_offs);
	return value;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
	/*
	 * reg_val_offs is still its initial 0 when no slot was reserved.
	 * NOTE(review): a legitimately allocated offset of 0 would not be
	 * freed here - confirm amdgpu_device_wb_get() never hands out 0.
	 */
	if (reg_val_offs)
		amdgpu_device_wb_free(adev, reg_val_offs);
	dev_err(adev->dev, "failed to read reg:%x\n", reg);
	return ~0;
}
1097
/*
 * amdgpu_kiq_wreg - write a register through the KIQ ring
 * @adev: amdgpu device
 * @reg: register offset to write
 * @v: value to write
 * @xcc_id: index into adev->gfx.kiq[] selecting which KIQ to use
 *
 * Emits a register-write packet plus a polling fence on the KIQ ring and
 * waits for the fence. If the MES ring 0 scheduler is ready the write is
 * routed through amdgpu_mes_wreg() instead. Nothing is done when hardware
 * access is being skipped.
 *
 * There is no return value; a failure is only reported via dev_err().
 */
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (adev->mes.ring[0].sched.ready) {
		amdgpu_mes_wreg(adev, reg, v);
		return;
	}

	/* ring_lock covers ring emission up to the commit */
	spin_lock_irqsave(&kiq->ring_lock, flags);
	r = amdgpu_ring_alloc(ring, 32);
	if (r)
		goto failed_unlock;

	amdgpu_ring_emit_wreg(ring, reg, v);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for gpu reset case because this way may
	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which cause
	 * gpu_recover() hang there.
	 *
	 * also don't wait anymore for IRQ context
	 * */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_write;

	/* Retry with sleeps in between; only legal because we may sleep here */
	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {

		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	/* cnt only exceeds MAX_KIQ_REG_TRY when every retry above timed out */
	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_write;

	return;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
	dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
1161
1162int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
1163{
1164	if (amdgpu_num_kcq == -1) {
1165		return 8;
1166	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
1167		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
1168		return 8;
1169	}
1170	return amdgpu_num_kcq;
1171}
1172
/*
 * amdgpu_gfx_cp_init_microcode - record CP firmware info for one ucode ID
 * @adev: amdgpu device
 * @ucode_id: one of the AMDGPU_UCODE_ID_CP_* identifiers handled below
 *
 * Selects the already-loaded firmware image (pfp, me, ce, mec or mec2) that
 * belongs to @ucode_id and reads its header, interpreted as a v1.0 or v2.0
 * gfx firmware header depending on the ID. For the primary IDs the firmware
 * and feature versions are cached in adev->gfx; the stack and jump-table IDs
 * only determine a size.
 *
 * When the firmware load type is AMDGPU_FW_LOAD_PSP, the image is recorded
 * in adev->firmware.ucode[@ucode_id] and its page-aligned size is added to
 * adev->firmware.fw_size.
 *
 * An unknown @ucode_id is logged and otherwise ignored.
 */
void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
				  uint32_t ucode_id)
{
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
	struct amdgpu_firmware_info *info = NULL;
	const struct firmware *ucode_fw;
	unsigned int fw_size;

	switch (ucode_id) {
	case AMDGPU_UCODE_ID_CP_PFP:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.pfp_fw->data;
		adev->gfx.pfp_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.pfp_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
		ucode_fw = adev->gfx.pfp_fw;
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_PFP:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.pfp_fw->data;
		adev->gfx.pfp_fw_version =
			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
		adev->gfx.pfp_feature_version =
			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
		ucode_fw = adev->gfx.pfp_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
		break;
	/* Stack IDs share the PFP image; only the data size is accounted */
	case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.pfp_fw->data;
		ucode_fw = adev->gfx.pfp_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_ME:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.me_fw->data;
		adev->gfx.me_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.me_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
		ucode_fw = adev->gfx.me_fw;
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_ME:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.me_fw->data;
		adev->gfx.me_fw_version =
			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
		adev->gfx.me_feature_version =
			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
		ucode_fw = adev->gfx.me_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
		break;
	/* Stack IDs share the ME image; only the data size is accounted */
	case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.me_fw->data;
		ucode_fw = adev->gfx.me_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_CE:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.ce_fw->data;
		adev->gfx.ce_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.ce_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
		ucode_fw = adev->gfx.ce_fw;
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_MEC1:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec_fw->data;
		adev->gfx.mec_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
		ucode_fw = adev->gfx.mec_fw;
		/*
		 * The jump table is accounted separately under the _JT ID, so
		 * it is subtracted here (jt_size * 4: presumably dwords to
		 * bytes - TODO confirm).
		 */
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			  le32_to_cpu(cp_hdr->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC1_JT:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec_fw->data;
		ucode_fw = adev->gfx.mec_fw;
		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC2:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(cp_hdr->ucode_feature_version);
		ucode_fw = adev->gfx.mec2_fw;
		/* Same split as MEC1: jump table is accounted under MEC2_JT */
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			  le32_to_cpu(cp_hdr->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC2_JT:
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec2_fw->data;
		ucode_fw = adev->gfx.mec2_fw;
		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_RS64_MEC:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.mec_fw->data;
		adev->gfx.mec_fw_version =
			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
		adev->gfx.mec_feature_version =
			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
		ucode_fw = adev->gfx.mec_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
		break;
	/* Stack IDs share the MEC image; only the data size is accounted */
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.mec_fw->data;
		ucode_fw = adev->gfx.mec_fw;
		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
		break;
	default:
		dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
		return;
	}

	/* Only PSP-based loading needs the per-ID bookkeeping below */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[ucode_id];
		info->ucode_id = ucode_id;
		info->fw = ucode_fw;
		adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
	}
}
1312
1313bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
1314{
1315	return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
1316			adev->gfx.num_xcc_per_xcp : 1));
1317}
1318
1319static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
1320						struct device_attribute *addr,
1321						char *buf)
1322{
1323	struct drm_device *ddev = dev_get_drvdata(dev);
1324	struct amdgpu_device *adev = drm_to_adev(ddev);
1325	int mode;
1326
1327	mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
1328					       AMDGPU_XCP_FL_NONE);
1329
1330	return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
1331}
1332
1333static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
1334						struct device_attribute *addr,
1335						const char *buf, size_t count)
1336{
1337	struct drm_device *ddev = dev_get_drvdata(dev);
1338	struct amdgpu_device *adev = drm_to_adev(ddev);
1339	enum amdgpu_gfx_partition mode;
1340	int ret = 0, num_xcc;
1341
1342	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1343	if (num_xcc % 2 != 0)
1344		return -EINVAL;
1345
1346	if (!strncasecmp("SPX", buf, strlen("SPX"))) {
1347		mode = AMDGPU_SPX_PARTITION_MODE;
1348	} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
1349		/*
1350		 * DPX mode needs AIDs to be in multiple of 2.
1351		 * Each AID connects 2 XCCs.
1352		 */
1353		if (num_xcc%4)
1354			return -EINVAL;
1355		mode = AMDGPU_DPX_PARTITION_MODE;
1356	} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
1357		if (num_xcc != 6)
1358			return -EINVAL;
1359		mode = AMDGPU_TPX_PARTITION_MODE;
1360	} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
1361		if (num_xcc != 8)
1362			return -EINVAL;
1363		mode = AMDGPU_QPX_PARTITION_MODE;
1364	} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
1365		mode = AMDGPU_CPX_PARTITION_MODE;
1366	} else {
1367		return -EINVAL;
1368	}
1369
1370	ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
1371
1372	if (ret)
1373		return ret;
1374
1375	return count;
1376}
1377
1378static const char *xcp_desc[] = {
1379	[AMDGPU_SPX_PARTITION_MODE] = "SPX",
1380	[AMDGPU_DPX_PARTITION_MODE] = "DPX",
1381	[AMDGPU_TPX_PARTITION_MODE] = "TPX",
1382	[AMDGPU_QPX_PARTITION_MODE] = "QPX",
1383	[AMDGPU_CPX_PARTITION_MODE] = "CPX",
1384};
1385
1386static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
1387						struct device_attribute *addr,
1388						char *buf)
1389{
1390	struct drm_device *ddev = dev_get_drvdata(dev);
1391	struct amdgpu_device *adev = drm_to_adev(ddev);
1392	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
1393	int size = 0, mode;
1394	char *sep = "";
1395
1396	if (!xcp_mgr || !xcp_mgr->avail_xcp_modes)
1397		return sysfs_emit(buf, "Not supported\n");
1398
1399	for_each_inst(mode, xcp_mgr->avail_xcp_modes) {
1400		size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
1401		sep = ", ";
1402	}
1403
1404	size += sysfs_emit_at(buf, size, "\n");
1405
1406	return size;
1407}
1408
1409static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
1410{
1411	struct amdgpu_device *adev = ring->adev;
1412	struct drm_gpu_scheduler *sched = &ring->sched;
1413	struct drm_sched_entity entity;
1414	struct dma_fence *f;
1415	struct amdgpu_job *job;
1416	struct amdgpu_ib *ib;
1417	int i, r;
1418
1419	/* Initialize the scheduler entity */
1420	r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
1421				  &sched, 1, NULL);
1422	if (r) {
1423		dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
1424		goto err;
1425	}
1426
1427	r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL,
1428				     64, 0,
1429				     &job);
1430	if (r)
1431		goto err;
1432
1433	job->enforce_isolation = true;
1434
1435	ib = &job->ibs[0];
1436	for (i = 0; i <= ring->funcs->align_mask; ++i)
1437		ib->ptr[i] = ring->funcs->nop;
1438	ib->length_dw = ring->funcs->align_mask + 1;
1439
1440	f = amdgpu_job_submit(job);
1441
1442	r = dma_fence_wait(f, false);
1443	if (r)
1444		goto err;
1445
1446	dma_fence_put(f);
1447
1448	/* Clean up the scheduler entity */
1449	drm_sched_entity_destroy(&entity);
1450	return 0;
1451
1452err:
1453	return r;
1454}
1455
1456static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
1457{
1458	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1459	struct amdgpu_ring *ring;
1460	int num_xcc_to_clear;
1461	int i, r, xcc_id;
1462
1463	if (adev->gfx.num_xcc_per_xcp)
1464		num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
1465	else
1466		num_xcc_to_clear = 1;
1467
1468	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1469		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1470			ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
1471			if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
1472				r = amdgpu_gfx_run_cleaner_shader_job(ring);
1473				if (r)
1474					return r;
1475				num_xcc_to_clear--;
1476				break;
1477			}
1478		}
1479	}
1480
1481	if (num_xcc_to_clear)
1482		return -ENOENT;
1483
1484	return 0;
1485}
1486
1487static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
1488						 struct device_attribute *attr,
1489						 const char *buf,
1490						 size_t count)
1491{
1492	struct drm_device *ddev = dev_get_drvdata(dev);
1493	struct amdgpu_device *adev = drm_to_adev(ddev);
1494	int ret;
1495	long value;
1496
1497	if (amdgpu_in_reset(adev))
1498		return -EPERM;
1499	if (adev->in_suspend && !adev->in_runpm)
1500		return -EPERM;
1501
1502	ret = kstrtol(buf, 0, &value);
1503
1504	if (ret)
1505		return -EINVAL;
1506
1507	if (value < 0)
1508		return -EINVAL;
1509
1510	if (adev->xcp_mgr) {
1511		if (value >= adev->xcp_mgr->num_xcps)
1512			return -EINVAL;
1513	} else {
1514		if (value > 1)
1515			return -EINVAL;
1516	}
1517
1518	ret = pm_runtime_get_sync(ddev->dev);
1519	if (ret < 0) {
1520		pm_runtime_put_autosuspend(ddev->dev);
1521		return ret;
1522	}
1523
1524	ret = amdgpu_gfx_run_cleaner_shader(adev, value);
1525
1526	pm_runtime_mark_last_busy(ddev->dev);
1527	pm_runtime_put_autosuspend(ddev->dev);
1528
1529	if (ret)
1530		return ret;
1531
1532	return count;
1533}
1534
1535static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
1536						struct device_attribute *attr,
1537						char *buf)
1538{
1539	struct drm_device *ddev = dev_get_drvdata(dev);
1540	struct amdgpu_device *adev = drm_to_adev(ddev);
1541	int i;
1542	ssize_t size = 0;
1543
1544	if (adev->xcp_mgr) {
1545		for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
1546			size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
1547			if (i < (adev->xcp_mgr->num_xcps - 1))
1548				size += sysfs_emit_at(buf, size, " ");
1549		}
1550		buf[size++] = '\n';
1551	} else {
1552		size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
1553	}
1554
1555	return size;
1556}
1557
/*
 * sysfs store handler for enforce_isolation.
 *
 * Expects one integer per partition (num_xcps values with an XCP manager,
 * a single value otherwise), separated by single spaces. Every value must
 * be 0 or 1. For each partition whose setting changes, the reserved VMID on
 * AMDGPU_GFXHUB(i) is allocated (0 -> 1) or freed (1 -> 0) under
 * enforce_isolation_mutex, then MES is told about the new settings.
 *
 * Returns @count on success or -EINVAL when the number of values or any
 * value is invalid.
 */
static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
						struct device_attribute *attr,
						const char *buf, size_t count)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	long partition_values[MAX_XCP] = {0};
	int ret, i, num_partitions;
	const char *input_buf = buf;

	/*
	 * Parse at most one value per partition. On exit, i is the number of
	 * values successfully parsed.
	 * NOTE(review): this relies on num_xcps never exceeding MAX_XCP -
	 * confirm.
	 */
	for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
		ret = sscanf(input_buf, "%ld", &partition_values[i]);
		if (ret <= 0)
			break;

		/* Move the pointer to the next value in the string */
		input_buf = strchr(input_buf, ' ');
		if (input_buf) {
			input_buf++;
		} else {
			/* Last value in the string: count it, then stop */
			i++;
			break;
		}
	}
	num_partitions = i;

	if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
		return -EINVAL;

	if (!adev->xcp_mgr && num_partitions != 1)
		return -EINVAL;

	/* Validate everything first so a bad value leaves the state untouched */
	for (i = 0; i < num_partitions; i++) {
		if (partition_values[i] != 0 && partition_values[i] != 1)
			return -EINVAL;
	}

	mutex_lock(&adev->enforce_isolation_mutex);
	for (i = 0; i < num_partitions; i++) {
		if (adev->enforce_isolation[i] && !partition_values[i])
			/* Going from enabled to disabled */
			amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
		else if (!adev->enforce_isolation[i] && partition_values[i])
			/* Going from disabled to enabled */
			amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
		adev->enforce_isolation[i] = partition_values[i];
	}
	mutex_unlock(&adev->enforce_isolation_mutex);

	amdgpu_mes_update_enforce_isolation(adev);

	return count;
}
1611
1612static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
1613						struct device_attribute *attr,
1614						char *buf)
1615{
1616	struct drm_device *ddev = dev_get_drvdata(dev);
1617	struct amdgpu_device *adev = drm_to_adev(ddev);
1618
1619	if (!adev)
1620		return -ENODEV;
1621
1622	return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
1623}
1624
1625static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
1626						struct device_attribute *attr,
1627						char *buf)
1628{
1629	struct drm_device *ddev = dev_get_drvdata(dev);
1630	struct amdgpu_device *adev = drm_to_adev(ddev);
1631
1632	if (!adev)
1633		return -ENODEV;
1634
1635	return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);
1636}
1637
/* Write-only (0200): store handler runs the cleaner shader on a partition */
static DEVICE_ATTR(run_cleaner_shader, 0200,
		   NULL, amdgpu_gfx_set_run_cleaner_shader);

/* Read/write (0644): per-partition enforce-isolation flags */
static DEVICE_ATTR(enforce_isolation, 0644,
		   amdgpu_gfx_get_enforce_isolation,
		   amdgpu_gfx_set_enforce_isolation);

/*
 * Read/write (0644): current compute partition mode. The write bits are
 * cleared in amdgpu_gfx_sysfs_xcp_init() when mode switching is unsupported.
 */
static DEVICE_ATTR(current_compute_partition, 0644,
		   amdgpu_gfx_get_current_compute_partition,
		   amdgpu_gfx_set_compute_partition);

/* Read-only (0444): list of compute partition modes that can be selected */
static DEVICE_ATTR(available_compute_partition, 0444,
		   amdgpu_gfx_get_available_compute_partition, NULL);
/* Read-only (0444): reset types supported for gfx rings */
static DEVICE_ATTR(gfx_reset_mask, 0444,
		   amdgpu_gfx_get_gfx_reset_mask, NULL);

/* Read-only (0444): reset types supported for compute rings */
static DEVICE_ATTR(compute_reset_mask, 0444,
		   amdgpu_gfx_get_compute_reset_mask, NULL);
1656
1657static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
1658{
1659	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
1660	bool xcp_switch_supported;
1661	int r;
1662
1663	if (!xcp_mgr)
1664		return 0;
1665
1666	xcp_switch_supported =
1667		(xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
1668
1669	if (!xcp_switch_supported)
1670		dev_attr_current_compute_partition.attr.mode &=
1671			~(S_IWUSR | S_IWGRP | S_IWOTH);
1672
1673	r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
1674	if (r)
1675		return r;
1676
1677	if (xcp_switch_supported)
1678		r = device_create_file(adev->dev,
1679				       &dev_attr_available_compute_partition);
1680
1681	return r;
1682}
1683
1684static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev)
1685{
1686	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
1687	bool xcp_switch_supported;
1688
1689	if (!xcp_mgr)
1690		return;
1691
1692	xcp_switch_supported =
1693		(xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
1694	device_remove_file(adev->dev, &dev_attr_current_compute_partition);
1695
1696	if (xcp_switch_supported)
1697		device_remove_file(adev->dev,
1698				   &dev_attr_available_compute_partition);
1699}
1700
1701static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
1702{
1703	int r;
1704
1705	r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
1706	if (r)
1707		return r;
1708	if (adev->gfx.enable_cleaner_shader)
1709		r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
1710
1711	return r;
1712}
1713
1714static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
1715{
1716	device_remove_file(adev->dev, &dev_attr_enforce_isolation);
1717	if (adev->gfx.enable_cleaner_shader)
1718		device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
1719}
1720
1721static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
1722{
1723	int r = 0;
1724
1725	if (!amdgpu_gpu_recovery)
1726		return r;
1727
1728	if (adev->gfx.num_gfx_rings) {
1729		r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
1730		if (r)
1731			return r;
1732	}
1733
1734	if (adev->gfx.num_compute_rings) {
1735		r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
1736		if (r)
1737			return r;
1738	}
1739
1740	return r;
1741}
1742
1743static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
1744{
1745	if (!amdgpu_gpu_recovery)
1746		return;
1747
1748	if (adev->gfx.num_gfx_rings)
1749		device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
1750
1751	if (adev->gfx.num_compute_rings)
1752		device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
1753}
1754
1755int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
1756{
1757	int r;
1758
1759	r = amdgpu_gfx_sysfs_xcp_init(adev);
1760	if (r) {
1761		dev_err(adev->dev, "failed to create xcp sysfs files");
1762		return r;
1763	}
1764
1765	r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
1766	if (r)
1767		dev_err(adev->dev, "failed to create isolation sysfs files");
1768
1769	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
1770	if (r)
1771		dev_err(adev->dev, "failed to create reset mask sysfs files");
1772
1773	return r;
1774}
1775
1776void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
1777{
1778	if (adev->dev->kobj.sd) {
1779		amdgpu_gfx_sysfs_xcp_fini(adev);
1780		amdgpu_gfx_sysfs_isolation_shader_fini(adev);
1781		amdgpu_gfx_sysfs_reset_mask_fini(adev);
1782	}
1783}
1784
1785int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
1786				      unsigned int cleaner_shader_size)
1787{
1788	if (!adev->gfx.enable_cleaner_shader)
1789		return -EOPNOTSUPP;
1790
1791	return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
1792				       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
1793				       &adev->gfx.cleaner_shader_obj,
1794				       &adev->gfx.cleaner_shader_gpu_addr,
1795				       (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1796}
1797
1798void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
1799{
1800	if (!adev->gfx.enable_cleaner_shader)
1801		return;
1802
1803	amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
1804			      &adev->gfx.cleaner_shader_gpu_addr,
1805			      (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1806}
1807
1808void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
1809				    unsigned int cleaner_shader_size,
1810				    const void *cleaner_shader_ptr)
1811{
1812	if (!adev->gfx.enable_cleaner_shader)
1813		return;
1814
1815	if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
1816		memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
1817			    cleaner_shader_size);
1818}
1819
/**
 * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
 * @adev: amdgpu_device pointer
 * @idx: Index of the scheduler to control
 * @enable: Whether to enable or disable the KFD scheduler
 *
 * This function is used to control the KFD (Kernel Fusion Driver) scheduler
 * from the KGD. It is part of the cleaner shader feature. This function plays
 * a key role in enforcing process isolation on the GPU.
 *
 * The function uses a reference count mechanism (kfd_sch_req_count) to keep
 * track of the number of outstanding requests to keep the KFD scheduler
 * stopped. When a request to enable the KFD scheduler is made, the reference
 * count is decremented. When the reference count reaches zero and the KFD
 * scheduler is currently marked inactive, the enforce-isolation delayed work
 * is scheduled to run after enforce_isolation_time[idx] milliseconds.
 *
 * When a request to disable the KFD scheduler is made, the function first
 * checks if the reference count is zero. If it is, it cancels the delayed work
 * for enforcing isolation and checks if the KFD scheduler is active. If the
 * KFD scheduler is active, it sends a request to stop the KFD scheduler and
 * sets the KFD scheduler state to inactive. Then, it increments the reference
 * count.
 *
 * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
 * scheduler state and reference count are updated atomically.
 *
 * Note: If the reference count is already zero when a request to enable the
 * KFD scheduler is made, it means there's an imbalance bug somewhere. The
 * function triggers a warning in this case and leaves the state unchanged.
 */
static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
				    bool enable)
{
	mutex_lock(&adev->gfx.kfd_sch_mutex);

	if (enable) {
		/* If the count is already 0, it means there's an imbalance bug somewhere.
		 * Note that the bug may be in a different caller than the one which triggers the
		 * WARN_ON_ONCE.
		 */
		if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
			dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
			goto unlock;
		}

		adev->gfx.kfd_sch_req_count[idx]--;

		/* Last request dropped: arm the work that restarts the KFD scheduler */
		if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
		    adev->gfx.kfd_sch_inactive[idx]) {
			schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
					      msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
		}
	} else {
		/* First request: make sure no restart is pending, then stop KFD */
		if (adev->gfx.kfd_sch_req_count[idx] == 0) {
			cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
			if (!adev->gfx.kfd_sch_inactive[idx]) {
				amdgpu_amdkfd_stop_sched(adev, idx);
				adev->gfx.kfd_sch_inactive[idx] = true;
			}
		}

		adev->gfx.kfd_sch_req_count[idx]++;
	}

unlock:
	mutex_unlock(&adev->gfx.kfd_sch_mutex);
}
1887
1888/**
1889 * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
1890 *
1891 * @work: work_struct.
1892 *
1893 * This function is the work handler for enforcing shader isolation on AMD GPUs.
1894 * It counts the number of emitted fences for each GFX and compute ring. If there
1895 * are any fences, it schedules the `enforce_isolation_work` to be run after a
1896 * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion
1897 * Driver (KFD) to resume the runqueue. The function is synchronized using the
1898 * `enforce_isolation_mutex`.
1899 */
1900void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
1901{
1902	struct amdgpu_isolation_work *isolation_work =
1903		container_of(work, struct amdgpu_isolation_work, work.work);
1904	struct amdgpu_device *adev = isolation_work->adev;
1905	u32 i, idx, fences = 0;
1906
1907	if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
1908		idx = 0;
1909	else
1910		idx = isolation_work->xcp_id;
1911
1912	if (idx >= MAX_XCP)
1913		return;
1914
1915	mutex_lock(&adev->enforce_isolation_mutex);
1916	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
1917		if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
1918			fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
1919	}
1920	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
1921		if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
1922			fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
1923	}
1924	if (fences) {
1925		/* we've already had our timeslice, so let's wrap this up */
1926		schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1927				      msecs_to_jiffies(1));
1928	} else {
1929		/* Tell KFD to resume the runqueue */
1930		if (adev->kfd.init_complete) {
1931			WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
1932			WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
1933				amdgpu_amdkfd_start_sched(adev, idx);
1934				adev->gfx.kfd_sch_inactive[idx] = false;
1935		}
1936	}
1937	mutex_unlock(&adev->enforce_isolation_mutex);
1938}
1939
/*
 * Give KFD compute work a chance to run before more KGD work is submitted.
 *
 * Only acts when isolation is enforced for partition @idx. It tracks, per
 * partition, the start of the current time slice (enforce_isolation_jiffies)
 * and how long the isolation work should be delayed
 * (enforce_isolation_time, in ms). If KFD compute is active and at least
 * GFX_SLICE_PERIOD_MS have elapsed since the slice started, the caller is put
 * to sleep for GFX_SLICE_PERIOD_MS after the mutex has been released.
 */
static void
amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
					  u32 idx)
{
	unsigned long cjiffies;
	bool wait = false;

	mutex_lock(&adev->enforce_isolation_mutex);
	if (adev->enforce_isolation[idx]) {
		/* set the initial values if nothing is set */
		if (!adev->gfx.enforce_isolation_jiffies[idx]) {
			adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
			adev->gfx.enforce_isolation_time[idx] =	GFX_SLICE_PERIOD_MS;
		}
		/* Make sure KFD gets a chance to run */
		if (amdgpu_amdkfd_compute_active(adev, idx)) {
			cjiffies = jiffies;
			if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
				/* cjiffies now holds the time elapsed in this slice */
				cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
				if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) {
					/* if our time is up, let KGD work drain before scheduling more */
					wait = true;
					/* reset the timer period */
					adev->gfx.enforce_isolation_time[idx] =	GFX_SLICE_PERIOD_MS;
				} else {
					/* set the timer period to what's left in our time slice */
					adev->gfx.enforce_isolation_time[idx] =
						GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
				}
			} else {
				/* if jiffies wrap around we will just wait a little longer */
				adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
			}
		} else {
			/* if there is no KFD work, then set the full slice period */
			adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
			adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
		}
	}
	mutex_unlock(&adev->enforce_isolation_mutex);

	/* Sleep only after dropping the mutex so other users are not blocked */
	if (wait)
		msleep(GFX_SLICE_PERIOD_MS);
}
1984
1985void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
1986{
1987	struct amdgpu_device *adev = ring->adev;
1988	u32 idx;
1989	bool sched_work = false;
1990
1991	if (!adev->gfx.enable_cleaner_shader)
1992		return;
1993
1994	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
1995		idx = 0;
1996	else
1997		idx = ring->xcp_id;
1998
1999	if (idx >= MAX_XCP)
2000		return;
2001
2002	/* Don't submit more work until KFD has had some time */
2003	amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
2004
2005	mutex_lock(&adev->enforce_isolation_mutex);
2006	if (adev->enforce_isolation[idx]) {
2007		if (adev->kfd.init_complete)
2008			sched_work = true;
2009	}
2010	mutex_unlock(&adev->enforce_isolation_mutex);
2011
2012	if (sched_work)
2013		amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
2014}
2015
2016void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
2017{
2018	struct amdgpu_device *adev = ring->adev;
2019	u32 idx;
2020	bool sched_work = false;
2021
2022	if (!adev->gfx.enable_cleaner_shader)
2023		return;
2024
2025	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
2026		idx = 0;
2027	else
2028		idx = ring->xcp_id;
2029
2030	if (idx >= MAX_XCP)
2031		return;
2032
2033	mutex_lock(&adev->enforce_isolation_mutex);
2034	if (adev->enforce_isolation[idx]) {
2035		if (adev->kfd.init_complete)
2036			sched_work = true;
2037	}
2038	mutex_unlock(&adev->enforce_isolation_mutex);
2039
2040	if (sched_work)
2041		amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
2042}
2043
/*
 * debugfs interface to enable/disable gfx job submission to a specific core.
 */
2047#if defined(CONFIG_DEBUG_FS)
2048static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
2049{
2050	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2051	u32 i;
2052	u64 mask = 0;
2053	struct amdgpu_ring *ring;
2054
2055	if (!adev)
2056		return -ENODEV;
2057
2058	mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
2059	if ((val & mask) == 0)
2060		return -EINVAL;
2061
2062	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
2063		ring = &adev->gfx.gfx_ring[i];
2064		if (val & (1 << i))
2065			ring->sched.ready = true;
2066		else
2067			ring->sched.ready = false;
2068	}
2069	/* publish sched.ready flag update effective immediately across smp */
2070	smp_rmb();
2071	return 0;
2072}
2073
2074static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
2075{
2076	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2077	u32 i;
2078	u64 mask = 0;
2079	struct amdgpu_ring *ring;
2080
2081	if (!adev)
2082		return -ENODEV;
2083	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
2084		ring = &adev->gfx.gfx_ring[i];
2085		if (ring->sched.ready)
2086			mask |= 1ULL << i;
2087	}
2088
2089	*val = mask;
2090	return 0;
2091}
2092
/*
 * Declare amdgpu_debugfs_gfx_sched_mask_fops, wiring up the gfx mask getter
 * and setter above; "%llx\n" is the format used for the value.
 */
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
			 amdgpu_debugfs_gfx_sched_mask_get,
			 amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");
2096
2097#endif
2098
/**
 * amdgpu_debugfs_gfx_sched_mask_init - create the gfx scheduler mask debugfs file
 * @adev: amdgpu device
 *
 * When CONFIG_DEBUG_FS is enabled and the device has more than one gfx ring,
 * creates "amdgpu_gfx_sched_mask" (mode 0600) in the debugfs root of the
 * device's primary DRM minor.  Otherwise does nothing.
 */
void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	/* A mask is pointless when there is only a single ring to pick. */
	if (adev->gfx.num_gfx_rings <= 1)
		return;

	/* The name is a constant: no need to format it into a stack buffer. */
	debugfs_create_file("amdgpu_gfx_sched_mask", 0600, root, adev,
			    &amdgpu_debugfs_gfx_sched_mask_fops);
#endif
}
2113
/*
 * debugfs interface to enable/disable compute job submission to a specific core.
 */
2117#if defined(CONFIG_DEBUG_FS)
2118static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
2119{
2120	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2121	u32 i;
2122	u64 mask = 0;
2123	struct amdgpu_ring *ring;
2124
2125	if (!adev)
2126		return -ENODEV;
2127
2128	mask = (1ULL << adev->gfx.num_compute_rings) - 1;
2129	if ((val & mask) == 0)
2130		return -EINVAL;
2131
2132	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
2133		ring = &adev->gfx.compute_ring[i];
2134		if (val & (1 << i))
2135			ring->sched.ready = true;
2136		else
2137			ring->sched.ready = false;
2138	}
2139
2140	/* publish sched.ready flag update effective immediately across smp */
2141	smp_rmb();
2142	return 0;
2143}
2144
2145static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
2146{
2147	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2148	u32 i;
2149	u64 mask = 0;
2150	struct amdgpu_ring *ring;
2151
2152	if (!adev)
2153		return -ENODEV;
2154	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
2155		ring = &adev->gfx.compute_ring[i];
2156		if (ring->sched.ready)
2157			mask |= 1ULL << i;
2158	}
2159
2160	*val = mask;
2161	return 0;
2162}
2163
/*
 * Declare amdgpu_debugfs_compute_sched_mask_fops, wiring up the compute mask
 * getter and setter above; "%llx\n" is the format used for the value.
 */
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
			 amdgpu_debugfs_compute_sched_mask_get,
			 amdgpu_debugfs_compute_sched_mask_set, "%llx\n");
2167
2168#endif
2169
/**
 * amdgpu_debugfs_compute_sched_mask_init - create the compute scheduler mask debugfs file
 * @adev: amdgpu device
 *
 * When CONFIG_DEBUG_FS is enabled and the device has more than one compute
 * ring, creates "amdgpu_compute_sched_mask" (mode 0600) in the debugfs root
 * of the device's primary DRM minor.  Otherwise does nothing.
 */
void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	/* A mask is pointless when there is only a single ring to pick. */
	if (adev->gfx.num_compute_rings <= 1)
		return;

	/* The name is a constant: no need to format it into a stack buffer. */
	debugfs_create_file("amdgpu_compute_sched_mask", 0600, root, adev,
			    &amdgpu_debugfs_compute_sched_mask_fops);
#endif
}