amdgpu_gfx.c - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c - Linux diff v6.9.4

   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 */
  25
  26#include <linux/firmware.h>
 
 
  27#include "amdgpu.h"
  28#include "amdgpu_gfx.h"
  29#include "amdgpu_rlc.h"
  30#include "amdgpu_ras.h"
 
  31#include "amdgpu_xcp.h"
  32#include "amdgpu_xgmi.h"
  33
  34/* delay 0.1 second to enable gfx off feature */
  35#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
  36
  37#define GFX_OFF_NO_DELAY 0
  38
  39/*
  40 * GPU GFX IP block helper functions.
  41 */
  42
  43int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
  44				int pipe, int queue)
  45{
  46	int bit = 0;
  47
  48	bit += mec * adev->gfx.mec.num_pipe_per_mec
  49		* adev->gfx.mec.num_queue_per_pipe;
  50	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
  51	bit += queue;
  52
  53	return bit;
  54}
  55
  56void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
  57				 int *mec, int *pipe, int *queue)
  58{
  59	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
  60	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
  61		% adev->gfx.mec.num_pipe_per_mec;
  62	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
  63	       / adev->gfx.mec.num_pipe_per_mec;
  64
  65}
  66
  67bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
  68				     int xcc_id, int mec, int pipe, int queue)
  69{
  70	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
  71			adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
  72}
  73
  74int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
  75			       int me, int pipe, int queue)
  76{
  77	int bit = 0;
  78
  79	bit += me * adev->gfx.me.num_pipe_per_me
  80		* adev->gfx.me.num_queue_per_pipe;
  81	bit += pipe * adev->gfx.me.num_queue_per_pipe;
  82	bit += queue;
  83
  84	return bit;
  85}
  86
  87void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
  88				int *me, int *pipe, int *queue)
  89{
  90	*queue = bit % adev->gfx.me.num_queue_per_pipe;
  91	*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
  92		% adev->gfx.me.num_pipe_per_me;
  93	*me = (bit / adev->gfx.me.num_queue_per_pipe)
  94		/ adev->gfx.me.num_pipe_per_me;
  95}
  96
  97bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
  98				    int me, int pipe, int queue)
  99{
 100	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
 101			adev->gfx.me.queue_bitmap);
 102}
 103
 104/**
 105 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 106 *
 107 * @mask: array in which the per-shader array disable masks will be stored
 108 * @max_se: number of SEs
 109 * @max_sh: number of SHs
 110 *
 111 * The bitmask of CUs to be disabled in the shader array determined by se and
 112 * sh is stored in mask[se * max_sh + sh].
 113 */
 114void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
 115{
 116	unsigned int se, sh, cu;
 117	const char *p;
 118
 119	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
 120
 121	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
 122		return;
 123
 124	p = amdgpu_disable_cu;
 125	for (;;) {
 126		char *next;
 127		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
 128
 129		if (ret < 3) {
 130			DRM_ERROR("amdgpu: could not parse disable_cu\n");
 131			return;
 132		}
 133
 134		if (se < max_se && sh < max_sh && cu < 16) {
 135			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
 136			mask[se * max_sh + sh] |= 1u << cu;
 137		} else {
 138			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
 139				  se, sh, cu);
 140		}
 141
 142		next = strchr(p, ',');
 143		if (!next)
 144			break;
 145		p = next + 1;
 146	}
 147}
 148
 149static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
 150{
 151	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
 152}
 153
 154static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
 155{
 156	if (amdgpu_compute_multipipe != -1) {
 157		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
 158			 amdgpu_compute_multipipe);
 159		return amdgpu_compute_multipipe == 1;
 160	}
 161
 162	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
 163		return true;
 164
 165	/* FIXME: spreading the queues across pipes causes perf regressions
 166	 * on POLARIS11 compute workloads */
 167	if (adev->asic_type == CHIP_POLARIS11)
 168		return false;
 169
 170	return adev->gfx.mec.num_mec > 1;
 171}
 172
 173bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
 174						struct amdgpu_ring *ring)
 175{
 176	int queue = ring->queue;
 177	int pipe = ring->pipe;
 178
 179	/* Policy: use pipe1 queue0 as high priority graphics queue if we
 180	 * have more than one gfx pipe.
 181	 */
 182	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
 183	    adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
 184		int me = ring->me;
 185		int bit;
 186
 187		bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
 188		if (ring == &adev->gfx.gfx_ring[bit])
 189			return true;
 190	}
 191
 192	return false;
 193}
 194
 195bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 196					       struct amdgpu_ring *ring)
 197{
 198	/* Policy: use 1st queue as high priority compute queue if we
 199	 * have more than one compute queue.
 200	 */
 201	if (adev->gfx.num_compute_rings > 1 &&
 202	    ring == &adev->gfx.compute_ring[0])
 203		return true;
 204
 205	return false;
 206}
 207
 208void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 209{
 210	int i, j, queue, pipe;
 211	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
 212	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
 213				     adev->gfx.mec.num_queue_per_pipe,
 214				     adev->gfx.num_compute_rings);
 215	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
 216
 217	if (multipipe_policy) {
 218		/* policy: make queues evenly cross all pipes on MEC1 only
 219		 * for multiple xcc, just use the original policy for simplicity */
 220		for (j = 0; j < num_xcc; j++) {
 221			for (i = 0; i < max_queues_per_mec; i++) {
 222				pipe = i % adev->gfx.mec.num_pipe_per_mec;
 223				queue = (i / adev->gfx.mec.num_pipe_per_mec) %
 224					 adev->gfx.mec.num_queue_per_pipe;
 225
 226				set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
 227					adev->gfx.mec_bitmap[j].queue_bitmap);
 228			}
 229		}
 230	} else {
 231		/* policy: amdgpu owns all queues in the given pipe */
 232		for (j = 0; j < num_xcc; j++) {
 233			for (i = 0; i < max_queues_per_mec; ++i)
 234				set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
 235		}
 236	}
 237
 238	for (j = 0; j < num_xcc; j++) {
 239		dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
 240			bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
 241	}
 242}
 243
 244void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
 245{
 246	int i, queue, pipe;
 247	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
 248	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
 249					adev->gfx.me.num_queue_per_pipe;
 250
 251	if (multipipe_policy) {
 252		/* policy: amdgpu owns the first queue per pipe at this stage
 253		 * will extend to mulitple queues per pipe later */
 254		for (i = 0; i < max_queues_per_me; i++) {
 255			pipe = i % adev->gfx.me.num_pipe_per_me;
 256			queue = (i / adev->gfx.me.num_pipe_per_me) %
 257				adev->gfx.me.num_queue_per_pipe;
 258
 259			set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
 260				adev->gfx.me.queue_bitmap);
 261		}
 262	} else {
 263		for (i = 0; i < max_queues_per_me; ++i)
 264			set_bit(i, adev->gfx.me.queue_bitmap);
 265	}
 266
 267	/* update the number of active graphics rings */
 268	adev->gfx.num_gfx_rings =
 269		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
 270}
 271
 272static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
 273				  struct amdgpu_ring *ring, int xcc_id)
 274{
 275	int queue_bit;
 276	int mec, pipe, queue;
 277
 278	queue_bit = adev->gfx.mec.num_mec
 279		    * adev->gfx.mec.num_pipe_per_mec
 280		    * adev->gfx.mec.num_queue_per_pipe;
 281
 282	while (--queue_bit >= 0) {
 283		if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
 284			continue;
 285
 286		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
 287
 288		/*
 289		 * 1. Using pipes 2/3 from MEC 2 seems cause problems.
 290		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
 291		 * only can be issued on queue 0.
 292		 */
 293		if ((mec == 1 && pipe > 1) || queue != 0)
 294			continue;
 295
 296		ring->me = mec + 1;
 297		ring->pipe = pipe;
 298		ring->queue = queue;
 299
 300		return 0;
 301	}
 302
 303	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
 304	return -EINVAL;
 305}
 306
 307int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
 308{
 309	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 310	struct amdgpu_irq_src *irq = &kiq->irq;
 311	struct amdgpu_ring *ring = &kiq->ring;
 312	int r = 0;
 313
 314	spin_lock_init(&kiq->ring_lock);
 315
 316	ring->adev = NULL;
 317	ring->ring_obj = NULL;
 318	ring->use_doorbell = true;
 319	ring->xcc_id = xcc_id;
 320	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
 321	ring->doorbell_index =
 322		(adev->doorbell_index.kiq +
 323		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
 324		<< 1;
 325
 326	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
 327	if (r)
 328		return r;
 329
 330	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 331	ring->no_scheduler = true;
 332	snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d.%d",
 333		 xcc_id, ring->me, ring->pipe, ring->queue);
 
 334	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
 335			     AMDGPU_RING_PRIO_DEFAULT, NULL);
 336	if (r)
 337		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
 338
 339	return r;
 340}
 341
 /* Tear down a KIQ ring; thin wrapper around amdgpu_ring_fini(). */
 void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
 {
 	amdgpu_ring_fini(ring);
 }
 346
 347void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
 348{
 349	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 350
 351	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
 352}
 353
 354int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
 355			unsigned int hpd_size, int xcc_id)
 356{
 357	int r;
 358	u32 *hpd;
 359	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 360
 361	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
 362				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
 363				    &kiq->eop_gpu_addr, (void **)&hpd);
 364	if (r) {
 365		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
 366		return r;
 367	}
 368
 369	memset(hpd, 0, hpd_size);
 370
 371	r = amdgpu_bo_reserve(kiq->eop_obj, true);
 372	if (unlikely(r != 0))
 373		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
 374	amdgpu_bo_kunmap(kiq->eop_obj);
 375	amdgpu_bo_unreserve(kiq->eop_obj);
 376
 377	return 0;
 378}
 379
 380/* create MQD for each compute/gfx queue */
 381int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 382			   unsigned int mqd_size, int xcc_id)
 383{
 384	int r, i, j;
 385	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 386	struct amdgpu_ring *ring = &kiq->ring;
 387	u32 domain = AMDGPU_GEM_DOMAIN_GTT;
 388
 389#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
 390	/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
 391	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
 392		domain |= AMDGPU_GEM_DOMAIN_VRAM;
 393#endif
 394
 395	/* create MQD for KIQ */
 396	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
 397		/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
 398		 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
 399		 * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
 400		 * KIQ MQD no matter SRIOV or Bare-metal
 401		 */
 402		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
 403					    AMDGPU_GEM_DOMAIN_VRAM |
 404					    AMDGPU_GEM_DOMAIN_GTT,
 405					    &ring->mqd_obj,
 406					    &ring->mqd_gpu_addr,
 407					    &ring->mqd_ptr);
 408		if (r) {
 409			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
 410			return r;
 411		}
 412
 413		/* prepare MQD backup */
 414		kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
 415		if (!kiq->mqd_backup) {
 416			dev_warn(adev->dev,
 417				 "no memory to create MQD backup for ring %s\n", ring->name);
 418			return -ENOMEM;
 419		}
 420	}
 421
 422	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
 423		/* create MQD for each KGQ */
 424		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 425			ring = &adev->gfx.gfx_ring[i];
 426			if (!ring->mqd_obj) {
 427				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
 428							    domain, &ring->mqd_obj,
 429							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
 430				if (r) {
 431					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
 432					return r;
 433				}
 434
 435				ring->mqd_size = mqd_size;
 436				/* prepare MQD backup */
 437				adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
 438				if (!adev->gfx.me.mqd_backup[i]) {
 439					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
 440					return -ENOMEM;
 441				}
 442			}
 443		}
 444	}
 445
 446	/* create MQD for each KCQ */
 447	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 448		j = i + xcc_id * adev->gfx.num_compute_rings;
 449		ring = &adev->gfx.compute_ring[j];
 450		if (!ring->mqd_obj) {
 451			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
 452						    domain, &ring->mqd_obj,
 453						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
 454			if (r) {
 455				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
 456				return r;
 457			}
 458
 459			ring->mqd_size = mqd_size;
 460			/* prepare MQD backup */
 461			adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
 462			if (!adev->gfx.mec.mqd_backup[j]) {
 463				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
 464				return -ENOMEM;
 465			}
 466		}
 467	}
 468
 469	return 0;
 470}
 471
 472void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
 473{
 474	struct amdgpu_ring *ring = NULL;
 475	int i, j;
 476	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 477
 478	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
 479		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 480			ring = &adev->gfx.gfx_ring[i];
 481			kfree(adev->gfx.me.mqd_backup[i]);
 482			amdgpu_bo_free_kernel(&ring->mqd_obj,
 483					      &ring->mqd_gpu_addr,
 484					      &ring->mqd_ptr);
 485		}
 486	}
 487
 488	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 489		j = i + xcc_id * adev->gfx.num_compute_rings;
 490		ring = &adev->gfx.compute_ring[j];
 491		kfree(adev->gfx.mec.mqd_backup[j]);
 492		amdgpu_bo_free_kernel(&ring->mqd_obj,
 493				      &ring->mqd_gpu_addr,
 494				      &ring->mqd_ptr);
 495	}
 496
 497	ring = &kiq->ring;
 498	kfree(kiq->mqd_backup);
 499	amdgpu_bo_free_kernel(&ring->mqd_obj,
 500			      &ring->mqd_gpu_addr,
 501			      &ring->mqd_ptr);
 502}
 503
 504int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
 505{
 506	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 507	struct amdgpu_ring *kiq_ring = &kiq->ring;
 508	struct amdgpu_hive_info *hive;
 509	struct amdgpu_ras *ras;
 510	int hive_ras_recovery = 0;
 511	int i, r = 0;
 512	int j;
 513
 
 
 
 
 
 
 
 
 
 
 514	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
 515		return -EINVAL;
 516
 
 
 
 517	spin_lock(&kiq->ring_lock);
 518	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
 519					adev->gfx.num_compute_rings)) {
 520		spin_unlock(&kiq->ring_lock);
 521		return -ENOMEM;
 522	}
 523
 524	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 525		j = i + xcc_id * adev->gfx.num_compute_rings;
 526		kiq->pmf->kiq_unmap_queues(kiq_ring,
 527					   &adev->gfx.compute_ring[j],
 528					   RESET_QUEUES, 0, 0);
 529	}
 530
 531	/**
 532	 * This is workaround: only skip kiq_ring test
 533	 * during ras recovery in suspend stage for gfx9.4.3
 
 
 534	 */
 535	hive = amdgpu_get_xgmi_hive(adev);
 536	if (hive) {
 537		hive_ras_recovery = atomic_read(&hive->ras_recovery);
 538		amdgpu_put_xgmi_hive(hive);
 539	}
 540
 541	ras = amdgpu_ras_get_context(adev);
 542	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
 543		ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) {
 544		spin_unlock(&kiq->ring_lock);
 545		return 0;
 546	}
 547
 548	if (kiq_ring->sched.ready && !adev->job_hang)
 549		r = amdgpu_ring_test_helper(kiq_ring);
 550	spin_unlock(&kiq->ring_lock);
 551
 552	return r;
 553}
 554
 555int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
 556{
 557	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 558	struct amdgpu_ring *kiq_ring = &kiq->ring;
 559	int i, r = 0;
 560	int j;
 561
 
 
 
 
 
 
 
 
 
 
 
 
 562	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
 563		return -EINVAL;
 564
 565	spin_lock(&kiq->ring_lock);
 
 
 566	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
 
 567		if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
 568						adev->gfx.num_gfx_rings)) {
 569			spin_unlock(&kiq->ring_lock);
 570			return -ENOMEM;
 571		}
 572
 573		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 574			j = i + xcc_id * adev->gfx.num_gfx_rings;
 575			kiq->pmf->kiq_unmap_queues(kiq_ring,
 576						   &adev->gfx.gfx_ring[j],
 577						   PREEMPT_QUEUES, 0, 0);
 578		}
 579	}
 
 580
 581	if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
 
 
 
 
 582		r = amdgpu_ring_test_helper(kiq_ring);
 583	spin_unlock(&kiq->ring_lock);
 
 584
 585	return r;
 586}
 587
 /*
  * Translate a linear compute queue bit into the bit position used in the
  * queue mask passed to kiq_set_resources, which is laid out with a fixed
  * stride of 4 pipes per MEC and 8 queues per pipe.
  */
 int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
 					int queue_bit)
 {
 	int mec, pipe, queue;

 	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

 	return mec * 4 * 8 + pipe * 8 + queue;
 }
 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 601int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
 602{
 603	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 604	struct amdgpu_ring *kiq_ring = &kiq->ring;
 605	uint64_t queue_mask = 0;
 606	int r, i, j;
 607
 
 
 
 608	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
 609		return -EINVAL;
 610
 611	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
 612		if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
 613			continue;
 614
 615		/* This situation may be hit in the future if a new HW
 616		 * generation exposes more than 64 queues. If so, the
 617		 * definition of queue_mask needs updating */
 618		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
 619			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
 620			break;
 621		}
 622
 623		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
 624	}
 625
 626	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
 627							kiq_ring->queue);
 628	amdgpu_device_flush_hdp(adev, NULL);
 629
 
 
 
 630	spin_lock(&kiq->ring_lock);
 631	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
 632					adev->gfx.num_compute_rings +
 633					kiq->pmf->set_resources_size);
 634	if (r) {
 635		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 636		spin_unlock(&kiq->ring_lock);
 637		return r;
 638	}
 639
 640	if (adev->enable_mes)
 641		queue_mask = ~0ULL;
 642
 643	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
 644	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 645		j = i + xcc_id * adev->gfx.num_compute_rings;
 646		kiq->pmf->kiq_map_queues(kiq_ring,
 647					 &adev->gfx.compute_ring[j]);
 648	}
 649
 
 
 
 
 
 
 650	r = amdgpu_ring_test_helper(kiq_ring);
 651	spin_unlock(&kiq->ring_lock);
 652	if (r)
 653		DRM_ERROR("KCQ enable failed\n");
 654
 655	return r;
 656}
 657
 658int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
 659{
 660	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 661	struct amdgpu_ring *kiq_ring = &kiq->ring;
 662	int r, i, j;
 663
 664	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
 665		return -EINVAL;
 666
 667	amdgpu_device_flush_hdp(adev, NULL);
 668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 669	spin_lock(&kiq->ring_lock);
 670	/* No need to map kcq on the slave */
 671	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
 672		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
 673						adev->gfx.num_gfx_rings);
 674		if (r) {
 675			DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 676			spin_unlock(&kiq->ring_lock);
 677			return r;
 678		}
 679
 680		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 681			j = i + xcc_id * adev->gfx.num_gfx_rings;
 682			kiq->pmf->kiq_map_queues(kiq_ring,
 683						 &adev->gfx.gfx_ring[j]);
 684		}
 685	}
 686
 
 
 
 
 
 
 687	r = amdgpu_ring_test_helper(kiq_ring);
 688	spin_unlock(&kiq->ring_lock);
 689	if (r)
 690		DRM_ERROR("KGQ enable failed\n");
 691
 692	return r;
 693}
 694
 695/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
 696 *
 697 * @adev: amdgpu_device pointer
 698 * @bool enable true: enable gfx off feature, false: disable gfx off feature
 699 *
 700 * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
 701 * 2. other client can send request to disable gfx off feature, the request should be honored.
 702 * 3. other client can cancel their request of disable gfx off feature
 703 * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
 704 */
 705
 706void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 707{
 708	unsigned long delay = GFX_OFF_DELAY_ENABLE;
 709
 710	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
 711		return;
 712
 713	mutex_lock(&adev->gfx.gfx_off_mutex);
 714
 715	if (enable) {
 716		/* If the count is already 0, it means there's an imbalance bug somewhere.
 717		 * Note that the bug may be in a different caller than the one which triggers the
 718		 * WARN_ON_ONCE.
 719		 */
 720		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
 721			goto unlock;
 722
 723		adev->gfx.gfx_off_req_count--;
 724
 725		if (adev->gfx.gfx_off_req_count == 0 &&
 726		    !adev->gfx.gfx_off_state) {
 727			/* If going to s2idle, no need to wait */
 728			if (adev->in_s0ix) {
 729				if (!amdgpu_dpm_set_powergating_by_smu(adev,
 730						AMD_IP_BLOCK_TYPE_GFX, true))
 731					adev->gfx.gfx_off_state = true;
 732			} else {
 733				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
 734					      delay);
 735			}
 736		}
 737	} else {
 738		if (adev->gfx.gfx_off_req_count == 0) {
 739			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
 740
 741			if (adev->gfx.gfx_off_state &&
 742			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
 743				adev->gfx.gfx_off_state = false;
 744
 745				if (adev->gfx.funcs->init_spm_golden) {
 746					dev_dbg(adev->dev,
 747						"GFXOFF is disabled, re-init SPM golden settings\n");
 748					amdgpu_gfx_init_spm_golden(adev);
 749				}
 750			}
 751		}
 752
 753		adev->gfx.gfx_off_req_count++;
 754	}
 755
 756unlock:
 757	mutex_unlock(&adev->gfx.gfx_off_mutex);
 758}
 759
 760int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
 761{
 762	int r = 0;
 763
 764	mutex_lock(&adev->gfx.gfx_off_mutex);
 765
 766	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
 767
 768	mutex_unlock(&adev->gfx.gfx_off_mutex);
 769
 770	return r;
 771}
 772
 773int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
 774{
 775	int r = 0;
 776
 777	mutex_lock(&adev->gfx.gfx_off_mutex);
 778
 779	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
 780
 781	mutex_unlock(&adev->gfx.gfx_off_mutex);
 782
 783	return r;
 784}
 785
 786int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
 787{
 788	int r = 0;
 789
 790	mutex_lock(&adev->gfx.gfx_off_mutex);
 791
 792	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
 793
 794	mutex_unlock(&adev->gfx.gfx_off_mutex);
 795
 796	return r;
 797}
 798
 799int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
 800{
 801
 802	int r = 0;
 803
 804	mutex_lock(&adev->gfx.gfx_off_mutex);
 805
 806	r = amdgpu_dpm_get_status_gfxoff(adev, value);
 807
 808	mutex_unlock(&adev->gfx.gfx_off_mutex);
 809
 810	return r;
 811}
 812
 813int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
 814{
 815	int r;
 816
 817	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
 818		if (!amdgpu_persistent_edc_harvesting_supported(adev))
 819			amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
 
 
 
 820
 821		r = amdgpu_ras_block_late_init(adev, ras_block);
 822		if (r)
 823			return r;
 824
 
 
 
 825		if (adev->gfx.cp_ecc_error_irq.funcs) {
 826			r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
 827			if (r)
 828				goto late_fini;
 829		}
 830	} else {
 831		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
 832	}
 833
 834	return 0;
 835late_fini:
 836	amdgpu_ras_block_late_fini(adev, ras_block);
 837	return r;
 838}
 839
 840int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
 841{
 842	int err = 0;
 843	struct amdgpu_gfx_ras *ras = NULL;
 844
 845	/* adev->gfx.ras is NULL, which means gfx does not
 846	 * support ras function, then do nothing here.
 847	 */
 848	if (!adev->gfx.ras)
 849		return 0;
 850
 851	ras = adev->gfx.ras;
 852
 853	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
 854	if (err) {
 855		dev_err(adev->dev, "Failed to register gfx ras block!\n");
 856		return err;
 857	}
 858
 859	strcpy(ras->ras_block.ras_comm.name, "gfx");
 860	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
 861	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
 862	adev->gfx.ras_if = &ras->ras_block.ras_comm;
 863
 864	/* If not define special ras_late_init function, use gfx default ras_late_init */
 865	if (!ras->ras_block.ras_late_init)
 866		ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
 867
 868	/* If not defined special ras_cb function, use default ras_cb */
 869	if (!ras->ras_block.ras_cb)
 870		ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
 871
 872	return 0;
 873}
 874
 875int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
 876						struct amdgpu_iv_entry *entry)
 877{
 878	if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
 879		return adev->gfx.ras->poison_consumption_handler(adev, entry);
 880
 881	return 0;
 882}
 883
 884int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
 885		void *err_data,
 886		struct amdgpu_iv_entry *entry)
 887{
 888	/* TODO ue will trigger an interrupt.
 889	 *
 890	 * When “Full RAS” is enabled, the per-IP interrupt sources should
 891	 * be disabled and the driver should only look for the aggregated
 892	 * interrupt via sync flood
 893	 */
 894	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
 895		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
 896		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
 897		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
 898			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
 899		amdgpu_ras_reset_gpu(adev);
 900	}
 901	return AMDGPU_RAS_SUCCESS;
 902}
 903
 904int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
 905				  struct amdgpu_irq_src *source,
 906				  struct amdgpu_iv_entry *entry)
 907{
 908	struct ras_common_if *ras_if = adev->gfx.ras_if;
 909	struct ras_dispatch_if ih_data = {
 910		.entry = entry,
 911	};
 912
 913	if (!ras_if)
 914		return 0;
 915
 916	ih_data.head = *ras_if;
 917
 918	DRM_ERROR("CP ECC ERROR IRQ\n");
 919	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
 920	return 0;
 921}
 922
 923void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
 924		void *ras_error_status,
 925		void (*func)(struct amdgpu_device *adev, void *ras_error_status,
 926				int xcc_id))
 927{
 928	int i;
 929	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
 930	uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
 931	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 932
 933	if (err_data) {
 934		err_data->ue_count = 0;
 935		err_data->ce_count = 0;
 936	}
 937
 938	for_each_inst(i, xcc_mask)
 939		func(adev, ras_error_status, i);
 940}
 941
 942uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
 943{
 944	signed long r, cnt = 0;
 945	unsigned long flags;
 946	uint32_t seq, reg_val_offs = 0, value = 0;
 947	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 948	struct amdgpu_ring *ring = &kiq->ring;
 949
 950	if (amdgpu_device_skip_hw_access(adev))
 951		return 0;
 952
 953	if (adev->mes.ring.sched.ready)
 954		return amdgpu_mes_rreg(adev, reg);
 955
 956	BUG_ON(!ring->funcs->emit_rreg);
 957
 958	spin_lock_irqsave(&kiq->ring_lock, flags);
 959	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
 960		pr_err("critical bug! too many kiq readers\n");
 961		goto failed_unlock;
 962	}
 963	amdgpu_ring_alloc(ring, 32);
 
 
 
 964	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
 965	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
 966	if (r)
 967		goto failed_undo;
 968
 969	amdgpu_ring_commit(ring);
 970	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 971
 972	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 973
 974	/* don't wait anymore for gpu reset case because this way may
 975	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
 976	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
 977	 * never return if we keep waiting in virt_kiq_rreg, which cause
 978	 * gpu_recover() hang there.
 979	 *
 980	 * also don't wait anymore for IRQ context
 981	 * */
 982	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
 983		goto failed_kiq_read;
 984
 985	might_sleep();
 986	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
 987		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
 988		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 989	}
 990
 991	if (cnt > MAX_KIQ_REG_TRY)
 992		goto failed_kiq_read;
 993
 994	mb();
 995	value = adev->wb.wb[reg_val_offs];
 996	amdgpu_device_wb_free(adev, reg_val_offs);
 997	return value;
 998
 999failed_undo:
1000	amdgpu_ring_undo(ring);
1001failed_unlock:
1002	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1003failed_kiq_read:
1004	if (reg_val_offs)
1005		amdgpu_device_wb_free(adev, reg_val_offs);
1006	dev_err(adev->dev, "failed to read reg:%x\n", reg);
1007	return ~0;
1008}
1009
1010void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
1011{
1012	signed long r, cnt = 0;
1013	unsigned long flags;
1014	uint32_t seq;
1015	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
1016	struct amdgpu_ring *ring = &kiq->ring;
1017
1018	BUG_ON(!ring->funcs->emit_wreg);
1019
1020	if (amdgpu_device_skip_hw_access(adev))
1021		return;
1022
1023	if (adev->mes.ring.sched.ready) {
1024		amdgpu_mes_wreg(adev, reg, v);
1025		return;
1026	}
1027
1028	spin_lock_irqsave(&kiq->ring_lock, flags);
1029	amdgpu_ring_alloc(ring, 32);
 
 
 
1030	amdgpu_ring_emit_wreg(ring, reg, v);
1031	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
1032	if (r)
1033		goto failed_undo;
1034
1035	amdgpu_ring_commit(ring);
1036	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1037
1038	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1039
1040	/* don't wait anymore for gpu reset case because this way may
1041	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
1042	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
1043	 * never return if we keep waiting in virt_kiq_rreg, which cause
1044	 * gpu_recover() hang there.
1045	 *
1046	 * also don't wait anymore for IRQ context
1047	 * */
1048	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
1049		goto failed_kiq_write;
1050
1051	might_sleep();
1052	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
1053
1054		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
1055		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1056	}
1057
1058	if (cnt > MAX_KIQ_REG_TRY)
1059		goto failed_kiq_write;
1060
1061	return;
1062
1063failed_undo:
1064	amdgpu_ring_undo(ring);
 
1065	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1066failed_kiq_write:
1067	dev_err(adev->dev, "failed to write reg:%x\n", reg);
1068}
1069
1070int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
1071{
1072	if (amdgpu_num_kcq == -1) {
1073		return 8;
1074	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
1075		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
1076		return 8;
1077	}
1078	return amdgpu_num_kcq;
1079}
1080
1081void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
1082				  uint32_t ucode_id)
1083{
1084	const struct gfx_firmware_header_v1_0 *cp_hdr;
1085	const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
1086	struct amdgpu_firmware_info *info = NULL;
1087	const struct firmware *ucode_fw;
1088	unsigned int fw_size;
1089
1090	switch (ucode_id) {
1091	case AMDGPU_UCODE_ID_CP_PFP:
1092		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1093			adev->gfx.pfp_fw->data;
1094		adev->gfx.pfp_fw_version =
1095			le32_to_cpu(cp_hdr->header.ucode_version);
1096		adev->gfx.pfp_feature_version =
1097			le32_to_cpu(cp_hdr->ucode_feature_version);
1098		ucode_fw = adev->gfx.pfp_fw;
1099		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1100		break;
1101	case AMDGPU_UCODE_ID_CP_RS64_PFP:
1102		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1103			adev->gfx.pfp_fw->data;
1104		adev->gfx.pfp_fw_version =
1105			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1106		adev->gfx.pfp_feature_version =
1107			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1108		ucode_fw = adev->gfx.pfp_fw;
1109		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1110		break;
1111	case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
1112	case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
1113		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1114			adev->gfx.pfp_fw->data;
1115		ucode_fw = adev->gfx.pfp_fw;
1116		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1117		break;
1118	case AMDGPU_UCODE_ID_CP_ME:
1119		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1120			adev->gfx.me_fw->data;
1121		adev->gfx.me_fw_version =
1122			le32_to_cpu(cp_hdr->header.ucode_version);
1123		adev->gfx.me_feature_version =
1124			le32_to_cpu(cp_hdr->ucode_feature_version);
1125		ucode_fw = adev->gfx.me_fw;
1126		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1127		break;
1128	case AMDGPU_UCODE_ID_CP_RS64_ME:
1129		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1130			adev->gfx.me_fw->data;
1131		adev->gfx.me_fw_version =
1132			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1133		adev->gfx.me_feature_version =
1134			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1135		ucode_fw = adev->gfx.me_fw;
1136		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1137		break;
1138	case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
1139	case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
1140		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1141			adev->gfx.me_fw->data;
1142		ucode_fw = adev->gfx.me_fw;
1143		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1144		break;
1145	case AMDGPU_UCODE_ID_CP_CE:
1146		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1147			adev->gfx.ce_fw->data;
1148		adev->gfx.ce_fw_version =
1149			le32_to_cpu(cp_hdr->header.ucode_version);
1150		adev->gfx.ce_feature_version =
1151			le32_to_cpu(cp_hdr->ucode_feature_version);
1152		ucode_fw = adev->gfx.ce_fw;
1153		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1154		break;
1155	case AMDGPU_UCODE_ID_CP_MEC1:
1156		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1157			adev->gfx.mec_fw->data;
1158		adev->gfx.mec_fw_version =
1159			le32_to_cpu(cp_hdr->header.ucode_version);
1160		adev->gfx.mec_feature_version =
1161			le32_to_cpu(cp_hdr->ucode_feature_version);
1162		ucode_fw = adev->gfx.mec_fw;
1163		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1164			  le32_to_cpu(cp_hdr->jt_size) * 4;
1165		break;
1166	case AMDGPU_UCODE_ID_CP_MEC1_JT:
1167		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1168			adev->gfx.mec_fw->data;
1169		ucode_fw = adev->gfx.mec_fw;
1170		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1171		break;
1172	case AMDGPU_UCODE_ID_CP_MEC2:
1173		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1174			adev->gfx.mec2_fw->data;
1175		adev->gfx.mec2_fw_version =
1176			le32_to_cpu(cp_hdr->header.ucode_version);
1177		adev->gfx.mec2_feature_version =
1178			le32_to_cpu(cp_hdr->ucode_feature_version);
1179		ucode_fw = adev->gfx.mec2_fw;
1180		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1181			  le32_to_cpu(cp_hdr->jt_size) * 4;
1182		break;
1183	case AMDGPU_UCODE_ID_CP_MEC2_JT:
1184		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1185			adev->gfx.mec2_fw->data;
1186		ucode_fw = adev->gfx.mec2_fw;
1187		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1188		break;
1189	case AMDGPU_UCODE_ID_CP_RS64_MEC:
1190		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1191			adev->gfx.mec_fw->data;
1192		adev->gfx.mec_fw_version =
1193			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1194		adev->gfx.mec_feature_version =
1195			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1196		ucode_fw = adev->gfx.mec_fw;
1197		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1198		break;
1199	case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
1200	case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
1201	case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
1202	case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
1203		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1204			adev->gfx.mec_fw->data;
1205		ucode_fw = adev->gfx.mec_fw;
1206		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1207		break;
1208	default:
1209		break;
 
1210	}
1211
1212	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1213		info = &adev->firmware.ucode[ucode_id];
1214		info->ucode_id = ucode_id;
1215		info->fw = ucode_fw;
1216		adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
1217	}
1218}
1219
1220bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
1221{
1222	return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
1223			adev->gfx.num_xcc_per_xcp : 1));
1224}
1225
1226static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
1227						struct device_attribute *addr,
1228						char *buf)
1229{
1230	struct drm_device *ddev = dev_get_drvdata(dev);
1231	struct amdgpu_device *adev = drm_to_adev(ddev);
1232	int mode;
1233
1234	mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
1235					       AMDGPU_XCP_FL_NONE);
1236
1237	return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
1238}
1239
1240static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
1241						struct device_attribute *addr,
1242						const char *buf, size_t count)
1243{
1244	struct drm_device *ddev = dev_get_drvdata(dev);
1245	struct amdgpu_device *adev = drm_to_adev(ddev);
1246	enum amdgpu_gfx_partition mode;
1247	int ret = 0, num_xcc;
1248
1249	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1250	if (num_xcc % 2 != 0)
1251		return -EINVAL;
1252
1253	if (!strncasecmp("SPX", buf, strlen("SPX"))) {
1254		mode = AMDGPU_SPX_PARTITION_MODE;
1255	} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
1256		/*
1257		 * DPX mode needs AIDs to be in multiple of 2.
1258		 * Each AID connects 2 XCCs.
1259		 */
1260		if (num_xcc%4)
1261			return -EINVAL;
1262		mode = AMDGPU_DPX_PARTITION_MODE;
1263	} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
1264		if (num_xcc != 6)
1265			return -EINVAL;
1266		mode = AMDGPU_TPX_PARTITION_MODE;
1267	} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
1268		if (num_xcc != 8)
1269			return -EINVAL;
1270		mode = AMDGPU_QPX_PARTITION_MODE;
1271	} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
1272		mode = AMDGPU_CPX_PARTITION_MODE;
1273	} else {
1274		return -EINVAL;
1275	}
1276
1277	ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
1278
1279	if (ret)
1280		return ret;
1281
1282	return count;
1283}
1284
 
 
 
 
 
 
 
 
1285static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
1286						struct device_attribute *addr,
1287						char *buf)
1288{
1289	struct drm_device *ddev = dev_get_drvdata(dev);
1290	struct amdgpu_device *adev = drm_to_adev(ddev);
1291	char *supported_partition;
 
 
1292
1293	/* TBD */
1294	switch (NUM_XCC(adev->gfx.xcc_mask)) {
1295	case 8:
1296		supported_partition = "SPX, DPX, QPX, CPX";
1297		break;
1298	case 6:
1299		supported_partition = "SPX, TPX, CPX";
1300		break;
1301	case 4:
1302		supported_partition = "SPX, DPX, CPX";
1303		break;
1304	/* this seems only existing in emulation phase */
1305	case 2:
1306		supported_partition = "SPX, CPX";
1307		break;
1308	default:
1309		supported_partition = "Not supported";
1310		break;
1311	}
1312
1313	return sysfs_emit(buf, "%s\n", supported_partition);
 
 
1314}
1315
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1316static DEVICE_ATTR(current_compute_partition, 0644,
1317		   amdgpu_gfx_get_current_compute_partition,
1318		   amdgpu_gfx_set_compute_partition);
1319
1320static DEVICE_ATTR(available_compute_partition, 0444,
1321		   amdgpu_gfx_get_available_compute_partition, NULL);
 
 
1322
1323int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
 
 
 
1324{
 
 
1325	int r;
1326
 
 
 
 
 
 
 
 
 
 
1327	r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
1328	if (r)
1329		return r;
1330
1331	r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
 
 
1332
1333	return r;
1334}
1335
1336void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
1337{
 
 
 
 
 
 
 
 
1338	device_remove_file(adev->dev, &dev_attr_current_compute_partition);
1339	device_remove_file(adev->dev, &dev_attr_available_compute_partition);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1340}

   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 */
  25
  26#include <linux/firmware.h>
  27#include <linux/pm_runtime.h>
  28
  29#include "amdgpu.h"
  30#include "amdgpu_gfx.h"
  31#include "amdgpu_rlc.h"
  32#include "amdgpu_ras.h"
  33#include "amdgpu_reset.h"
  34#include "amdgpu_xcp.h"
  35#include "amdgpu_xgmi.h"
  36
  37/* delay 0.1 second to enable gfx off feature */
  38#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
  39
  40#define GFX_OFF_NO_DELAY 0
  41
  42/*
  43 * GPU GFX IP block helpers function.
  44 */
  45
  46int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
  47				int pipe, int queue)
  48{
  49	int bit = 0;
  50
  51	bit += mec * adev->gfx.mec.num_pipe_per_mec
  52		* adev->gfx.mec.num_queue_per_pipe;
  53	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
  54	bit += queue;
  55
  56	return bit;
  57}
  58
  59void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
  60				 int *mec, int *pipe, int *queue)
  61{
  62	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
  63	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
  64		% adev->gfx.mec.num_pipe_per_mec;
  65	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
  66	       / adev->gfx.mec.num_pipe_per_mec;
  67
  68}
  69
  70bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
  71				     int xcc_id, int mec, int pipe, int queue)
  72{
  73	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
  74			adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
  75}
  76
  77int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
  78			       int me, int pipe, int queue)
  79{
  80	int bit = 0;
  81
  82	bit += me * adev->gfx.me.num_pipe_per_me
  83		* adev->gfx.me.num_queue_per_pipe;
  84	bit += pipe * adev->gfx.me.num_queue_per_pipe;
  85	bit += queue;
  86
  87	return bit;
  88}
  89
 
 
 
 
 
 
 
 
 
 
  90bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
  91				    int me, int pipe, int queue)
  92{
  93	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
  94			adev->gfx.me.queue_bitmap);
  95}
  96
  97/**
  98 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
  99 *
 100 * @mask: array in which the per-shader array disable masks will be stored
 101 * @max_se: number of SEs
 102 * @max_sh: number of SHs
 103 *
 104 * The bitmask of CUs to be disabled in the shader array determined by se and
 105 * sh is stored in mask[se * max_sh + sh].
 106 */
 107void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
 108{
 109	unsigned int se, sh, cu;
 110	const char *p;
 111
 112	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
 113
 114	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
 115		return;
 116
 117	p = amdgpu_disable_cu;
 118	for (;;) {
 119		char *next;
 120		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
 121
 122		if (ret < 3) {
 123			DRM_ERROR("amdgpu: could not parse disable_cu\n");
 124			return;
 125		}
 126
 127		if (se < max_se && sh < max_sh && cu < 16) {
 128			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
 129			mask[se * max_sh + sh] |= 1u << cu;
 130		} else {
 131			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
 132				  se, sh, cu);
 133		}
 134
 135		next = strchr(p, ',');
 136		if (!next)
 137			break;
 138		p = next + 1;
 139	}
 140}
 141
 142static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
 143{
 144	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
 145}
 146
 147static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
 148{
 149	if (amdgpu_compute_multipipe != -1) {
 150		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
 151			 amdgpu_compute_multipipe);
 152		return amdgpu_compute_multipipe == 1;
 153	}
 154
 155	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
 156		return true;
 157
 158	/* FIXME: spreading the queues across pipes causes perf regressions
 159	 * on POLARIS11 compute workloads */
 160	if (adev->asic_type == CHIP_POLARIS11)
 161		return false;
 162
 163	return adev->gfx.mec.num_mec > 1;
 164}
 165
 166bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
 167						struct amdgpu_ring *ring)
 168{
 169	int queue = ring->queue;
 170	int pipe = ring->pipe;
 171
 172	/* Policy: use pipe1 queue0 as high priority graphics queue if we
 173	 * have more than one gfx pipe.
 174	 */
 175	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
 176	    adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
 177		int me = ring->me;
 178		int bit;
 179
 180		bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
 181		if (ring == &adev->gfx.gfx_ring[bit])
 182			return true;
 183	}
 184
 185	return false;
 186}
 187
 188bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 189					       struct amdgpu_ring *ring)
 190{
 191	/* Policy: use 1st queue as high priority compute queue if we
 192	 * have more than one compute queue.
 193	 */
 194	if (adev->gfx.num_compute_rings > 1 &&
 195	    ring == &adev->gfx.compute_ring[0])
 196		return true;
 197
 198	return false;
 199}
 200
 201void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 202{
 203	int i, j, queue, pipe;
 204	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
 205	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
 206				     adev->gfx.mec.num_queue_per_pipe,
 207				     adev->gfx.num_compute_rings);
 208	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
 209
 210	if (multipipe_policy) {
 211		/* policy: make queues evenly cross all pipes on MEC1 only
 212		 * for multiple xcc, just use the original policy for simplicity */
 213		for (j = 0; j < num_xcc; j++) {
 214			for (i = 0; i < max_queues_per_mec; i++) {
 215				pipe = i % adev->gfx.mec.num_pipe_per_mec;
 216				queue = (i / adev->gfx.mec.num_pipe_per_mec) %
 217					 adev->gfx.mec.num_queue_per_pipe;
 218
 219				set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
 220					adev->gfx.mec_bitmap[j].queue_bitmap);
 221			}
 222		}
 223	} else {
 224		/* policy: amdgpu owns all queues in the given pipe */
 225		for (j = 0; j < num_xcc; j++) {
 226			for (i = 0; i < max_queues_per_mec; ++i)
 227				set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
 228		}
 229	}
 230
 231	for (j = 0; j < num_xcc; j++) {
 232		dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
 233			bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
 234	}
 235}
 236
 237void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
 238{
 239	int i, queue, pipe;
 240	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
 241	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
 242					adev->gfx.me.num_queue_per_pipe;
 243
 244	if (multipipe_policy) {
 245		/* policy: amdgpu owns the first queue per pipe at this stage
 246		 * will extend to mulitple queues per pipe later */
 247		for (i = 0; i < max_queues_per_me; i++) {
 248			pipe = i % adev->gfx.me.num_pipe_per_me;
 249			queue = (i / adev->gfx.me.num_pipe_per_me) %
 250				adev->gfx.me.num_queue_per_pipe;
 251
 252			set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
 253				adev->gfx.me.queue_bitmap);
 254		}
 255	} else {
 256		for (i = 0; i < max_queues_per_me; ++i)
 257			set_bit(i, adev->gfx.me.queue_bitmap);
 258	}
 259
 260	/* update the number of active graphics rings */
 261	adev->gfx.num_gfx_rings =
 262		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
 263}
 264
 265static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
 266				  struct amdgpu_ring *ring, int xcc_id)
 267{
 268	int queue_bit;
 269	int mec, pipe, queue;
 270
 271	queue_bit = adev->gfx.mec.num_mec
 272		    * adev->gfx.mec.num_pipe_per_mec
 273		    * adev->gfx.mec.num_queue_per_pipe;
 274
 275	while (--queue_bit >= 0) {
 276		if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
 277			continue;
 278
 279		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
 280
 281		/*
 282		 * 1. Using pipes 2/3 from MEC 2 seems cause problems.
 283		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
 284		 * only can be issued on queue 0.
 285		 */
 286		if ((mec == 1 && pipe > 1) || queue != 0)
 287			continue;
 288
 289		ring->me = mec + 1;
 290		ring->pipe = pipe;
 291		ring->queue = queue;
 292
 293		return 0;
 294	}
 295
 296	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
 297	return -EINVAL;
 298}
 299
 300int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
 301{
 302	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 303	struct amdgpu_irq_src *irq = &kiq->irq;
 304	struct amdgpu_ring *ring = &kiq->ring;
 305	int r = 0;
 306
 307	spin_lock_init(&kiq->ring_lock);
 308
 309	ring->adev = NULL;
 310	ring->ring_obj = NULL;
 311	ring->use_doorbell = true;
 312	ring->xcc_id = xcc_id;
 313	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
 314	ring->doorbell_index =
 315		(adev->doorbell_index.kiq +
 316		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
 317		<< 1;
 318
 319	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
 320	if (r)
 321		return r;
 322
 323	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 324	ring->no_scheduler = true;
 325	snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
 326		 (unsigned char)xcc_id, (unsigned char)ring->me,
 327		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
 328	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
 329			     AMDGPU_RING_PRIO_DEFAULT, NULL);
 330	if (r)
 331		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
 332
 333	return r;
 334}
 335
 336void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
 337{
 338	amdgpu_ring_fini(ring);
 339}
 340
 341void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
 342{
 343	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 344
 345	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
 346}
 347
 348int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
 349			unsigned int hpd_size, int xcc_id)
 350{
 351	int r;
 352	u32 *hpd;
 353	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 354
 355	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
 356				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
 357				    &kiq->eop_gpu_addr, (void **)&hpd);
 358	if (r) {
 359		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
 360		return r;
 361	}
 362
 363	memset(hpd, 0, hpd_size);
 364
 365	r = amdgpu_bo_reserve(kiq->eop_obj, true);
 366	if (unlikely(r != 0))
 367		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
 368	amdgpu_bo_kunmap(kiq->eop_obj);
 369	amdgpu_bo_unreserve(kiq->eop_obj);
 370
 371	return 0;
 372}
 373
 374/* create MQD for each compute/gfx queue */
 375int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 376			   unsigned int mqd_size, int xcc_id)
 377{
 378	int r, i, j;
 379	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 380	struct amdgpu_ring *ring = &kiq->ring;
 381	u32 domain = AMDGPU_GEM_DOMAIN_GTT;
 382
 383#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
 384	/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
 385	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
 386		domain |= AMDGPU_GEM_DOMAIN_VRAM;
 387#endif
 388
 389	/* create MQD for KIQ */
 390	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
 391		/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
 392		 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
 393		 * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
 394		 * KIQ MQD no matter SRIOV or Bare-metal
 395		 */
 396		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
 397					    AMDGPU_GEM_DOMAIN_VRAM |
 398					    AMDGPU_GEM_DOMAIN_GTT,
 399					    &ring->mqd_obj,
 400					    &ring->mqd_gpu_addr,
 401					    &ring->mqd_ptr);
 402		if (r) {
 403			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
 404			return r;
 405		}
 406
 407		/* prepare MQD backup */
 408		kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
 409		if (!kiq->mqd_backup) {
 410			dev_warn(adev->dev,
 411				 "no memory to create MQD backup for ring %s\n", ring->name);
 412			return -ENOMEM;
 413		}
 414	}
 415
 416	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
 417		/* create MQD for each KGQ */
 418		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 419			ring = &adev->gfx.gfx_ring[i];
 420			if (!ring->mqd_obj) {
 421				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
 422							    domain, &ring->mqd_obj,
 423							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
 424				if (r) {
 425					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
 426					return r;
 427				}
 428
 429				ring->mqd_size = mqd_size;
 430				/* prepare MQD backup */
 431				adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
 432				if (!adev->gfx.me.mqd_backup[i]) {
 433					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
 434					return -ENOMEM;
 435				}
 436			}
 437		}
 438	}
 439
 440	/* create MQD for each KCQ */
 441	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 442		j = i + xcc_id * adev->gfx.num_compute_rings;
 443		ring = &adev->gfx.compute_ring[j];
 444		if (!ring->mqd_obj) {
 445			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
 446						    domain, &ring->mqd_obj,
 447						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
 448			if (r) {
 449				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
 450				return r;
 451			}
 452
 453			ring->mqd_size = mqd_size;
 454			/* prepare MQD backup */
 455			adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
 456			if (!adev->gfx.mec.mqd_backup[j]) {
 457				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
 458				return -ENOMEM;
 459			}
 460		}
 461	}
 462
 463	return 0;
 464}
 465
 466void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
 467{
 468	struct amdgpu_ring *ring = NULL;
 469	int i, j;
 470	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 471
 472	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
 473		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 474			ring = &adev->gfx.gfx_ring[i];
 475			kfree(adev->gfx.me.mqd_backup[i]);
 476			amdgpu_bo_free_kernel(&ring->mqd_obj,
 477					      &ring->mqd_gpu_addr,
 478					      &ring->mqd_ptr);
 479		}
 480	}
 481
 482	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 483		j = i + xcc_id * adev->gfx.num_compute_rings;
 484		ring = &adev->gfx.compute_ring[j];
 485		kfree(adev->gfx.mec.mqd_backup[j]);
 486		amdgpu_bo_free_kernel(&ring->mqd_obj,
 487				      &ring->mqd_gpu_addr,
 488				      &ring->mqd_ptr);
 489	}
 490
 491	ring = &kiq->ring;
 492	kfree(kiq->mqd_backup);
 493	amdgpu_bo_free_kernel(&ring->mqd_obj,
 494			      &ring->mqd_gpu_addr,
 495			      &ring->mqd_ptr);
 496}
 497
 498int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
 499{
 500	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 501	struct amdgpu_ring *kiq_ring = &kiq->ring;
 
 
 
 502	int i, r = 0;
 503	int j;
 504
 505	if (adev->enable_mes) {
 506		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 507			j = i + xcc_id * adev->gfx.num_compute_rings;
 508			amdgpu_mes_unmap_legacy_queue(adev,
 509						   &adev->gfx.compute_ring[j],
 510						   RESET_QUEUES, 0, 0);
 511		}
 512		return 0;
 513	}
 514
 515	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
 516		return -EINVAL;
 517
 518	if (!kiq_ring->sched.ready || adev->job_hang || amdgpu_in_reset(adev))
 519		return 0;
 520
 521	spin_lock(&kiq->ring_lock);
 522	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
 523					adev->gfx.num_compute_rings)) {
 524		spin_unlock(&kiq->ring_lock);
 525		return -ENOMEM;
 526	}
 527
 528	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 529		j = i + xcc_id * adev->gfx.num_compute_rings;
 530		kiq->pmf->kiq_unmap_queues(kiq_ring,
 531					   &adev->gfx.compute_ring[j],
 532					   RESET_QUEUES, 0, 0);
 533	}
 534	/* Submit unmap queue packet */
 535	amdgpu_ring_commit(kiq_ring);
 536	/*
 537	 * Ring test will do a basic scratch register change check. Just run
 538	 * this to ensure that unmap queues that is submitted before got
 539	 * processed successfully before returning.
 540	 */
 541	r = amdgpu_ring_test_helper(kiq_ring);
 
 
 
 
 
 
 
 
 
 
 
 542
 
 
 543	spin_unlock(&kiq->ring_lock);
 544
 545	return r;
 546}
 547
 548int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
 549{
 550	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 551	struct amdgpu_ring *kiq_ring = &kiq->ring;
 552	int i, r = 0;
 553	int j;
 554
 555	if (adev->enable_mes) {
 556		if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
 557			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 558				j = i + xcc_id * adev->gfx.num_gfx_rings;
 559				amdgpu_mes_unmap_legacy_queue(adev,
 560						      &adev->gfx.gfx_ring[j],
 561						      PREEMPT_QUEUES, 0, 0);
 562			}
 563		}
 564		return 0;
 565	}
 566
 567	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
 568		return -EINVAL;
 569
 570	if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang)
 571		return 0;
 572
 573	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
 574		spin_lock(&kiq->ring_lock);
 575		if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
 576						adev->gfx.num_gfx_rings)) {
 577			spin_unlock(&kiq->ring_lock);
 578			return -ENOMEM;
 579		}
 580
 581		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 582			j = i + xcc_id * adev->gfx.num_gfx_rings;
 583			kiq->pmf->kiq_unmap_queues(kiq_ring,
 584						   &adev->gfx.gfx_ring[j],
 585						   PREEMPT_QUEUES, 0, 0);
 586		}
 587		/* Submit unmap queue packet */
 588		amdgpu_ring_commit(kiq_ring);
 589
 590		/*
 591		 * Ring test will do a basic scratch register change check.
 592		 * Just run this to ensure that unmap queues that is submitted
 593		 * before got processed successfully before returning.
 594		 */
 595		r = amdgpu_ring_test_helper(kiq_ring);
 596		spin_unlock(&kiq->ring_lock);
 597	}
 598
 599	return r;
 600}
 601
 602int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
 603					int queue_bit)
 604{
 605	int mec, pipe, queue;
 606	int set_resource_bit = 0;
 607
 608	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
 609
 610	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
 611
 612	return set_resource_bit;
 613}
 614
 615static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
 616{
 617	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 618	struct amdgpu_ring *kiq_ring = &kiq->ring;
 619	uint64_t queue_mask = ~0ULL;
 620	int r, i, j;
 621
 622	amdgpu_device_flush_hdp(adev, NULL);
 623
 624	if (!adev->enable_uni_mes) {
 625		spin_lock(&kiq->ring_lock);
 626		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
 627		if (r) {
 628			dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
 629			spin_unlock(&kiq->ring_lock);
 630			return r;
 631		}
 632
 633		kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
 634		r = amdgpu_ring_test_helper(kiq_ring);
 635		spin_unlock(&kiq->ring_lock);
 636		if (r)
 637			dev_err(adev->dev, "KIQ failed to set resources\n");
 638	}
 639
 640	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 641		j = i + xcc_id * adev->gfx.num_compute_rings;
 642		r = amdgpu_mes_map_legacy_queue(adev,
 643						&adev->gfx.compute_ring[j]);
 644		if (r) {
 645			dev_err(adev->dev, "failed to map compute queue\n");
 646			return r;
 647		}
 648	}
 649
 650	return 0;
 651}
 652
 653int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
 654{
 655	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 656	struct amdgpu_ring *kiq_ring = &kiq->ring;
 657	uint64_t queue_mask = 0;
 658	int r, i, j;
 659
 660	if (adev->mes.enable_legacy_queue_map)
 661		return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
 662
 663	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
 664		return -EINVAL;
 665
 666	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
 667		if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
 668			continue;
 669
 670		/* This situation may be hit in the future if a new HW
 671		 * generation exposes more than 64 queues. If so, the
 672		 * definition of queue_mask needs updating */
 673		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
 674			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
 675			break;
 676		}
 677
 678		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
 679	}
 680
 
 
 681	amdgpu_device_flush_hdp(adev, NULL);
 682
 683	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
 684		 kiq_ring->queue);
 685
 686	spin_lock(&kiq->ring_lock);
 687	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
 688					adev->gfx.num_compute_rings +
 689					kiq->pmf->set_resources_size);
 690	if (r) {
 691		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 692		spin_unlock(&kiq->ring_lock);
 693		return r;
 694	}
 695
 
 
 
 696	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
 697	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 698		j = i + xcc_id * adev->gfx.num_compute_rings;
 699		kiq->pmf->kiq_map_queues(kiq_ring,
 700					 &adev->gfx.compute_ring[j]);
 701	}
 702	/* Submit map queue packet */
 703	amdgpu_ring_commit(kiq_ring);
 704	/*
 705	 * Ring test will do a basic scratch register change check. Just run
 706	 * this to ensure that map queues that is submitted before got
 707	 * processed successfully before returning.
 708	 */
 709	r = amdgpu_ring_test_helper(kiq_ring);
 710	spin_unlock(&kiq->ring_lock);
 711	if (r)
 712		DRM_ERROR("KCQ enable failed\n");
 713
 714	return r;
 715}
 716
 717int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
 718{
 719	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 720	struct amdgpu_ring *kiq_ring = &kiq->ring;
 721	int r, i, j;
 722
 723	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
 724		return -EINVAL;
 725
 726	amdgpu_device_flush_hdp(adev, NULL);
 727
 728	if (adev->mes.enable_legacy_queue_map) {
 729		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 730			j = i + xcc_id * adev->gfx.num_gfx_rings;
 731			r = amdgpu_mes_map_legacy_queue(adev,
 732							&adev->gfx.gfx_ring[j]);
 733			if (r) {
 734				DRM_ERROR("failed to map gfx queue\n");
 735				return r;
 736			}
 737		}
 738
 739		return 0;
 740	}
 741
 742	spin_lock(&kiq->ring_lock);
 743	/* No need to map kcq on the slave */
 744	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
 745		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
 746						adev->gfx.num_gfx_rings);
 747		if (r) {
 748			DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 749			spin_unlock(&kiq->ring_lock);
 750			return r;
 751		}
 752
 753		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 754			j = i + xcc_id * adev->gfx.num_gfx_rings;
 755			kiq->pmf->kiq_map_queues(kiq_ring,
 756						 &adev->gfx.gfx_ring[j]);
 757		}
 758	}
 759	/* Submit map queue packet */
 760	amdgpu_ring_commit(kiq_ring);
 761	/*
 762	 * Ring test will do a basic scratch register change check. Just run
 763	 * this to ensure that map queues that is submitted before got
 764	 * processed successfully before returning.
 765	 */
 766	r = amdgpu_ring_test_helper(kiq_ring);
 767	spin_unlock(&kiq->ring_lock);
 768	if (r)
 769		DRM_ERROR("KGQ enable failed\n");
 770
 771	return r;
 772}
 773
 774/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
 775 *
 776 * @adev: amdgpu_device pointer
 777 * @bool enable true: enable gfx off feature, false: disable gfx off feature
 778 *
 779 * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
 780 * 2. other client can send request to disable gfx off feature, the request should be honored.
 781 * 3. other client can cancel their request of disable gfx off feature
 782 * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
 783 */
 784
 785void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 786{
 787	unsigned long delay = GFX_OFF_DELAY_ENABLE;
 788
 789	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
 790		return;
 791
 792	mutex_lock(&adev->gfx.gfx_off_mutex);
 793
 794	if (enable) {
 795		/* If the count is already 0, it means there's an imbalance bug somewhere.
 796		 * Note that the bug may be in a different caller than the one which triggers the
 797		 * WARN_ON_ONCE.
 798		 */
 799		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
 800			goto unlock;
 801
 802		adev->gfx.gfx_off_req_count--;
 803
 804		if (adev->gfx.gfx_off_req_count == 0 &&
 805		    !adev->gfx.gfx_off_state) {
 806			/* If going to s2idle, no need to wait */
 807			if (adev->in_s0ix) {
 808				if (!amdgpu_dpm_set_powergating_by_smu(adev,
 809						AMD_IP_BLOCK_TYPE_GFX, true))
 810					adev->gfx.gfx_off_state = true;
 811			} else {
 812				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
 813					      delay);
 814			}
 815		}
 816	} else {
 817		if (adev->gfx.gfx_off_req_count == 0) {
 818			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
 819
 820			if (adev->gfx.gfx_off_state &&
 821			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
 822				adev->gfx.gfx_off_state = false;
 823
 824				if (adev->gfx.funcs->init_spm_golden) {
 825					dev_dbg(adev->dev,
 826						"GFXOFF is disabled, re-init SPM golden settings\n");
 827					amdgpu_gfx_init_spm_golden(adev);
 828				}
 829			}
 830		}
 831
 832		adev->gfx.gfx_off_req_count++;
 833	}
 834
 835unlock:
 836	mutex_unlock(&adev->gfx.gfx_off_mutex);
 837}
 838
 839int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
 840{
 841	int r = 0;
 842
 843	mutex_lock(&adev->gfx.gfx_off_mutex);
 844
 845	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
 846
 847	mutex_unlock(&adev->gfx.gfx_off_mutex);
 848
 849	return r;
 850}
 851
 852int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
 853{
 854	int r = 0;
 855
 856	mutex_lock(&adev->gfx.gfx_off_mutex);
 857
 858	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
 859
 860	mutex_unlock(&adev->gfx.gfx_off_mutex);
 861
 862	return r;
 863}
 864
 865int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
 866{
 867	int r = 0;
 868
 869	mutex_lock(&adev->gfx.gfx_off_mutex);
 870
 871	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
 872
 873	mutex_unlock(&adev->gfx.gfx_off_mutex);
 874
 875	return r;
 876}
 877
 878int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
 879{
 880
 881	int r = 0;
 882
 883	mutex_lock(&adev->gfx.gfx_off_mutex);
 884
 885	r = amdgpu_dpm_get_status_gfxoff(adev, value);
 886
 887	mutex_unlock(&adev->gfx.gfx_off_mutex);
 888
 889	return r;
 890}
 891
 892int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
 893{
 894	int r;
 895
 896	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
 897		if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
 898			r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
 899			if (r)
 900				return r;
 901		}
 902
 903		r = amdgpu_ras_block_late_init(adev, ras_block);
 904		if (r)
 905			return r;
 906
 907		if (amdgpu_sriov_vf(adev))
 908			return r;
 909
 910		if (adev->gfx.cp_ecc_error_irq.funcs) {
 911			r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
 912			if (r)
 913				goto late_fini;
 914		}
 915	} else {
 916		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
 917	}
 918
 919	return 0;
 920late_fini:
 921	amdgpu_ras_block_late_fini(adev, ras_block);
 922	return r;
 923}
 924
 925int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
 926{
 927	int err = 0;
 928	struct amdgpu_gfx_ras *ras = NULL;
 929
 930	/* adev->gfx.ras is NULL, which means gfx does not
 931	 * support ras function, then do nothing here.
 932	 */
 933	if (!adev->gfx.ras)
 934		return 0;
 935
 936	ras = adev->gfx.ras;
 937
 938	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
 939	if (err) {
 940		dev_err(adev->dev, "Failed to register gfx ras block!\n");
 941		return err;
 942	}
 943
 944	strcpy(ras->ras_block.ras_comm.name, "gfx");
 945	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
 946	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
 947	adev->gfx.ras_if = &ras->ras_block.ras_comm;
 948
 949	/* If not define special ras_late_init function, use gfx default ras_late_init */
 950	if (!ras->ras_block.ras_late_init)
 951		ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
 952
 953	/* If not defined special ras_cb function, use default ras_cb */
 954	if (!ras->ras_block.ras_cb)
 955		ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
 956
 957	return 0;
 958}
 959
 960int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
 961						struct amdgpu_iv_entry *entry)
 962{
 963	if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
 964		return adev->gfx.ras->poison_consumption_handler(adev, entry);
 965
 966	return 0;
 967}
 968
 969int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
 970		void *err_data,
 971		struct amdgpu_iv_entry *entry)
 972{
 973	/* TODO ue will trigger an interrupt.
 974	 *
 975	 * When “Full RAS” is enabled, the per-IP interrupt sources should
 976	 * be disabled and the driver should only look for the aggregated
 977	 * interrupt via sync flood
 978	 */
 979	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
 980		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
 981		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
 982		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
 983			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
 984		amdgpu_ras_reset_gpu(adev);
 985	}
 986	return AMDGPU_RAS_SUCCESS;
 987}
 988
 989int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
 990				  struct amdgpu_irq_src *source,
 991				  struct amdgpu_iv_entry *entry)
 992{
 993	struct ras_common_if *ras_if = adev->gfx.ras_if;
 994	struct ras_dispatch_if ih_data = {
 995		.entry = entry,
 996	};
 997
 998	if (!ras_if)
 999		return 0;
1000
1001	ih_data.head = *ras_if;
1002
1003	DRM_ERROR("CP ECC ERROR IRQ\n");
1004	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
1005	return 0;
1006}
1007
1008void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
1009		void *ras_error_status,
1010		void (*func)(struct amdgpu_device *adev, void *ras_error_status,
1011				int xcc_id))
1012{
1013	int i;
1014	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
1015	uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
1016	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
1017
1018	if (err_data) {
1019		err_data->ue_count = 0;
1020		err_data->ce_count = 0;
1021	}
1022
1023	for_each_inst(i, xcc_mask)
1024		func(adev, ras_error_status, i);
1025}
1026
1027uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
1028{
1029	signed long r, cnt = 0;
1030	unsigned long flags;
1031	uint32_t seq, reg_val_offs = 0, value = 0;
1032	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
1033	struct amdgpu_ring *ring = &kiq->ring;
1034
1035	if (amdgpu_device_skip_hw_access(adev))
1036		return 0;
1037
1038	if (adev->mes.ring[0].sched.ready)
1039		return amdgpu_mes_rreg(adev, reg);
1040
1041	BUG_ON(!ring->funcs->emit_rreg);
1042
1043	spin_lock_irqsave(&kiq->ring_lock, flags);
1044	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
1045		pr_err("critical bug! too many kiq readers\n");
1046		goto failed_unlock;
1047	}
1048	r = amdgpu_ring_alloc(ring, 32);
1049	if (r)
1050		goto failed_unlock;
1051
1052	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
1053	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
1054	if (r)
1055		goto failed_undo;
1056
1057	amdgpu_ring_commit(ring);
1058	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1059
1060	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1061
1062	/* don't wait anymore for gpu reset case because this way may
1063	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
1064	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
1065	 * never return if we keep waiting in virt_kiq_rreg, which cause
1066	 * gpu_recover() hang there.
1067	 *
1068	 * also don't wait anymore for IRQ context
1069	 * */
1070	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
1071		goto failed_kiq_read;
1072
1073	might_sleep();
1074	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
1075		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
1076		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1077	}
1078
1079	if (cnt > MAX_KIQ_REG_TRY)
1080		goto failed_kiq_read;
1081
1082	mb();
1083	value = adev->wb.wb[reg_val_offs];
1084	amdgpu_device_wb_free(adev, reg_val_offs);
1085	return value;
1086
1087failed_undo:
1088	amdgpu_ring_undo(ring);
1089failed_unlock:
1090	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1091failed_kiq_read:
1092	if (reg_val_offs)
1093		amdgpu_device_wb_free(adev, reg_val_offs);
1094	dev_err(adev->dev, "failed to read reg:%x\n", reg);
1095	return ~0;
1096}
1097
1098void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
1099{
1100	signed long r, cnt = 0;
1101	unsigned long flags;
1102	uint32_t seq;
1103	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
1104	struct amdgpu_ring *ring = &kiq->ring;
1105
1106	BUG_ON(!ring->funcs->emit_wreg);
1107
1108	if (amdgpu_device_skip_hw_access(adev))
1109		return;
1110
1111	if (adev->mes.ring[0].sched.ready) {
1112		amdgpu_mes_wreg(adev, reg, v);
1113		return;
1114	}
1115
1116	spin_lock_irqsave(&kiq->ring_lock, flags);
1117	r = amdgpu_ring_alloc(ring, 32);
1118	if (r)
1119		goto failed_unlock;
1120
1121	amdgpu_ring_emit_wreg(ring, reg, v);
1122	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
1123	if (r)
1124		goto failed_undo;
1125
1126	amdgpu_ring_commit(ring);
1127	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1128
1129	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1130
1131	/* don't wait anymore for gpu reset case because this way may
1132	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
1133	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
1134	 * never return if we keep waiting in virt_kiq_rreg, which cause
1135	 * gpu_recover() hang there.
1136	 *
1137	 * also don't wait anymore for IRQ context
1138	 * */
1139	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
1140		goto failed_kiq_write;
1141
1142	might_sleep();
1143	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
1144
1145		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
1146		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1147	}
1148
1149	if (cnt > MAX_KIQ_REG_TRY)
1150		goto failed_kiq_write;
1151
1152	return;
1153
1154failed_undo:
1155	amdgpu_ring_undo(ring);
1156failed_unlock:
1157	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1158failed_kiq_write:
1159	dev_err(adev->dev, "failed to write reg:%x\n", reg);
1160}
1161
1162int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
1163{
1164	if (amdgpu_num_kcq == -1) {
1165		return 8;
1166	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
1167		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
1168		return 8;
1169	}
1170	return amdgpu_num_kcq;
1171}
1172
1173void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
1174				  uint32_t ucode_id)
1175{
1176	const struct gfx_firmware_header_v1_0 *cp_hdr;
1177	const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
1178	struct amdgpu_firmware_info *info = NULL;
1179	const struct firmware *ucode_fw;
1180	unsigned int fw_size;
1181
1182	switch (ucode_id) {
1183	case AMDGPU_UCODE_ID_CP_PFP:
1184		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1185			adev->gfx.pfp_fw->data;
1186		adev->gfx.pfp_fw_version =
1187			le32_to_cpu(cp_hdr->header.ucode_version);
1188		adev->gfx.pfp_feature_version =
1189			le32_to_cpu(cp_hdr->ucode_feature_version);
1190		ucode_fw = adev->gfx.pfp_fw;
1191		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1192		break;
1193	case AMDGPU_UCODE_ID_CP_RS64_PFP:
1194		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1195			adev->gfx.pfp_fw->data;
1196		adev->gfx.pfp_fw_version =
1197			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1198		adev->gfx.pfp_feature_version =
1199			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1200		ucode_fw = adev->gfx.pfp_fw;
1201		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1202		break;
1203	case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
1204	case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
1205		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1206			adev->gfx.pfp_fw->data;
1207		ucode_fw = adev->gfx.pfp_fw;
1208		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1209		break;
1210	case AMDGPU_UCODE_ID_CP_ME:
1211		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1212			adev->gfx.me_fw->data;
1213		adev->gfx.me_fw_version =
1214			le32_to_cpu(cp_hdr->header.ucode_version);
1215		adev->gfx.me_feature_version =
1216			le32_to_cpu(cp_hdr->ucode_feature_version);
1217		ucode_fw = adev->gfx.me_fw;
1218		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1219		break;
1220	case AMDGPU_UCODE_ID_CP_RS64_ME:
1221		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1222			adev->gfx.me_fw->data;
1223		adev->gfx.me_fw_version =
1224			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1225		adev->gfx.me_feature_version =
1226			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1227		ucode_fw = adev->gfx.me_fw;
1228		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1229		break;
1230	case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
1231	case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
1232		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1233			adev->gfx.me_fw->data;
1234		ucode_fw = adev->gfx.me_fw;
1235		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1236		break;
1237	case AMDGPU_UCODE_ID_CP_CE:
1238		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1239			adev->gfx.ce_fw->data;
1240		adev->gfx.ce_fw_version =
1241			le32_to_cpu(cp_hdr->header.ucode_version);
1242		adev->gfx.ce_feature_version =
1243			le32_to_cpu(cp_hdr->ucode_feature_version);
1244		ucode_fw = adev->gfx.ce_fw;
1245		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1246		break;
1247	case AMDGPU_UCODE_ID_CP_MEC1:
1248		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1249			adev->gfx.mec_fw->data;
1250		adev->gfx.mec_fw_version =
1251			le32_to_cpu(cp_hdr->header.ucode_version);
1252		adev->gfx.mec_feature_version =
1253			le32_to_cpu(cp_hdr->ucode_feature_version);
1254		ucode_fw = adev->gfx.mec_fw;
1255		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1256			  le32_to_cpu(cp_hdr->jt_size) * 4;
1257		break;
1258	case AMDGPU_UCODE_ID_CP_MEC1_JT:
1259		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1260			adev->gfx.mec_fw->data;
1261		ucode_fw = adev->gfx.mec_fw;
1262		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1263		break;
1264	case AMDGPU_UCODE_ID_CP_MEC2:
1265		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1266			adev->gfx.mec2_fw->data;
1267		adev->gfx.mec2_fw_version =
1268			le32_to_cpu(cp_hdr->header.ucode_version);
1269		adev->gfx.mec2_feature_version =
1270			le32_to_cpu(cp_hdr->ucode_feature_version);
1271		ucode_fw = adev->gfx.mec2_fw;
1272		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1273			  le32_to_cpu(cp_hdr->jt_size) * 4;
1274		break;
1275	case AMDGPU_UCODE_ID_CP_MEC2_JT:
1276		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1277			adev->gfx.mec2_fw->data;
1278		ucode_fw = adev->gfx.mec2_fw;
1279		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1280		break;
1281	case AMDGPU_UCODE_ID_CP_RS64_MEC:
1282		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1283			adev->gfx.mec_fw->data;
1284		adev->gfx.mec_fw_version =
1285			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1286		adev->gfx.mec_feature_version =
1287			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1288		ucode_fw = adev->gfx.mec_fw;
1289		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1290		break;
1291	case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
1292	case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
1293	case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
1294	case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
1295		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1296			adev->gfx.mec_fw->data;
1297		ucode_fw = adev->gfx.mec_fw;
1298		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1299		break;
1300	default:
1301		dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
1302		return;
1303	}
1304
1305	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1306		info = &adev->firmware.ucode[ucode_id];
1307		info->ucode_id = ucode_id;
1308		info->fw = ucode_fw;
1309		adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
1310	}
1311}
1312
1313bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
1314{
1315	return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
1316			adev->gfx.num_xcc_per_xcp : 1));
1317}
1318
1319static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
1320						struct device_attribute *addr,
1321						char *buf)
1322{
1323	struct drm_device *ddev = dev_get_drvdata(dev);
1324	struct amdgpu_device *adev = drm_to_adev(ddev);
1325	int mode;
1326
1327	mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
1328					       AMDGPU_XCP_FL_NONE);
1329
1330	return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
1331}
1332
1333static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
1334						struct device_attribute *addr,
1335						const char *buf, size_t count)
1336{
1337	struct drm_device *ddev = dev_get_drvdata(dev);
1338	struct amdgpu_device *adev = drm_to_adev(ddev);
1339	enum amdgpu_gfx_partition mode;
1340	int ret = 0, num_xcc;
1341
1342	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1343	if (num_xcc % 2 != 0)
1344		return -EINVAL;
1345
1346	if (!strncasecmp("SPX", buf, strlen("SPX"))) {
1347		mode = AMDGPU_SPX_PARTITION_MODE;
1348	} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
1349		/*
1350		 * DPX mode needs AIDs to be in multiple of 2.
1351		 * Each AID connects 2 XCCs.
1352		 */
1353		if (num_xcc%4)
1354			return -EINVAL;
1355		mode = AMDGPU_DPX_PARTITION_MODE;
1356	} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
1357		if (num_xcc != 6)
1358			return -EINVAL;
1359		mode = AMDGPU_TPX_PARTITION_MODE;
1360	} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
1361		if (num_xcc != 8)
1362			return -EINVAL;
1363		mode = AMDGPU_QPX_PARTITION_MODE;
1364	} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
1365		mode = AMDGPU_CPX_PARTITION_MODE;
1366	} else {
1367		return -EINVAL;
1368	}
1369
1370	ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
1371
1372	if (ret)
1373		return ret;
1374
1375	return count;
1376}
1377
1378static const char *xcp_desc[] = {
1379	[AMDGPU_SPX_PARTITION_MODE] = "SPX",
1380	[AMDGPU_DPX_PARTITION_MODE] = "DPX",
1381	[AMDGPU_TPX_PARTITION_MODE] = "TPX",
1382	[AMDGPU_QPX_PARTITION_MODE] = "QPX",
1383	[AMDGPU_CPX_PARTITION_MODE] = "CPX",
1384};
1385
1386static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
1387						struct device_attribute *addr,
1388						char *buf)
1389{
1390	struct drm_device *ddev = dev_get_drvdata(dev);
1391	struct amdgpu_device *adev = drm_to_adev(ddev);
1392	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
1393	int size = 0, mode;
1394	char *sep = "";
1395
1396	if (!xcp_mgr || !xcp_mgr->avail_xcp_modes)
1397		return sysfs_emit(buf, "Not supported\n");
1398
1399	for_each_inst(mode, xcp_mgr->avail_xcp_modes) {
1400		size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
1401		sep = ", ";
 
 
 
 
 
 
 
 
 
 
 
 
1402	}
1403
1404	size += sysfs_emit_at(buf, size, "\n");
1405
1406	return size;
1407}
1408
1409static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
1410{
1411	struct amdgpu_device *adev = ring->adev;
1412	struct drm_gpu_scheduler *sched = &ring->sched;
1413	struct drm_sched_entity entity;
1414	struct dma_fence *f;
1415	struct amdgpu_job *job;
1416	struct amdgpu_ib *ib;
1417	int i, r;
1418
1419	/* Initialize the scheduler entity */
1420	r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
1421				  &sched, 1, NULL);
1422	if (r) {
1423		dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
1424		goto err;
1425	}
1426
1427	r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL,
1428				     64, 0,
1429				     &job);
1430	if (r)
1431		goto err;
1432
1433	job->enforce_isolation = true;
1434
1435	ib = &job->ibs[0];
1436	for (i = 0; i <= ring->funcs->align_mask; ++i)
1437		ib->ptr[i] = ring->funcs->nop;
1438	ib->length_dw = ring->funcs->align_mask + 1;
1439
1440	f = amdgpu_job_submit(job);
1441
1442	r = dma_fence_wait(f, false);
1443	if (r)
1444		goto err;
1445
1446	dma_fence_put(f);
1447
1448	/* Clean up the scheduler entity */
1449	drm_sched_entity_destroy(&entity);
1450	return 0;
1451
1452err:
1453	return r;
1454}
1455
1456static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
1457{
1458	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1459	struct amdgpu_ring *ring;
1460	int num_xcc_to_clear;
1461	int i, r, xcc_id;
1462
1463	if (adev->gfx.num_xcc_per_xcp)
1464		num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
1465	else
1466		num_xcc_to_clear = 1;
1467
1468	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1469		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1470			ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
1471			if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
1472				r = amdgpu_gfx_run_cleaner_shader_job(ring);
1473				if (r)
1474					return r;
1475				num_xcc_to_clear--;
1476				break;
1477			}
1478		}
1479	}
1480
1481	if (num_xcc_to_clear)
1482		return -ENOENT;
1483
1484	return 0;
1485}
1486
1487static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
1488						 struct device_attribute *attr,
1489						 const char *buf,
1490						 size_t count)
1491{
1492	struct drm_device *ddev = dev_get_drvdata(dev);
1493	struct amdgpu_device *adev = drm_to_adev(ddev);
1494	int ret;
1495	long value;
1496
1497	if (amdgpu_in_reset(adev))
1498		return -EPERM;
1499	if (adev->in_suspend && !adev->in_runpm)
1500		return -EPERM;
1501
1502	ret = kstrtol(buf, 0, &value);
1503
1504	if (ret)
1505		return -EINVAL;
1506
1507	if (value < 0)
1508		return -EINVAL;
1509
1510	if (adev->xcp_mgr) {
1511		if (value >= adev->xcp_mgr->num_xcps)
1512			return -EINVAL;
1513	} else {
1514		if (value > 1)
1515			return -EINVAL;
1516	}
1517
1518	ret = pm_runtime_get_sync(ddev->dev);
1519	if (ret < 0) {
1520		pm_runtime_put_autosuspend(ddev->dev);
1521		return ret;
1522	}
1523
1524	ret = amdgpu_gfx_run_cleaner_shader(adev, value);
1525
1526	pm_runtime_mark_last_busy(ddev->dev);
1527	pm_runtime_put_autosuspend(ddev->dev);
1528
1529	if (ret)
1530		return ret;
1531
1532	return count;
1533}
1534
1535static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
1536						struct device_attribute *attr,
1537						char *buf)
1538{
1539	struct drm_device *ddev = dev_get_drvdata(dev);
1540	struct amdgpu_device *adev = drm_to_adev(ddev);
1541	int i;
1542	ssize_t size = 0;
1543
1544	if (adev->xcp_mgr) {
1545		for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
1546			size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
1547			if (i < (adev->xcp_mgr->num_xcps - 1))
1548				size += sysfs_emit_at(buf, size, " ");
1549		}
1550		buf[size++] = '\n';
1551	} else {
1552		size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
1553	}
1554
1555	return size;
1556}
1557
1558static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
1559						struct device_attribute *attr,
1560						const char *buf, size_t count)
1561{
1562	struct drm_device *ddev = dev_get_drvdata(dev);
1563	struct amdgpu_device *adev = drm_to_adev(ddev);
1564	long partition_values[MAX_XCP] = {0};
1565	int ret, i, num_partitions;
1566	const char *input_buf = buf;
1567
1568	for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
1569		ret = sscanf(input_buf, "%ld", &partition_values[i]);
1570		if (ret <= 0)
1571			break;
1572
1573		/* Move the pointer to the next value in the string */
1574		input_buf = strchr(input_buf, ' ');
1575		if (input_buf) {
1576			input_buf++;
1577		} else {
1578			i++;
1579			break;
1580		}
1581	}
1582	num_partitions = i;
1583
1584	if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
1585		return -EINVAL;
1586
1587	if (!adev->xcp_mgr && num_partitions != 1)
1588		return -EINVAL;
1589
1590	for (i = 0; i < num_partitions; i++) {
1591		if (partition_values[i] != 0 && partition_values[i] != 1)
1592			return -EINVAL;
1593	}
1594
1595	mutex_lock(&adev->enforce_isolation_mutex);
1596	for (i = 0; i < num_partitions; i++) {
1597		if (adev->enforce_isolation[i] && !partition_values[i])
1598			/* Going from enabled to disabled */
1599			amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
1600		else if (!adev->enforce_isolation[i] && partition_values[i])
1601			/* Going from disabled to enabled */
1602			amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
1603		adev->enforce_isolation[i] = partition_values[i];
1604	}
1605	mutex_unlock(&adev->enforce_isolation_mutex);
1606
1607	amdgpu_mes_update_enforce_isolation(adev);
1608
1609	return count;
1610}
1611
1612static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
1613						struct device_attribute *attr,
1614						char *buf)
1615{
1616	struct drm_device *ddev = dev_get_drvdata(dev);
1617	struct amdgpu_device *adev = drm_to_adev(ddev);
1618
1619	if (!adev)
1620		return -ENODEV;
1621
1622	return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
1623}
1624
1625static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
1626						struct device_attribute *attr,
1627						char *buf)
1628{
1629	struct drm_device *ddev = dev_get_drvdata(dev);
1630	struct amdgpu_device *adev = drm_to_adev(ddev);
1631
1632	if (!adev)
1633		return -ENODEV;
1634
1635	return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);
1636}
1637
1638static DEVICE_ATTR(run_cleaner_shader, 0200,
1639		   NULL, amdgpu_gfx_set_run_cleaner_shader);
1640
1641static DEVICE_ATTR(enforce_isolation, 0644,
1642		   amdgpu_gfx_get_enforce_isolation,
1643		   amdgpu_gfx_set_enforce_isolation);
1644
1645static DEVICE_ATTR(current_compute_partition, 0644,
1646		   amdgpu_gfx_get_current_compute_partition,
1647		   amdgpu_gfx_set_compute_partition);
1648
1649static DEVICE_ATTR(available_compute_partition, 0444,
1650		   amdgpu_gfx_get_available_compute_partition, NULL);
1651static DEVICE_ATTR(gfx_reset_mask, 0444,
1652		   amdgpu_gfx_get_gfx_reset_mask, NULL);
1653
1654static DEVICE_ATTR(compute_reset_mask, 0444,
1655		   amdgpu_gfx_get_compute_reset_mask, NULL);
1656
1657static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
1658{
1659	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
1660	bool xcp_switch_supported;
1661	int r;
1662
1663	if (!xcp_mgr)
1664		return 0;
1665
1666	xcp_switch_supported =
1667		(xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
1668
1669	if (!xcp_switch_supported)
1670		dev_attr_current_compute_partition.attr.mode &=
1671			~(S_IWUSR | S_IWGRP | S_IWOTH);
1672
1673	r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
1674	if (r)
1675		return r;
1676
1677	if (xcp_switch_supported)
1678		r = device_create_file(adev->dev,
1679				       &dev_attr_available_compute_partition);
1680
1681	return r;
1682}
1683
1684static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev)
1685{
1686	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
1687	bool xcp_switch_supported;
1688
1689	if (!xcp_mgr)
1690		return;
1691
1692	xcp_switch_supported =
1693		(xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
1694	device_remove_file(adev->dev, &dev_attr_current_compute_partition);
1695
1696	if (xcp_switch_supported)
1697		device_remove_file(adev->dev,
1698				   &dev_attr_available_compute_partition);
1699}
1700
1701static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
1702{
1703	int r;
1704
1705	r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
1706	if (r)
1707		return r;
1708	if (adev->gfx.enable_cleaner_shader)
1709		r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
1710
1711	return r;
1712}
1713
1714static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
1715{
1716	device_remove_file(adev->dev, &dev_attr_enforce_isolation);
1717	if (adev->gfx.enable_cleaner_shader)
1718		device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
1719}
1720
1721static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
1722{
1723	int r = 0;
1724
1725	if (!amdgpu_gpu_recovery)
1726		return r;
1727
1728	if (adev->gfx.num_gfx_rings) {
1729		r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
1730		if (r)
1731			return r;
1732	}
1733
1734	if (adev->gfx.num_compute_rings) {
1735		r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
1736		if (r)
1737			return r;
1738	}
1739
1740	return r;
1741}
1742
1743static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
1744{
1745	if (!amdgpu_gpu_recovery)
1746		return;
1747
1748	if (adev->gfx.num_gfx_rings)
1749		device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
1750
1751	if (adev->gfx.num_compute_rings)
1752		device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
1753}
1754
1755int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
1756{
1757	int r;
1758
1759	r = amdgpu_gfx_sysfs_xcp_init(adev);
1760	if (r) {
1761		dev_err(adev->dev, "failed to create xcp sysfs files");
1762		return r;
1763	}
1764
1765	r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
1766	if (r)
1767		dev_err(adev->dev, "failed to create isolation sysfs files");
1768
1769	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
1770	if (r)
1771		dev_err(adev->dev, "failed to create reset mask sysfs files");
1772
1773	return r;
1774}
1775
1776void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
1777{
1778	if (adev->dev->kobj.sd) {
1779		amdgpu_gfx_sysfs_xcp_fini(adev);
1780		amdgpu_gfx_sysfs_isolation_shader_fini(adev);
1781		amdgpu_gfx_sysfs_reset_mask_fini(adev);
1782	}
1783}
1784
1785int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
1786				      unsigned int cleaner_shader_size)
1787{
1788	if (!adev->gfx.enable_cleaner_shader)
1789		return -EOPNOTSUPP;
1790
1791	return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
1792				       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
1793				       &adev->gfx.cleaner_shader_obj,
1794				       &adev->gfx.cleaner_shader_gpu_addr,
1795				       (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1796}
1797
1798void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
1799{
1800	if (!adev->gfx.enable_cleaner_shader)
1801		return;
1802
1803	amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
1804			      &adev->gfx.cleaner_shader_gpu_addr,
1805			      (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1806}
1807
1808void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
1809				    unsigned int cleaner_shader_size,
1810				    const void *cleaner_shader_ptr)
1811{
1812	if (!adev->gfx.enable_cleaner_shader)
1813		return;
1814
1815	if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
1816		memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
1817			    cleaner_shader_size);
1818}
1819
1820/**
1821 * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
1822 * @adev: amdgpu_device pointer
1823 * @idx: Index of the scheduler to control
1824 * @enable: Whether to enable or disable the KFD scheduler
1825 *
1826 * This function is used to control the KFD (Kernel Fusion Driver) scheduler
1827 * from the KGD. It is part of the cleaner shader feature. This function plays
1828 * a key role in enforcing process isolation on the GPU.
1829 *
1830 * The function uses a reference count mechanism (kfd_sch_req_count) to keep
1831 * track of the number of requests to enable the KFD scheduler. When a request
1832 * to enable the KFD scheduler is made, the reference count is decremented.
1833 * When the reference count reaches zero, a delayed work is scheduled to
1834 * enforce isolation after a delay of GFX_SLICE_PERIOD.
1835 *
1836 * When a request to disable the KFD scheduler is made, the function first
1837 * checks if the reference count is zero. If it is, it cancels the delayed work
1838 * for enforcing isolation and checks if the KFD scheduler is active. If the
1839 * KFD scheduler is active, it sends a request to stop the KFD scheduler and
1840 * sets the KFD scheduler state to inactive. Then, it increments the reference
1841 * count.
1842 *
1843 * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
1844 * scheduler state and reference count are updated atomically.
1845 *
1846 * Note: If the reference count is already zero when a request to enable the
1847 * KFD scheduler is made, it means there's an imbalance bug somewhere. The
1848 * function triggers a warning in this case.
1849 */
1850static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
1851				    bool enable)
1852{
1853	mutex_lock(&adev->gfx.kfd_sch_mutex);
1854
1855	if (enable) {
1856		/* If the count is already 0, it means there's an imbalance bug somewhere.
1857		 * Note that the bug may be in a different caller than the one which triggers the
1858		 * WARN_ON_ONCE.
1859		 */
1860		if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
1861			dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
1862			goto unlock;
1863		}
1864
1865		adev->gfx.kfd_sch_req_count[idx]--;
1866
1867		if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
1868		    adev->gfx.kfd_sch_inactive[idx]) {
1869			schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1870					      msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
1871		}
1872	} else {
1873		if (adev->gfx.kfd_sch_req_count[idx] == 0) {
1874			cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
1875			if (!adev->gfx.kfd_sch_inactive[idx]) {
1876				amdgpu_amdkfd_stop_sched(adev, idx);
1877				adev->gfx.kfd_sch_inactive[idx] = true;
1878			}
1879		}
1880
1881		adev->gfx.kfd_sch_req_count[idx]++;
1882	}
1883
1884unlock:
1885	mutex_unlock(&adev->gfx.kfd_sch_mutex);
1886}
1887
1888/**
1889 * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
1890 *
1891 * @work: work_struct.
1892 *
1893 * This function is the work handler for enforcing shader isolation on AMD GPUs.
1894 * It counts the number of emitted fences for each GFX and compute ring. If there
1895 * are any fences, it schedules the `enforce_isolation_work` to be run after a
1896 * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion
1897 * Driver (KFD) to resume the runqueue. The function is synchronized using the
1898 * `enforce_isolation_mutex`.
1899 */
1900void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
1901{
1902	struct amdgpu_isolation_work *isolation_work =
1903		container_of(work, struct amdgpu_isolation_work, work.work);
1904	struct amdgpu_device *adev = isolation_work->adev;
1905	u32 i, idx, fences = 0;
1906
1907	if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
1908		idx = 0;
1909	else
1910		idx = isolation_work->xcp_id;
1911
1912	if (idx >= MAX_XCP)
1913		return;
1914
1915	mutex_lock(&adev->enforce_isolation_mutex);
1916	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
1917		if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
1918			fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
1919	}
1920	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
1921		if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
1922			fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
1923	}
1924	if (fences) {
1925		/* we've already had our timeslice, so let's wrap this up */
1926		schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1927				      msecs_to_jiffies(1));
1928	} else {
1929		/* Tell KFD to resume the runqueue */
1930		if (adev->kfd.init_complete) {
1931			WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
1932			WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
1933				amdgpu_amdkfd_start_sched(adev, idx);
1934				adev->gfx.kfd_sch_inactive[idx] = false;
1935		}
1936	}
1937	mutex_unlock(&adev->enforce_isolation_mutex);
1938}
1939
1940static void
1941amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
1942					  u32 idx)
1943{
1944	unsigned long cjiffies;
1945	bool wait = false;
1946
1947	mutex_lock(&adev->enforce_isolation_mutex);
1948	if (adev->enforce_isolation[idx]) {
1949		/* set the initial values if nothing is set */
1950		if (!adev->gfx.enforce_isolation_jiffies[idx]) {
1951			adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
1952			adev->gfx.enforce_isolation_time[idx] =	GFX_SLICE_PERIOD_MS;
1953		}
1954		/* Make sure KFD gets a chance to run */
1955		if (amdgpu_amdkfd_compute_active(adev, idx)) {
1956			cjiffies = jiffies;
1957			if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
1958				cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
1959				if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) {
1960					/* if our time is up, let KGD work drain before scheduling more */
1961					wait = true;
1962					/* reset the timer period */
1963					adev->gfx.enforce_isolation_time[idx] =	GFX_SLICE_PERIOD_MS;
1964				} else {
1965					/* set the timer period to what's left in our time slice */
1966					adev->gfx.enforce_isolation_time[idx] =
1967						GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
1968				}
1969			} else {
1970				/* if jiffies wrap around we will just wait a little longer */
1971				adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
1972			}
1973		} else {
1974			/* if there is no KFD work, then set the full slice period */
1975			adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
1976			adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
1977		}
1978	}
1979	mutex_unlock(&adev->enforce_isolation_mutex);
1980
1981	if (wait)
1982		msleep(GFX_SLICE_PERIOD_MS);
1983}
1984
1985void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
1986{
1987	struct amdgpu_device *adev = ring->adev;
1988	u32 idx;
1989	bool sched_work = false;
1990
1991	if (!adev->gfx.enable_cleaner_shader)
1992		return;
1993
1994	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
1995		idx = 0;
1996	else
1997		idx = ring->xcp_id;
1998
1999	if (idx >= MAX_XCP)
2000		return;
2001
2002	/* Don't submit more work until KFD has had some time */
2003	amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
2004
2005	mutex_lock(&adev->enforce_isolation_mutex);
2006	if (adev->enforce_isolation[idx]) {
2007		if (adev->kfd.init_complete)
2008			sched_work = true;
2009	}
2010	mutex_unlock(&adev->enforce_isolation_mutex);
2011
2012	if (sched_work)
2013		amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
2014}
2015
2016void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
2017{
2018	struct amdgpu_device *adev = ring->adev;
2019	u32 idx;
2020	bool sched_work = false;
2021
2022	if (!adev->gfx.enable_cleaner_shader)
2023		return;
2024
2025	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
2026		idx = 0;
2027	else
2028		idx = ring->xcp_id;
2029
2030	if (idx >= MAX_XCP)
2031		return;
2032
2033	mutex_lock(&adev->enforce_isolation_mutex);
2034	if (adev->enforce_isolation[idx]) {
2035		if (adev->kfd.init_complete)
2036			sched_work = true;
2037	}
2038	mutex_unlock(&adev->enforce_isolation_mutex);
2039
2040	if (sched_work)
2041		amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
2042}
2043
/*
 * debugfs interface to enable/disable gfx job submission to a specific core.
 */
2047#if defined(CONFIG_DEBUG_FS)
2048static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
2049{
2050	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2051	u32 i;
2052	u64 mask = 0;
2053	struct amdgpu_ring *ring;
2054
2055	if (!adev)
2056		return -ENODEV;
2057
2058	mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
2059	if ((val & mask) == 0)
2060		return -EINVAL;
2061
2062	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
2063		ring = &adev->gfx.gfx_ring[i];
2064		if (val & (1 << i))
2065			ring->sched.ready = true;
2066		else
2067			ring->sched.ready = false;
2068	}
2069	/* publish sched.ready flag update effective immediately across smp */
2070	smp_rmb();
2071	return 0;
2072}
2073
2074static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
2075{
2076	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2077	u32 i;
2078	u64 mask = 0;
2079	struct amdgpu_ring *ring;
2080
2081	if (!adev)
2082		return -ENODEV;
2083	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
2084		ring = &adev->gfx.gfx_ring[i];
2085		if (ring->sched.ready)
2086			mask |= 1ULL << i;
2087	}
2088
2089	*val = mask;
2090	return 0;
2091}
2092
2093DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
2094			 amdgpu_debugfs_gfx_sched_mask_get,
2095			 amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");
2096
2097#endif
2098
/**
 * amdgpu_debugfs_gfx_sched_mask_init - create the gfx scheduler mask file
 * @adev: amdgpu_device pointer
 *
 * With CONFIG_DEBUG_FS enabled, creates "amdgpu_gfx_sched_mask" (mode 0600)
 * in the primary DRM minor's debugfs directory. The file is only created
 * when the device has more than one gfx ring. Without CONFIG_DEBUG_FS this
 * function does nothing.
 */
void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	if (!(adev->gfx.num_gfx_rings > 1))
		return;

	/* The name is constant, so no scratch buffer or sprintf() is needed. */
	debugfs_create_file("amdgpu_gfx_sched_mask", 0600, root, adev,
			    &amdgpu_debugfs_gfx_sched_mask_fops);
#endif
}
2113
/*
 * debugfs interface to enable/disable compute job submission to a specific core.
 */
2117#if defined(CONFIG_DEBUG_FS)
2118static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
2119{
2120	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2121	u32 i;
2122	u64 mask = 0;
2123	struct amdgpu_ring *ring;
2124
2125	if (!adev)
2126		return -ENODEV;
2127
2128	mask = (1ULL << adev->gfx.num_compute_rings) - 1;
2129	if ((val & mask) == 0)
2130		return -EINVAL;
2131
2132	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
2133		ring = &adev->gfx.compute_ring[i];
2134		if (val & (1 << i))
2135			ring->sched.ready = true;
2136		else
2137			ring->sched.ready = false;
2138	}
2139
2140	/* publish sched.ready flag update effective immediately across smp */
2141	smp_rmb();
2142	return 0;
2143}
2144
2145static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
2146{
2147	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2148	u32 i;
2149	u64 mask = 0;
2150	struct amdgpu_ring *ring;
2151
2152	if (!adev)
2153		return -ENODEV;
2154	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
2155		ring = &adev->gfx.compute_ring[i];
2156		if (ring->sched.ready)
2157			mask |= 1ULL << i;
2158	}
2159
2160	*val = mask;
2161	return 0;
2162}
2163
2164DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
2165			 amdgpu_debugfs_compute_sched_mask_get,
2166			 amdgpu_debugfs_compute_sched_mask_set, "%llx\n");
2167
2168#endif
2169
/**
 * amdgpu_debugfs_compute_sched_mask_init - create the compute scheduler mask file
 * @adev: amdgpu_device pointer
 *
 * With CONFIG_DEBUG_FS enabled, creates "amdgpu_compute_sched_mask" (mode
 * 0600) in the primary DRM minor's debugfs directory. The file is only
 * created when the device has more than one compute ring. Without
 * CONFIG_DEBUG_FS this function does nothing.
 */
void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	if (!(adev->gfx.num_compute_rings > 1))
		return;

	/* The name is constant, so no scratch buffer or sprintf() is needed. */
	debugfs_create_file("amdgpu_compute_sched_mask", 0600, root, adev,
			    &amdgpu_debugfs_compute_sched_mask_fops);
#endif
}