Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1/*
   2 * Copyright 2008 Advanced Micro Devices, Inc.
   3 * Copyright 2008 Red Hat Inc.
   4 * Copyright 2009 Jerome Glisse.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22 * OTHER DEALINGS IN THE SOFTWARE.
  23 *
  24 * Authors: Dave Airlie
  25 *          Alex Deucher
  26 *          Jerome Glisse
  27 */
  28#include <linux/power_supply.h>
  29#include <linux/kthread.h>
  30#include <linux/module.h>
  31#include <linux/console.h>
  32#include <linux/slab.h>
  33
  34#include <drm/drm_atomic_helper.h>
  35#include <drm/drm_probe_helper.h>
  36#include <drm/amdgpu_drm.h>
  37#include <linux/vgaarb.h>
  38#include <linux/vga_switcheroo.h>
  39#include <linux/efi.h>
  40#include "amdgpu.h"
  41#include "amdgpu_trace.h"
  42#include "amdgpu_i2c.h"
  43#include "atom.h"
  44#include "amdgpu_atombios.h"
  45#include "amdgpu_atomfirmware.h"
  46#include "amd_pcie.h"
  47#ifdef CONFIG_DRM_AMDGPU_SI
  48#include "si.h"
  49#endif
  50#ifdef CONFIG_DRM_AMDGPU_CIK
  51#include "cik.h"
  52#endif
  53#include "vi.h"
  54#include "soc15.h"
  55#include "nv.h"
  56#include "bif/bif_4_1_d.h"
  57#include <linux/pci.h>
  58#include <linux/firmware.h>
  59#include "amdgpu_vf_error.h"
  60
  61#include "amdgpu_amdkfd.h"
  62#include "amdgpu_pm.h"
  63
  64#include "amdgpu_xgmi.h"
  65#include "amdgpu_ras.h"
  66#include "amdgpu_pmu.h"
  67
  68MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  69MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
  70MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
  71MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
  72MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
  73MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
  74MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
  75MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
  76MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
  77MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
  78
  79#define AMDGPU_RESUME_MS		2000
  80
  81static const char *amdgpu_asic_name[] = {
  82	"TAHITI",
  83	"PITCAIRN",
  84	"VERDE",
  85	"OLAND",
  86	"HAINAN",
  87	"BONAIRE",
  88	"KAVERI",
  89	"KABINI",
  90	"HAWAII",
  91	"MULLINS",
  92	"TOPAZ",
  93	"TONGA",
  94	"FIJI",
  95	"CARRIZO",
  96	"STONEY",
  97	"POLARIS10",
  98	"POLARIS11",
  99	"POLARIS12",
 100	"VEGAM",
 101	"VEGA10",
 102	"VEGA12",
 103	"VEGA20",
 104	"RAVEN",
 105	"ARCTURUS",
 106	"RENOIR",
 107	"NAVI10",
 108	"NAVI14",
 109	"NAVI12",
 110	"LAST",
 111};
 112
 113/**
 114 * DOC: pcie_replay_count
 115 *
 116 * The amdgpu driver provides a sysfs API for reporting the total number
 117 * of PCIe replays (NAKs)
 118 * The file pcie_replay_count is used for this and returns the total
 119 * number of replays as a sum of the NAKs generated and NAKs received
 120 */
 121
 122static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 123		struct device_attribute *attr, char *buf)
 124{
 125	struct drm_device *ddev = dev_get_drvdata(dev);
 126	struct amdgpu_device *adev = ddev->dev_private;
 127	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
 128
 129	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
 130}
 131
 132static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
 133		amdgpu_device_get_pcie_replay_count, NULL);
 134
 135static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 136
 137/**
 138 * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control
 139 *
 140 * @dev: drm_device pointer
 141 *
 142 * Returns true if the device is a dGPU with HG/PX power control,
 143 * otherwise return false.
 144 */
 145bool amdgpu_device_is_px(struct drm_device *dev)
 146{
 147	struct amdgpu_device *adev = dev->dev_private;
 148
 149	if (adev->flags & AMD_IS_PX)
 150		return true;
 151	return false;
 152}
 153
 154/*
 155 * MMIO register access helper functions.
 156 */
 157/**
 158 * amdgpu_mm_rreg - read a memory mapped IO register
 159 *
 160 * @adev: amdgpu_device pointer
 161 * @reg: dword aligned register offset
 162 * @acc_flags: access flags which require special behavior
 163 *
 164 * Returns the 32 bit value from the offset specified.
 165 */
 166uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 167			uint32_t acc_flags)
 168{
 169	uint32_t ret;
 170
 171	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
 172		return amdgpu_virt_kiq_rreg(adev, reg);
 173
 174	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
 175		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 176	else {
 177		unsigned long flags;
 178
 179		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 180		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 181		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 182		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 183	}
 184	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
 185	return ret;
 186}
 187
 188/*
 189 * MMIO register read with bytes helper functions
 190 * @offset:bytes offset from MMIO start
 191 *
 192*/
 193
 194/**
 195 * amdgpu_mm_rreg8 - read a memory mapped IO register
 196 *
 197 * @adev: amdgpu_device pointer
 198 * @offset: byte aligned register offset
 199 *
 200 * Returns the 8 bit value from the offset specified.
 201 */
 202uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
 203	if (offset < adev->rmmio_size)
 204		return (readb(adev->rmmio + offset));
 205	BUG();
 206}
 207
 208/*
 209 * MMIO register write with bytes helper functions
 210 * @offset:bytes offset from MMIO start
 211 * @value: the value want to be written to the register
 212 *
 213*/
 214/**
 215 * amdgpu_mm_wreg8 - read a memory mapped IO register
 216 *
 217 * @adev: amdgpu_device pointer
 218 * @offset: byte aligned register offset
 219 * @value: 8 bit value to write
 220 *
 221 * Writes the value specified to the offset specified.
 222 */
 223void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
 224	if (offset < adev->rmmio_size)
 225		writeb(value, adev->rmmio + offset);
 226	else
 227		BUG();
 228}
 229
 230/**
 231 * amdgpu_mm_wreg - write to a memory mapped IO register
 232 *
 233 * @adev: amdgpu_device pointer
 234 * @reg: dword aligned register offset
 235 * @v: 32 bit value to write to the register
 236 * @acc_flags: access flags which require special behavior
 237 *
 238 * Writes the value specified to the offset specified.
 239 */
 240void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 241		    uint32_t acc_flags)
 242{
 243	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
 244
 245	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
 246		adev->last_mm_index = v;
 247	}
 248
 249	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
 250		return amdgpu_virt_kiq_wreg(adev, reg, v);
 251
 252	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
 253		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 254	else {
 255		unsigned long flags;
 256
 257		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 258		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 259		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 260		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 261	}
 262
 263	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
 264		udelay(500);
 265	}
 266}
 267
 268/**
 269 * amdgpu_io_rreg - read an IO register
 270 *
 271 * @adev: amdgpu_device pointer
 272 * @reg: dword aligned register offset
 273 *
 274 * Returns the 32 bit value from the offset specified.
 275 */
 276u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 277{
 278	if ((reg * 4) < adev->rio_mem_size)
 279		return ioread32(adev->rio_mem + (reg * 4));
 280	else {
 281		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
 282		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
 283	}
 284}
 285
 286/**
 287 * amdgpu_io_wreg - write to an IO register
 288 *
 289 * @adev: amdgpu_device pointer
 290 * @reg: dword aligned register offset
 291 * @v: 32 bit value to write to the register
 292 *
 293 * Writes the value specified to the offset specified.
 294 */
 295void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 296{
 297	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
 298		adev->last_mm_index = v;
 299	}
 300
 301	if ((reg * 4) < adev->rio_mem_size)
 302		iowrite32(v, adev->rio_mem + (reg * 4));
 303	else {
 304		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
 305		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
 306	}
 307
 308	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
 309		udelay(500);
 310	}
 311}
 312
 313/**
 314 * amdgpu_mm_rdoorbell - read a doorbell dword
 315 *
 316 * @adev: amdgpu_device pointer
 317 * @index: doorbell index
 318 *
 319 * Returns the value in the doorbell aperture at the
 320 * requested doorbell index (CIK).
 321 */
 322u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
 323{
 324	if (index < adev->doorbell.num_doorbells) {
 325		return readl(adev->doorbell.ptr + index);
 326	} else {
 327		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 328		return 0;
 329	}
 330}
 331
 332/**
 333 * amdgpu_mm_wdoorbell - write a doorbell dword
 334 *
 335 * @adev: amdgpu_device pointer
 336 * @index: doorbell index
 337 * @v: value to write
 338 *
 339 * Writes @v to the doorbell aperture at the
 340 * requested doorbell index (CIK).
 341 */
 342void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
 343{
 344	if (index < adev->doorbell.num_doorbells) {
 345		writel(v, adev->doorbell.ptr + index);
 346	} else {
 347		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
 348	}
 349}
 350
 351/**
 352 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 353 *
 354 * @adev: amdgpu_device pointer
 355 * @index: doorbell index
 356 *
 357 * Returns the value in the doorbell aperture at the
 358 * requested doorbell index (VEGA10+).
 359 */
 360u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
 361{
 362	if (index < adev->doorbell.num_doorbells) {
 363		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
 364	} else {
 365		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 366		return 0;
 367	}
 368}
 369
 370/**
 371 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 372 *
 373 * @adev: amdgpu_device pointer
 374 * @index: doorbell index
 375 * @v: value to write
 376 *
 377 * Writes @v to the doorbell aperture at the
 378 * requested doorbell index (VEGA10+).
 379 */
 380void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
 381{
 382	if (index < adev->doorbell.num_doorbells) {
 383		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
 384	} else {
 385		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
 386	}
 387}
 388
 389/**
 390 * amdgpu_invalid_rreg - dummy reg read function
 391 *
 392 * @adev: amdgpu device pointer
 393 * @reg: offset of register
 394 *
 395 * Dummy register read function.  Used for register blocks
 396 * that certain asics don't have (all asics).
 397 * Returns the value in the register.
 398 */
 399static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
 400{
 401	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
 402	BUG();
 403	return 0;
 404}
 405
 406/**
 407 * amdgpu_invalid_wreg - dummy reg write function
 408 *
 409 * @adev: amdgpu device pointer
 410 * @reg: offset of register
 411 * @v: value to write to the register
 412 *
 413 * Dummy register read function.  Used for register blocks
 414 * that certain asics don't have (all asics).
 415 */
 416static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 417{
 418	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
 419		  reg, v);
 420	BUG();
 421}
 422
 423/**
 424 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 425 *
 426 * @adev: amdgpu device pointer
 427 * @reg: offset of register
 428 *
 429 * Dummy register read function.  Used for register blocks
 430 * that certain asics don't have (all asics).
 431 * Returns the value in the register.
 432 */
 433static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
 434{
 435	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
 436	BUG();
 437	return 0;
 438}
 439
 440/**
 441 * amdgpu_invalid_wreg64 - dummy reg write function
 442 *
 443 * @adev: amdgpu device pointer
 444 * @reg: offset of register
 445 * @v: value to write to the register
 446 *
 447 * Dummy register read function.  Used for register blocks
 448 * that certain asics don't have (all asics).
 449 */
 450static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
 451{
 452	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
 453		  reg, v);
 454	BUG();
 455}
 456
 457/**
 458 * amdgpu_block_invalid_rreg - dummy reg read function
 459 *
 460 * @adev: amdgpu device pointer
 461 * @block: offset of instance
 462 * @reg: offset of register
 463 *
 464 * Dummy register read function.  Used for register blocks
 465 * that certain asics don't have (all asics).
 466 * Returns the value in the register.
 467 */
 468static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
 469					  uint32_t block, uint32_t reg)
 470{
 471	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
 472		  reg, block);
 473	BUG();
 474	return 0;
 475}
 476
 477/**
 478 * amdgpu_block_invalid_wreg - dummy reg write function
 479 *
 480 * @adev: amdgpu device pointer
 481 * @block: offset of instance
 482 * @reg: offset of register
 483 * @v: value to write to the register
 484 *
 485 * Dummy register read function.  Used for register blocks
 486 * that certain asics don't have (all asics).
 487 */
 488static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
 489				      uint32_t block,
 490				      uint32_t reg, uint32_t v)
 491{
 492	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
 493		  reg, block, v);
 494	BUG();
 495}
 496
 497/**
 498 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 499 *
 500 * @adev: amdgpu device pointer
 501 *
 502 * Allocates a scratch page of VRAM for use by various things in the
 503 * driver.
 504 */
 505static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
 506{
 507	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
 508				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
 509				       &adev->vram_scratch.robj,
 510				       &adev->vram_scratch.gpu_addr,
 511				       (void **)&adev->vram_scratch.ptr);
 512}
 513
 514/**
 515 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 516 *
 517 * @adev: amdgpu device pointer
 518 *
 519 * Frees the VRAM scratch page.
 520 */
 521static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
 522{
 523	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
 524}
 525
 526/**
 527 * amdgpu_device_program_register_sequence - program an array of registers.
 528 *
 529 * @adev: amdgpu_device pointer
 530 * @registers: pointer to the register array
 531 * @array_size: size of the register array
 532 *
 533 * Programs an array or registers with and and or masks.
 534 * This is a helper for setting golden registers.
 535 */
 536void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 537					     const u32 *registers,
 538					     const u32 array_size)
 539{
 540	u32 tmp, reg, and_mask, or_mask;
 541	int i;
 542
 543	if (array_size % 3)
 544		return;
 545
 546	for (i = 0; i < array_size; i +=3) {
 547		reg = registers[i + 0];
 548		and_mask = registers[i + 1];
 549		or_mask = registers[i + 2];
 550
 551		if (and_mask == 0xffffffff) {
 552			tmp = or_mask;
 553		} else {
 554			tmp = RREG32(reg);
 555			tmp &= ~and_mask;
 556			if (adev->family >= AMDGPU_FAMILY_AI)
 557				tmp |= (or_mask & and_mask);
 558			else
 559				tmp |= or_mask;
 560		}
 561		WREG32(reg, tmp);
 562	}
 563}
 564
 565/**
 566 * amdgpu_device_pci_config_reset - reset the GPU
 567 *
 568 * @adev: amdgpu_device pointer
 569 *
 570 * Resets the GPU using the pci config reset sequence.
 571 * Only applicable to asics prior to vega10.
 572 */
 573void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
 574{
 575	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
 576}
 577
/*
 * GPU doorbell aperture helper functions.
 */
 581/**
 582 * amdgpu_device_doorbell_init - Init doorbell driver information.
 583 *
 584 * @adev: amdgpu_device pointer
 585 *
 586 * Init doorbell driver information (CIK)
 587 * Returns 0 on success, error on failure.
 588 */
 589static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
 590{
 591
 592	/* No doorbell on SI hardware generation */
 593	if (adev->asic_type < CHIP_BONAIRE) {
 594		adev->doorbell.base = 0;
 595		adev->doorbell.size = 0;
 596		adev->doorbell.num_doorbells = 0;
 597		adev->doorbell.ptr = NULL;
 598		return 0;
 599	}
 600
 601	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
 602		return -EINVAL;
 603
 604	amdgpu_asic_init_doorbell_index(adev);
 605
 606	/* doorbell bar mapping */
 607	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
 608	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
 609
 610	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
 611					     adev->doorbell_index.max_assignment+1);
 612	if (adev->doorbell.num_doorbells == 0)
 613		return -EINVAL;
 614
 615	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
 616	 * paging queue doorbell use the second page. The
 617	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
 618	 * doorbells are in the first page. So with paging queue enabled,
 619	 * the max num_doorbells should + 1 page (0x400 in dword)
 620	 */
 621	if (adev->asic_type >= CHIP_VEGA10)
 622		adev->doorbell.num_doorbells += 0x400;
 623
 624	adev->doorbell.ptr = ioremap(adev->doorbell.base,
 625				     adev->doorbell.num_doorbells *
 626				     sizeof(u32));
 627	if (adev->doorbell.ptr == NULL)
 628		return -ENOMEM;
 629
 630	return 0;
 631}
 632
 633/**
 634 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 635 *
 636 * @adev: amdgpu_device pointer
 637 *
 638 * Tear down doorbell driver information (CIK)
 639 */
 640static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
 641{
 642	iounmap(adev->doorbell.ptr);
 643	adev->doorbell.ptr = NULL;
 644}
 645
 646
 647
 648/*
 649 * amdgpu_device_wb_*()
 650 * Writeback is the method by which the GPU updates special pages in memory
 651 * with the status of certain GPU events (fences, ring pointers,etc.).
 652 */
 653
 654/**
 655 * amdgpu_device_wb_fini - Disable Writeback and free memory
 656 *
 657 * @adev: amdgpu_device pointer
 658 *
 659 * Disables Writeback and frees the Writeback memory (all asics).
 660 * Used at driver shutdown.
 661 */
 662static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
 663{
 664	if (adev->wb.wb_obj) {
 665		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
 666				      &adev->wb.gpu_addr,
 667				      (void **)&adev->wb.wb);
 668		adev->wb.wb_obj = NULL;
 669	}
 670}
 671
 672/**
 673 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
 674 *
 675 * @adev: amdgpu_device pointer
 676 *
 677 * Initializes writeback and allocates writeback memory (all asics).
 678 * Used at driver startup.
 679 * Returns 0 on success or an -error on failure.
 680 */
 681static int amdgpu_device_wb_init(struct amdgpu_device *adev)
 682{
 683	int r;
 684
 685	if (adev->wb.wb_obj == NULL) {
 686		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
 687		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
 688					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
 689					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
 690					    (void **)&adev->wb.wb);
 691		if (r) {
 692			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
 693			return r;
 694		}
 695
 696		adev->wb.num_wb = AMDGPU_MAX_WB;
 697		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
 698
 699		/* clear wb memory */
 700		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
 701	}
 702
 703	return 0;
 704}
 705
 706/**
 707 * amdgpu_device_wb_get - Allocate a wb entry
 708 *
 709 * @adev: amdgpu_device pointer
 710 * @wb: wb index
 711 *
 712 * Allocate a wb slot for use by the driver (all asics).
 713 * Returns 0 on success or -EINVAL on failure.
 714 */
 715int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
 716{
 717	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
 718
 719	if (offset < adev->wb.num_wb) {
 720		__set_bit(offset, adev->wb.used);
 721		*wb = offset << 3; /* convert to dw offset */
 722		return 0;
 723	} else {
 724		return -EINVAL;
 725	}
 726}
 727
 728/**
 729 * amdgpu_device_wb_free - Free a wb entry
 730 *
 731 * @adev: amdgpu_device pointer
 732 * @wb: wb index
 733 *
 734 * Free a wb slot allocated for use by the driver (all asics)
 735 */
 736void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 737{
 738	wb >>= 3;
 739	if (wb < adev->wb.num_wb)
 740		__clear_bit(wb, adev->wb.used);
 741}
 742
 743/**
 744 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 745 *
 746 * @adev: amdgpu_device pointer
 747 *
 748 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 749 * to fail, but if any of the BARs is not accessible after the size we abort
 750 * driver loading by returning -ENODEV.
 751 */
 752int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
 753{
 754	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
 755	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
 756	struct pci_bus *root;
 757	struct resource *res;
 758	unsigned i;
 759	u16 cmd;
 760	int r;
 761
 762	/* Bypass for VF */
 763	if (amdgpu_sriov_vf(adev))
 764		return 0;
 765
 766	/* Check if the root BUS has 64bit memory resources */
 767	root = adev->pdev->bus;
 768	while (root->parent)
 769		root = root->parent;
 770
 771	pci_bus_for_each_resource(root, res, i) {
 772		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
 773		    res->start > 0x100000000ull)
 774			break;
 775	}
 776
 777	/* Trying to resize is pointless without a root hub window above 4GB */
 778	if (!res)
 779		return 0;
 780
 781	/* Disable memory decoding while we change the BAR addresses and size */
 782	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
 783	pci_write_config_word(adev->pdev, PCI_COMMAND,
 784			      cmd & ~PCI_COMMAND_MEMORY);
 785
 786	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
 787	amdgpu_device_doorbell_fini(adev);
 788	if (adev->asic_type >= CHIP_BONAIRE)
 789		pci_release_resource(adev->pdev, 2);
 790
 791	pci_release_resource(adev->pdev, 0);
 792
 793	r = pci_resize_resource(adev->pdev, 0, rbar_size);
 794	if (r == -ENOSPC)
 795		DRM_INFO("Not enough PCI address space for a large BAR.");
 796	else if (r && r != -ENOTSUPP)
 797		DRM_ERROR("Problem resizing BAR0 (%d).", r);
 798
 799	pci_assign_unassigned_bus_resources(adev->pdev->bus);
 800
 801	/* When the doorbell or fb BAR isn't available we have no chance of
 802	 * using the device.
 803	 */
 804	r = amdgpu_device_doorbell_init(adev);
 805	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
 806		return -ENODEV;
 807
 808	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
 809
 810	return 0;
 811}
 812
/*
 * GPU helper functions.
 */
 816/**
 817 * amdgpu_device_need_post - check if the hw need post or not
 818 *
 819 * @adev: amdgpu_device pointer
 820 *
 821 * Check if the asic has been initialized (all asics) at driver startup
 822 * or post is needed if  hw reset is performed.
 823 * Returns true if need or false if not.
 824 */
 825bool amdgpu_device_need_post(struct amdgpu_device *adev)
 826{
 827	uint32_t reg;
 828
 829	if (amdgpu_sriov_vf(adev))
 830		return false;
 831
 832	if (amdgpu_passthrough(adev)) {
 833		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
 834		 * some old smc fw still need driver do vPost otherwise gpu hang, while
 835		 * those smc fw version above 22.15 doesn't have this flaw, so we force
 836		 * vpost executed for smc version below 22.15
 837		 */
 838		if (adev->asic_type == CHIP_FIJI) {
 839			int err;
 840			uint32_t fw_ver;
 841			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
 842			/* force vPost if error occured */
 843			if (err)
 844				return true;
 845
 846			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
 847			if (fw_ver < 0x00160e00)
 848				return true;
 849		}
 850	}
 851
 852	if (adev->has_hw_reset) {
 853		adev->has_hw_reset = false;
 854		return true;
 855	}
 856
 857	/* bios scratch used on CIK+ */
 858	if (adev->asic_type >= CHIP_BONAIRE)
 859		return amdgpu_atombios_scratch_need_asic_init(adev);
 860
 861	/* check MEM_SIZE for older asics */
 862	reg = amdgpu_asic_get_config_memsize(adev);
 863
 864	if ((reg != 0) && (reg != 0xffffffff))
 865		return false;
 866
 867	return true;
 868}
 869
 870/* if we get transitioned to only one device, take VGA back */
 871/**
 872 * amdgpu_device_vga_set_decode - enable/disable vga decode
 873 *
 874 * @cookie: amdgpu_device pointer
 875 * @state: enable/disable vga decode
 876 *
 877 * Enable/disable vga decode (all asics).
 878 * Returns VGA resource flags.
 879 */
 880static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
 881{
 882	struct amdgpu_device *adev = cookie;
 883	amdgpu_asic_set_vga_state(adev, state);
 884	if (state)
 885		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
 886		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 887	else
 888		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 889}
 890
 891/**
 892 * amdgpu_device_check_block_size - validate the vm block size
 893 *
 894 * @adev: amdgpu_device pointer
 895 *
 896 * Validates the vm block size specified via module parameter.
 897 * The vm block size defines number of bits in page table versus page directory,
 898 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 899 * page table and the remaining bits are in the page directory.
 900 */
 901static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
 902{
 903	/* defines number of bits in page table versus page directory,
 904	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 905	 * page table and the remaining bits are in the page directory */
 906	if (amdgpu_vm_block_size == -1)
 907		return;
 908
 909	if (amdgpu_vm_block_size < 9) {
 910		dev_warn(adev->dev, "VM page table size (%d) too small\n",
 911			 amdgpu_vm_block_size);
 912		amdgpu_vm_block_size = -1;
 913	}
 914}
 915
 916/**
 917 * amdgpu_device_check_vm_size - validate the vm size
 918 *
 919 * @adev: amdgpu_device pointer
 920 *
 921 * Validates the vm size in GB specified via module parameter.
 922 * The VM size is the size of the GPU virtual memory space in GB.
 923 */
 924static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
 925{
 926	/* no need to check the default value */
 927	if (amdgpu_vm_size == -1)
 928		return;
 929
 930	if (amdgpu_vm_size < 1) {
 931		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
 932			 amdgpu_vm_size);
 933		amdgpu_vm_size = -1;
 934	}
 935}
 936
 937static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
 938{
 939	struct sysinfo si;
 940	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
 941	uint64_t total_memory;
 942	uint64_t dram_size_seven_GB = 0x1B8000000;
 943	uint64_t dram_size_three_GB = 0xB8000000;
 944
 945	if (amdgpu_smu_memory_pool_size == 0)
 946		return;
 947
 948	if (!is_os_64) {
 949		DRM_WARN("Not 64-bit OS, feature not supported\n");
 950		goto def_value;
 951	}
 952	si_meminfo(&si);
 953	total_memory = (uint64_t)si.totalram * si.mem_unit;
 954
 955	if ((amdgpu_smu_memory_pool_size == 1) ||
 956		(amdgpu_smu_memory_pool_size == 2)) {
 957		if (total_memory < dram_size_three_GB)
 958			goto def_value1;
 959	} else if ((amdgpu_smu_memory_pool_size == 4) ||
 960		(amdgpu_smu_memory_pool_size == 8)) {
 961		if (total_memory < dram_size_seven_GB)
 962			goto def_value1;
 963	} else {
 964		DRM_WARN("Smu memory pool size not supported\n");
 965		goto def_value;
 966	}
 967	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
 968
 969	return;
 970
 971def_value1:
 972	DRM_WARN("No enough system memory\n");
 973def_value:
 974	adev->pm.smu_prv_buffer_size = 0;
 975}
 976
 977/**
 978 * amdgpu_device_check_arguments - validate module params
 979 *
 980 * @adev: amdgpu_device pointer
 981 *
 982 * Validates certain module parameters and updates
 983 * the associated values used by the driver (all asics).
 984 */
 985static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 986{
 987	int ret = 0;
 988
 989	if (amdgpu_sched_jobs < 4) {
 990		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
 991			 amdgpu_sched_jobs);
 992		amdgpu_sched_jobs = 4;
 993	} else if (!is_power_of_2(amdgpu_sched_jobs)){
 994		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
 995			 amdgpu_sched_jobs);
 996		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
 997	}
 998
 999	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1000		/* gart size must be greater or equal to 32M */
1001		dev_warn(adev->dev, "gart size (%d) too small\n",
1002			 amdgpu_gart_size);
1003		amdgpu_gart_size = -1;
1004	}
1005
1006	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1007		/* gtt size must be greater or equal to 32M */
1008		dev_warn(adev->dev, "gtt size (%d) too small\n",
1009				 amdgpu_gtt_size);
1010		amdgpu_gtt_size = -1;
1011	}
1012
1013	/* valid range is between 4 and 9 inclusive */
1014	if (amdgpu_vm_fragment_size != -1 &&
1015	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1016		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1017		amdgpu_vm_fragment_size = -1;
1018	}
1019
1020	amdgpu_device_check_smu_prv_buffer_size(adev);
1021
1022	amdgpu_device_check_vm_size(adev);
1023
1024	amdgpu_device_check_block_size(adev);
1025
1026	ret = amdgpu_device_get_job_timeout_settings(adev);
1027	if (ret) {
1028		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
1029		return ret;
1030	}
1031
1032	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1033
1034	return ret;
1035}
1036
1037/**
1038 * amdgpu_switcheroo_set_state - set switcheroo state
1039 *
1040 * @pdev: pci dev pointer
1041 * @state: vga_switcheroo state
1042 *
1043 * Callback for the switcheroo driver.  Suspends or resumes the
1044 * the asics before or after it is powered up using ACPI methods.
1045 */
1046static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1047{
1048	struct drm_device *dev = pci_get_drvdata(pdev);
1049
1050	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
1051		return;
1052
1053	if (state == VGA_SWITCHEROO_ON) {
1054		pr_info("amdgpu: switched on\n");
1055		/* don't suspend or resume card normally */
1056		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1057
1058		amdgpu_device_resume(dev, true, true);
1059
1060		dev->switch_power_state = DRM_SWITCH_POWER_ON;
1061		drm_kms_helper_poll_enable(dev);
1062	} else {
1063		pr_info("amdgpu: switched off\n");
1064		drm_kms_helper_poll_disable(dev);
1065		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1066		amdgpu_device_suspend(dev, true, true);
1067		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1068	}
1069}
1070
1071/**
1072 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1073 *
1074 * @pdev: pci dev pointer
1075 *
1076 * Callback for the switcheroo driver.  Check of the switcheroo
1077 * state can be changed.
1078 * Returns true if the state can be changed, false if not.
1079 */
1080static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1081{
1082	struct drm_device *dev = pci_get_drvdata(pdev);
1083
1084	/*
1085	* FIXME: open_count is protected by drm_global_mutex but that would lead to
1086	* locking inversion with the driver load path. And the access here is
1087	* completely racy anyway. So don't bother with locking for now.
1088	*/
1089	return dev->open_count == 0;
1090}
1091
1092static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1093	.set_gpu_state = amdgpu_switcheroo_set_state,
1094	.reprobe = NULL,
1095	.can_switch = amdgpu_switcheroo_can_switch,
1096};
1097
1098/**
1099 * amdgpu_device_ip_set_clockgating_state - set the CG state
1100 *
1101 * @dev: amdgpu_device pointer
1102 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1103 * @state: clockgating state (gate or ungate)
1104 *
1105 * Sets the requested clockgating state for all instances of
1106 * the hardware IP specified.
1107 * Returns the error code from the last instance.
1108 */
1109int amdgpu_device_ip_set_clockgating_state(void *dev,
1110					   enum amd_ip_block_type block_type,
1111					   enum amd_clockgating_state state)
1112{
1113	struct amdgpu_device *adev = dev;
1114	int i, r = 0;
1115
1116	for (i = 0; i < adev->num_ip_blocks; i++) {
1117		if (!adev->ip_blocks[i].status.valid)
1118			continue;
1119		if (adev->ip_blocks[i].version->type != block_type)
1120			continue;
1121		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1122			continue;
1123		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1124			(void *)adev, state);
1125		if (r)
1126			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1127				  adev->ip_blocks[i].version->funcs->name, r);
1128	}
1129	return r;
1130}
1131
1132/**
1133 * amdgpu_device_ip_set_powergating_state - set the PG state
1134 *
1135 * @dev: amdgpu_device pointer
1136 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1137 * @state: powergating state (gate or ungate)
1138 *
1139 * Sets the requested powergating state for all instances of
1140 * the hardware IP specified.
1141 * Returns the error code from the last instance.
1142 */
1143int amdgpu_device_ip_set_powergating_state(void *dev,
1144					   enum amd_ip_block_type block_type,
1145					   enum amd_powergating_state state)
1146{
1147	struct amdgpu_device *adev = dev;
1148	int i, r = 0;
1149
1150	for (i = 0; i < adev->num_ip_blocks; i++) {
1151		if (!adev->ip_blocks[i].status.valid)
1152			continue;
1153		if (adev->ip_blocks[i].version->type != block_type)
1154			continue;
1155		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1156			continue;
1157		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1158			(void *)adev, state);
1159		if (r)
1160			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1161				  adev->ip_blocks[i].version->funcs->name, r);
1162	}
1163	return r;
1164}
1165
1166/**
1167 * amdgpu_device_ip_get_clockgating_state - get the CG state
1168 *
1169 * @adev: amdgpu_device pointer
1170 * @flags: clockgating feature flags
1171 *
1172 * Walks the list of IPs on the device and updates the clockgating
1173 * flags for each IP.
1174 * Updates @flags with the feature flags for each hardware IP where
1175 * clockgating is enabled.
1176 */
1177void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1178					    u32 *flags)
1179{
1180	int i;
1181
1182	for (i = 0; i < adev->num_ip_blocks; i++) {
1183		if (!adev->ip_blocks[i].status.valid)
1184			continue;
1185		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1186			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1187	}
1188}
1189
1190/**
1191 * amdgpu_device_ip_wait_for_idle - wait for idle
1192 *
1193 * @adev: amdgpu_device pointer
1194 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1195 *
1196 * Waits for the request hardware IP to be idle.
1197 * Returns 0 for success or a negative error code on failure.
1198 */
1199int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1200				   enum amd_ip_block_type block_type)
1201{
1202	int i, r;
1203
1204	for (i = 0; i < adev->num_ip_blocks; i++) {
1205		if (!adev->ip_blocks[i].status.valid)
1206			continue;
1207		if (adev->ip_blocks[i].version->type == block_type) {
1208			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1209			if (r)
1210				return r;
1211			break;
1212		}
1213	}
1214	return 0;
1215
1216}
1217
1218/**
1219 * amdgpu_device_ip_is_idle - is the hardware IP idle
1220 *
1221 * @adev: amdgpu_device pointer
1222 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1223 *
1224 * Check if the hardware IP is idle or not.
1225 * Returns true if it the IP is idle, false if not.
1226 */
1227bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1228			      enum amd_ip_block_type block_type)
1229{
1230	int i;
1231
1232	for (i = 0; i < adev->num_ip_blocks; i++) {
1233		if (!adev->ip_blocks[i].status.valid)
1234			continue;
1235		if (adev->ip_blocks[i].version->type == block_type)
1236			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1237	}
1238	return true;
1239
1240}
1241
1242/**
1243 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1244 *
1245 * @adev: amdgpu_device pointer
1246 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1247 *
1248 * Returns a pointer to the hardware IP block structure
1249 * if it exists for the asic, otherwise NULL.
1250 */
1251struct amdgpu_ip_block *
1252amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1253			      enum amd_ip_block_type type)
1254{
1255	int i;
1256
1257	for (i = 0; i < adev->num_ip_blocks; i++)
1258		if (adev->ip_blocks[i].version->type == type)
1259			return &adev->ip_blocks[i];
1260
1261	return NULL;
1262}
1263
1264/**
1265 * amdgpu_device_ip_block_version_cmp
1266 *
1267 * @adev: amdgpu_device pointer
1268 * @type: enum amd_ip_block_type
1269 * @major: major version
1270 * @minor: minor version
1271 *
1272 * return 0 if equal or greater
1273 * return 1 if smaller or the ip_block doesn't exist
1274 */
1275int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1276				       enum amd_ip_block_type type,
1277				       u32 major, u32 minor)
1278{
1279	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1280
1281	if (ip_block && ((ip_block->version->major > major) ||
1282			((ip_block->version->major == major) &&
1283			(ip_block->version->minor >= minor))))
1284		return 0;
1285
1286	return 1;
1287}
1288
1289/**
1290 * amdgpu_device_ip_block_add
1291 *
1292 * @adev: amdgpu_device pointer
1293 * @ip_block_version: pointer to the IP to add
1294 *
1295 * Adds the IP block driver information to the collection of IPs
1296 * on the asic.
1297 */
1298int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1299			       const struct amdgpu_ip_block_version *ip_block_version)
1300{
1301	if (!ip_block_version)
1302		return -EINVAL;
1303
1304	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1305		  ip_block_version->funcs->name);
1306
1307	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1308
1309	return 0;
1310}
1311
1312/**
1313 * amdgpu_device_enable_virtual_display - enable virtual display feature
1314 *
1315 * @adev: amdgpu_device pointer
1316 *
1317 * Enabled the virtual display feature if the user has enabled it via
1318 * the module parameter virtual_display.  This feature provides a virtual
1319 * display hardware on headless boards or in virtualized environments.
1320 * This function parses and validates the configuration string specified by
1321 * the user and configues the virtual display configuration (number of
1322 * virtual connectors, crtcs, etc.) specified.
1323 */
1324static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1325{
1326	adev->enable_virtual_display = false;
1327
1328	if (amdgpu_virtual_display) {
1329		struct drm_device *ddev = adev->ddev;
1330		const char *pci_address_name = pci_name(ddev->pdev);
1331		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1332
1333		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1334		pciaddstr_tmp = pciaddstr;
1335		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1336			pciaddname = strsep(&pciaddname_tmp, ",");
1337			if (!strcmp("all", pciaddname)
1338			    || !strcmp(pci_address_name, pciaddname)) {
1339				long num_crtc;
1340				int res = -1;
1341
1342				adev->enable_virtual_display = true;
1343
1344				if (pciaddname_tmp)
1345					res = kstrtol(pciaddname_tmp, 10,
1346						      &num_crtc);
1347
1348				if (!res) {
1349					if (num_crtc < 1)
1350						num_crtc = 1;
1351					if (num_crtc > 6)
1352						num_crtc = 6;
1353					adev->mode_info.num_crtc = num_crtc;
1354				} else {
1355					adev->mode_info.num_crtc = 1;
1356				}
1357				break;
1358			}
1359		}
1360
1361		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1362			 amdgpu_virtual_display, pci_address_name,
1363			 adev->enable_virtual_display, adev->mode_info.num_crtc);
1364
1365		kfree(pciaddstr);
1366	}
1367}
1368
1369/**
1370 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1371 *
1372 * @adev: amdgpu_device pointer
1373 *
1374 * Parses the asic configuration parameters specified in the gpu info
1375 * firmware and makes them availale to the driver for use in configuring
1376 * the asic.
1377 * Returns 0 on success, -EINVAL on failure.
1378 */
1379static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1380{
1381	const char *chip_name;
1382	char fw_name[30];
1383	int err;
1384	const struct gpu_info_firmware_header_v1_0 *hdr;
1385
1386	adev->firmware.gpu_info_fw = NULL;
1387
1388	switch (adev->asic_type) {
1389	case CHIP_TOPAZ:
1390	case CHIP_TONGA:
1391	case CHIP_FIJI:
1392	case CHIP_POLARIS10:
1393	case CHIP_POLARIS11:
1394	case CHIP_POLARIS12:
1395	case CHIP_VEGAM:
1396	case CHIP_CARRIZO:
1397	case CHIP_STONEY:
1398#ifdef CONFIG_DRM_AMDGPU_SI
1399	case CHIP_VERDE:
1400	case CHIP_TAHITI:
1401	case CHIP_PITCAIRN:
1402	case CHIP_OLAND:
1403	case CHIP_HAINAN:
1404#endif
1405#ifdef CONFIG_DRM_AMDGPU_CIK
1406	case CHIP_BONAIRE:
1407	case CHIP_HAWAII:
1408	case CHIP_KAVERI:
1409	case CHIP_KABINI:
1410	case CHIP_MULLINS:
1411#endif
1412	case CHIP_VEGA20:
1413	default:
1414		return 0;
1415	case CHIP_VEGA10:
1416		chip_name = "vega10";
1417		break;
1418	case CHIP_VEGA12:
1419		chip_name = "vega12";
1420		break;
1421	case CHIP_RAVEN:
1422		if (adev->rev_id >= 8)
1423			chip_name = "raven2";
1424		else if (adev->pdev->device == 0x15d8)
1425			chip_name = "picasso";
1426		else
1427			chip_name = "raven";
1428		break;
1429	case CHIP_ARCTURUS:
1430		chip_name = "arcturus";
1431		break;
1432	case CHIP_RENOIR:
1433		chip_name = "renoir";
1434		break;
1435	case CHIP_NAVI10:
1436		chip_name = "navi10";
1437		break;
1438	case CHIP_NAVI14:
1439		chip_name = "navi14";
1440		break;
1441	case CHIP_NAVI12:
1442		chip_name = "navi12";
1443		break;
1444	}
1445
1446	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1447	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
1448	if (err) {
1449		dev_err(adev->dev,
1450			"Failed to load gpu_info firmware \"%s\"\n",
1451			fw_name);
1452		goto out;
1453	}
1454	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
1455	if (err) {
1456		dev_err(adev->dev,
1457			"Failed to validate gpu_info firmware \"%s\"\n",
1458			fw_name);
1459		goto out;
1460	}
1461
1462	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
1463	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1464
1465	switch (hdr->version_major) {
1466	case 1:
1467	{
1468		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
1469			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
1470								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1471
1472		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1473		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1474		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1475		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
1476		adev->gfx.config.max_texture_channel_caches =
1477			le32_to_cpu(gpu_info_fw->gc_num_tccs);
1478		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1479		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1480		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1481		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
1482		adev->gfx.config.double_offchip_lds_buf =
1483			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1484		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
1485		adev->gfx.cu_info.max_waves_per_simd =
1486			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1487		adev->gfx.cu_info.max_scratch_slots_per_cu =
1488			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1489		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
1490		if (hdr->version_minor >= 1) {
1491			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1492				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1493									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1494			adev->gfx.config.num_sc_per_sh =
1495				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1496			adev->gfx.config.num_packer_per_sc =
1497				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1498		}
1499#ifdef CONFIG_DRM_AMD_DC_DCN2_0
1500		if (hdr->version_minor == 2) {
1501			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1502				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1503									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1504			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1505		}
1506#endif
1507		break;
1508	}
1509	default:
1510		dev_err(adev->dev,
1511			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1512		err = -EINVAL;
1513		goto out;
1514	}
1515out:
1516	return err;
1517}
1518
1519/**
1520 * amdgpu_device_ip_early_init - run early init for hardware IPs
1521 *
1522 * @adev: amdgpu_device pointer
1523 *
1524 * Early initialization pass for hardware IPs.  The hardware IPs that make
1525 * up each asic are discovered each IP's early_init callback is run.  This
1526 * is the first stage in initializing the asic.
1527 * Returns 0 on success, negative error code on failure.
1528 */
1529static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1530{
1531	int i, r;
1532
1533	amdgpu_device_enable_virtual_display(adev);
1534
1535	switch (adev->asic_type) {
1536	case CHIP_TOPAZ:
1537	case CHIP_TONGA:
1538	case CHIP_FIJI:
1539	case CHIP_POLARIS10:
1540	case CHIP_POLARIS11:
1541	case CHIP_POLARIS12:
1542	case CHIP_VEGAM:
1543	case CHIP_CARRIZO:
1544	case CHIP_STONEY:
1545		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
1546			adev->family = AMDGPU_FAMILY_CZ;
1547		else
1548			adev->family = AMDGPU_FAMILY_VI;
1549
1550		r = vi_set_ip_blocks(adev);
1551		if (r)
1552			return r;
1553		break;
1554#ifdef CONFIG_DRM_AMDGPU_SI
1555	case CHIP_VERDE:
1556	case CHIP_TAHITI:
1557	case CHIP_PITCAIRN:
1558	case CHIP_OLAND:
1559	case CHIP_HAINAN:
1560		adev->family = AMDGPU_FAMILY_SI;
1561		r = si_set_ip_blocks(adev);
1562		if (r)
1563			return r;
1564		break;
1565#endif
1566#ifdef CONFIG_DRM_AMDGPU_CIK
1567	case CHIP_BONAIRE:
1568	case CHIP_HAWAII:
1569	case CHIP_KAVERI:
1570	case CHIP_KABINI:
1571	case CHIP_MULLINS:
1572		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1573			adev->family = AMDGPU_FAMILY_CI;
1574		else
1575			adev->family = AMDGPU_FAMILY_KV;
1576
1577		r = cik_set_ip_blocks(adev);
1578		if (r)
1579			return r;
1580		break;
1581#endif
1582	case CHIP_VEGA10:
1583	case CHIP_VEGA12:
1584	case CHIP_VEGA20:
1585	case CHIP_RAVEN:
1586	case CHIP_ARCTURUS:
1587	case CHIP_RENOIR:
1588		if (adev->asic_type == CHIP_RAVEN ||
1589		    adev->asic_type == CHIP_RENOIR)
1590			adev->family = AMDGPU_FAMILY_RV;
1591		else
1592			adev->family = AMDGPU_FAMILY_AI;
1593
1594		r = soc15_set_ip_blocks(adev);
1595		if (r)
1596			return r;
1597		break;
1598	case  CHIP_NAVI10:
1599	case  CHIP_NAVI14:
1600	case  CHIP_NAVI12:
1601		adev->family = AMDGPU_FAMILY_NV;
1602
1603		r = nv_set_ip_blocks(adev);
1604		if (r)
1605			return r;
1606		break;
1607	default:
1608		/* FIXME: not supported yet */
1609		return -EINVAL;
1610	}
1611
1612	r = amdgpu_device_parse_gpu_info_fw(adev);
1613	if (r)
1614		return r;
1615
1616	amdgpu_amdkfd_device_probe(adev);
1617
1618	if (amdgpu_sriov_vf(adev)) {
1619		r = amdgpu_virt_request_full_gpu(adev, true);
1620		if (r)
1621			return -EAGAIN;
1622	}
1623
1624	adev->pm.pp_feature = amdgpu_pp_feature_mask;
1625	if (amdgpu_sriov_vf(adev))
1626		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1627
1628	for (i = 0; i < adev->num_ip_blocks; i++) {
1629		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1630			DRM_ERROR("disabled ip block: %d <%s>\n",
1631				  i, adev->ip_blocks[i].version->funcs->name);
1632			adev->ip_blocks[i].status.valid = false;
1633		} else {
1634			if (adev->ip_blocks[i].version->funcs->early_init) {
1635				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
1636				if (r == -ENOENT) {
1637					adev->ip_blocks[i].status.valid = false;
1638				} else if (r) {
1639					DRM_ERROR("early_init of IP block <%s> failed %d\n",
1640						  adev->ip_blocks[i].version->funcs->name, r);
1641					return r;
1642				} else {
1643					adev->ip_blocks[i].status.valid = true;
1644				}
1645			} else {
1646				adev->ip_blocks[i].status.valid = true;
1647			}
1648		}
1649		/* get the vbios after the asic_funcs are set up */
1650		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1651			/* Read BIOS */
1652			if (!amdgpu_get_bios(adev))
1653				return -EINVAL;
1654
1655			r = amdgpu_atombios_init(adev);
1656			if (r) {
1657				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1658				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1659				return r;
1660			}
1661		}
1662	}
1663
1664	adev->cg_flags &= amdgpu_cg_mask;
1665	adev->pg_flags &= amdgpu_pg_mask;
1666
1667	return 0;
1668}
1669
1670static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1671{
1672	int i, r;
1673
1674	for (i = 0; i < adev->num_ip_blocks; i++) {
1675		if (!adev->ip_blocks[i].status.sw)
1676			continue;
1677		if (adev->ip_blocks[i].status.hw)
1678			continue;
1679		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1680		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1681		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1682			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1683			if (r) {
1684				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1685					  adev->ip_blocks[i].version->funcs->name, r);
1686				return r;
1687			}
1688			adev->ip_blocks[i].status.hw = true;
1689		}
1690	}
1691
1692	return 0;
1693}
1694
1695static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1696{
1697	int i, r;
1698
1699	for (i = 0; i < adev->num_ip_blocks; i++) {
1700		if (!adev->ip_blocks[i].status.sw)
1701			continue;
1702		if (adev->ip_blocks[i].status.hw)
1703			continue;
1704		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1705		if (r) {
1706			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1707				  adev->ip_blocks[i].version->funcs->name, r);
1708			return r;
1709		}
1710		adev->ip_blocks[i].status.hw = true;
1711	}
1712
1713	return 0;
1714}
1715
1716static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1717{
1718	int r = 0;
1719	int i;
1720	uint32_t smu_version;
1721
1722	if (adev->asic_type >= CHIP_VEGA10) {
1723		for (i = 0; i < adev->num_ip_blocks; i++) {
1724			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1725				continue;
1726
1727			/* no need to do the fw loading again if already done*/
1728			if (adev->ip_blocks[i].status.hw == true)
1729				break;
1730
1731			if (adev->in_gpu_reset || adev->in_suspend) {
1732				r = adev->ip_blocks[i].version->funcs->resume(adev);
1733				if (r) {
1734					DRM_ERROR("resume of IP block <%s> failed %d\n",
1735							  adev->ip_blocks[i].version->funcs->name, r);
1736					return r;
1737				}
1738			} else {
1739				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1740				if (r) {
1741					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1742							  adev->ip_blocks[i].version->funcs->name, r);
1743					return r;
1744				}
1745			}
1746
1747			adev->ip_blocks[i].status.hw = true;
1748			break;
1749		}
1750	}
1751
1752	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
1753
1754	return r;
1755}
1756
1757/**
1758 * amdgpu_device_ip_init - run init for hardware IPs
1759 *
1760 * @adev: amdgpu_device pointer
1761 *
1762 * Main initialization pass for hardware IPs.  The list of all the hardware
1763 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1764 * are run.  sw_init initializes the software state associated with each IP
1765 * and hw_init initializes the hardware associated with each IP.
1766 * Returns 0 on success, negative error code on failure.
1767 */
1768static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1769{
1770	int i, r;
1771
1772	r = amdgpu_ras_init(adev);
1773	if (r)
1774		return r;
1775
1776	for (i = 0; i < adev->num_ip_blocks; i++) {
1777		if (!adev->ip_blocks[i].status.valid)
1778			continue;
1779		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
1780		if (r) {
1781			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1782				  adev->ip_blocks[i].version->funcs->name, r);
1783			goto init_failed;
1784		}
1785		adev->ip_blocks[i].status.sw = true;
1786
1787		/* need to do gmc hw init early so we can allocate gpu mem */
1788		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
1789			r = amdgpu_device_vram_scratch_init(adev);
1790			if (r) {
1791				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
1792				goto init_failed;
1793			}
1794			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
1795			if (r) {
1796				DRM_ERROR("hw_init %d failed %d\n", i, r);
1797				goto init_failed;
1798			}
1799			r = amdgpu_device_wb_init(adev);
1800			if (r) {
1801				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
1802				goto init_failed;
1803			}
1804			adev->ip_blocks[i].status.hw = true;
1805
1806			/* right after GMC hw init, we create CSA */
1807			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1808				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1809								AMDGPU_GEM_DOMAIN_VRAM,
1810								AMDGPU_CSA_SIZE);
1811				if (r) {
1812					DRM_ERROR("allocate CSA failed %d\n", r);
1813					goto init_failed;
1814				}
1815			}
1816		}
1817	}
1818
1819	r = amdgpu_ib_pool_init(adev);
1820	if (r) {
1821		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1822		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1823		goto init_failed;
1824	}
1825
1826	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1827	if (r)
1828		goto init_failed;
1829
1830	r = amdgpu_device_ip_hw_init_phase1(adev);
1831	if (r)
1832		goto init_failed;
1833
1834	r = amdgpu_device_fw_loading(adev);
1835	if (r)
1836		goto init_failed;
1837
1838	r = amdgpu_device_ip_hw_init_phase2(adev);
1839	if (r)
1840		goto init_failed;
1841
1842	if (adev->gmc.xgmi.num_physical_nodes > 1)
1843		amdgpu_xgmi_add_device(adev);
1844	amdgpu_amdkfd_device_init(adev);
1845
1846init_failed:
1847	if (amdgpu_sriov_vf(adev)) {
1848		if (!r)
1849			amdgpu_virt_init_data_exchange(adev);
1850		amdgpu_virt_release_full_gpu(adev, true);
1851	}
1852
1853	return r;
1854}
1855
1856/**
1857 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1858 *
1859 * @adev: amdgpu_device pointer
1860 *
1861 * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
1862 * this function before a GPU reset.  If the value is retained after a
1863 * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
1864 */
1865static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
1866{
1867	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1868}
1869
1870/**
1871 * amdgpu_device_check_vram_lost - check if vram is valid
1872 *
1873 * @adev: amdgpu_device pointer
1874 *
1875 * Checks the reset magic value written to the gart pointer in VRAM.
1876 * The driver calls this after a GPU reset to see if the contents of
1877 * VRAM is lost or now.
1878 * returns true if vram is lost, false if not.
1879 */
1880static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
1881{
1882	return !!memcmp(adev->gart.ptr, adev->reset_magic,
1883			AMDGPU_RESET_MAGIC_NUM);
1884}
1885
1886/**
1887 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
1888 *
1889 * @adev: amdgpu_device pointer
1890 *
1891 * The list of all the hardware IPs that make up the asic is walked and the
1892 * set_clockgating_state callbacks are run.
1893 * Late initialization pass enabling clockgating for hardware IPs.
1894 * Fini or suspend, pass disabling clockgating for hardware IPs.
1895 * Returns 0 on success, negative error code on failure.
1896 */
1897
1898static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1899						enum amd_clockgating_state state)
1900{
1901	int i, j, r;
1902
1903	if (amdgpu_emu_mode == 1)
1904		return 0;
1905
1906	for (j = 0; j < adev->num_ip_blocks; j++) {
1907		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
1908		if (!adev->ip_blocks[i].status.late_initialized)
1909			continue;
1910		/* skip CG for VCE/UVD, it's handled specially */
1911		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1912		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1913		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1914		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1915			/* enable clockgating to save power */
1916			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1917										     state);
1918			if (r) {
1919				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
1920					  adev->ip_blocks[i].version->funcs->name, r);
1921				return r;
1922			}
1923		}
1924	}
1925
1926	return 0;
1927}
1928
1929static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
1930{
1931	int i, j, r;
1932
1933	if (amdgpu_emu_mode == 1)
1934		return 0;
1935
1936	for (j = 0; j < adev->num_ip_blocks; j++) {
1937		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
1938		if (!adev->ip_blocks[i].status.late_initialized)
1939			continue;
1940		/* skip CG for VCE/UVD, it's handled specially */
1941		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1942		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1943		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1944		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
1945			/* enable powergating to save power */
1946			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1947											state);
1948			if (r) {
1949				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1950					  adev->ip_blocks[i].version->funcs->name, r);
1951				return r;
1952			}
1953		}
1954	}
1955	return 0;
1956}
1957
1958static int amdgpu_device_enable_mgpu_fan_boost(void)
1959{
1960	struct amdgpu_gpu_instance *gpu_ins;
1961	struct amdgpu_device *adev;
1962	int i, ret = 0;
1963
1964	mutex_lock(&mgpu_info.mutex);
1965
1966	/*
1967	 * MGPU fan boost feature should be enabled
1968	 * only when there are two or more dGPUs in
1969	 * the system
1970	 */
1971	if (mgpu_info.num_dgpu < 2)
1972		goto out;
1973
1974	for (i = 0; i < mgpu_info.num_dgpu; i++) {
1975		gpu_ins = &(mgpu_info.gpu_ins[i]);
1976		adev = gpu_ins->adev;
1977		if (!(adev->flags & AMD_IS_APU) &&
1978		    !gpu_ins->mgpu_fan_enabled &&
1979		    adev->powerplay.pp_funcs &&
1980		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1981			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1982			if (ret)
1983				break;
1984
1985			gpu_ins->mgpu_fan_enabled = 1;
1986		}
1987	}
1988
1989out:
1990	mutex_unlock(&mgpu_info.mutex);
1991
1992	return ret;
1993}
1994
1995/**
1996 * amdgpu_device_ip_late_init - run late init for hardware IPs
1997 *
1998 * @adev: amdgpu_device pointer
1999 *
2000 * Late initialization pass for hardware IPs.  The list of all the hardware
2001 * IPs that make up the asic is walked and the late_init callbacks are run.
2002 * late_init covers any special initialization that an IP requires
2003 * after all of the have been initialized or something that needs to happen
2004 * late in the init process.
2005 * Returns 0 on success, negative error code on failure.
2006 */
2007static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2008{
2009	int i = 0, r;
2010
2011	for (i = 0; i < adev->num_ip_blocks; i++) {
2012		if (!adev->ip_blocks[i].status.hw)
2013			continue;
2014		if (adev->ip_blocks[i].version->funcs->late_init) {
2015			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2016			if (r) {
2017				DRM_ERROR("late_init of IP block <%s> failed %d\n",
2018					  adev->ip_blocks[i].version->funcs->name, r);
2019				return r;
2020			}
2021		}
2022		adev->ip_blocks[i].status.late_initialized = true;
2023	}
2024
2025	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2026	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2027
2028	amdgpu_device_fill_reset_magic(adev);
2029
2030	r = amdgpu_device_enable_mgpu_fan_boost();
2031	if (r)
2032		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2033
2034	/* set to low pstate by default */
2035	amdgpu_xgmi_set_pstate(adev, 0);
2036
2037	return 0;
2038}
2039
2040/**
2041 * amdgpu_device_ip_fini - run fini for hardware IPs
2042 *
2043 * @adev: amdgpu_device pointer
2044 *
2045 * Main teardown pass for hardware IPs.  The list of all the hardware
2046 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2047 * are run.  hw_fini tears down the hardware associated with each IP
2048 * and sw_fini tears down any software state associated with each IP.
2049 * Returns 0 on success, negative error code on failure.
2050 */
2051static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2052{
2053	int i, r;
2054
2055	amdgpu_ras_pre_fini(adev);
2056
2057	if (adev->gmc.xgmi.num_physical_nodes > 1)
2058		amdgpu_xgmi_remove_device(adev);
2059
2060	amdgpu_amdkfd_device_fini(adev);
2061
2062	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2063	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2064
2065	/* need to disable SMC first */
2066	for (i = 0; i < adev->num_ip_blocks; i++) {
2067		if (!adev->ip_blocks[i].status.hw)
2068			continue;
2069		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2070			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2071			/* XXX handle errors */
2072			if (r) {
2073				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2074					  adev->ip_blocks[i].version->funcs->name, r);
2075			}
2076			adev->ip_blocks[i].status.hw = false;
2077			break;
2078		}
2079	}
2080
2081	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2082		if (!adev->ip_blocks[i].status.hw)
2083			continue;
2084
2085		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2086		/* XXX handle errors */
2087		if (r) {
2088			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2089				  adev->ip_blocks[i].version->funcs->name, r);
2090		}
2091
2092		adev->ip_blocks[i].status.hw = false;
2093	}
2094
2095
2096	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2097		if (!adev->ip_blocks[i].status.sw)
2098			continue;
2099
2100		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2101			amdgpu_ucode_free_bo(adev);
2102			amdgpu_free_static_csa(&adev->virt.csa_obj);
2103			amdgpu_device_wb_fini(adev);
2104			amdgpu_device_vram_scratch_fini(adev);
2105			amdgpu_ib_pool_fini(adev);
2106		}
2107
2108		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2109		/* XXX handle errors */
2110		if (r) {
2111			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2112				  adev->ip_blocks[i].version->funcs->name, r);
2113		}
2114		adev->ip_blocks[i].status.sw = false;
2115		adev->ip_blocks[i].status.valid = false;
2116	}
2117
2118	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2119		if (!adev->ip_blocks[i].status.late_initialized)
2120			continue;
2121		if (adev->ip_blocks[i].version->funcs->late_fini)
2122			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2123		adev->ip_blocks[i].status.late_initialized = false;
2124	}
2125
2126	amdgpu_ras_fini(adev);
2127
2128	if (amdgpu_sriov_vf(adev))
2129		if (amdgpu_virt_release_full_gpu(adev, false))
2130			DRM_ERROR("failed to release exclusive mode on fini\n");
2131
2132	return 0;
2133}
2134
2135/**
2136 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2137 *
2138 * @work: work_struct.
2139 */
2140static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2141{
2142	struct amdgpu_device *adev =
2143		container_of(work, struct amdgpu_device, delayed_init_work.work);
2144	int r;
2145
2146	r = amdgpu_ib_ring_tests(adev);
2147	if (r)
2148		DRM_ERROR("ib ring test failed (%d).\n", r);
2149}
2150
2151static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2152{
2153	struct amdgpu_device *adev =
2154		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2155
2156	mutex_lock(&adev->gfx.gfx_off_mutex);
2157	if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2158		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2159			adev->gfx.gfx_off_state = true;
2160	}
2161	mutex_unlock(&adev->gfx.gfx_off_mutex);
2162}
2163
2164/**
2165 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2166 *
2167 * @adev: amdgpu_device pointer
2168 *
2169 * Main suspend function for hardware IPs.  The list of all the hardware
2170 * IPs that make up the asic is walked, clockgating is disabled and the
2171 * suspend callbacks are run.  suspend puts the hardware and software state
2172 * in each IP into a state suitable for suspend.
2173 * Returns 0 on success, negative error code on failure.
2174 */
2175static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2176{
2177	int i, r;
2178
2179	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2180	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2181
2182	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2183		if (!adev->ip_blocks[i].status.valid)
2184			continue;
2185		/* displays are handled separately */
2186		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
2187			/* XXX handle errors */
2188			r = adev->ip_blocks[i].version->funcs->suspend(adev);
2189			/* XXX handle errors */
2190			if (r) {
2191				DRM_ERROR("suspend of IP block <%s> failed %d\n",
2192					  adev->ip_blocks[i].version->funcs->name, r);
2193				return r;
2194			}
2195			adev->ip_blocks[i].status.hw = false;
2196		}
2197	}
2198
2199	return 0;
2200}
2201
2202/**
2203 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2204 *
2205 * @adev: amdgpu_device pointer
2206 *
2207 * Main suspend function for hardware IPs.  The list of all the hardware
2208 * IPs that make up the asic is walked, clockgating is disabled and the
2209 * suspend callbacks are run.  suspend puts the hardware and software state
2210 * in each IP into a state suitable for suspend.
2211 * Returns 0 on success, negative error code on failure.
2212 */
2213static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2214{
2215	int i, r;
2216
2217	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2218		if (!adev->ip_blocks[i].status.valid)
2219			continue;
2220		/* displays are handled in phase1 */
2221		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2222			continue;
2223		/* XXX handle errors */
2224		r = adev->ip_blocks[i].version->funcs->suspend(adev);
2225		/* XXX handle errors */
2226		if (r) {
2227			DRM_ERROR("suspend of IP block <%s> failed %d\n",
2228				  adev->ip_blocks[i].version->funcs->name, r);
2229		}
2230		adev->ip_blocks[i].status.hw = false;
2231		/* handle putting the SMC in the appropriate state */
2232		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2233			if (is_support_sw_smu(adev)) {
2234				/* todo */
2235			} else if (adev->powerplay.pp_funcs &&
2236					   adev->powerplay.pp_funcs->set_mp1_state) {
2237				r = adev->powerplay.pp_funcs->set_mp1_state(
2238					adev->powerplay.pp_handle,
2239					adev->mp1_state);
2240				if (r) {
2241					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2242						  adev->mp1_state, r);
2243					return r;
2244				}
2245			}
2246		}
2247
2248		adev->ip_blocks[i].status.hw = false;
2249	}
2250
2251	return 0;
2252}
2253
2254/**
2255 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2256 *
2257 * @adev: amdgpu_device pointer
2258 *
2259 * Main suspend function for hardware IPs.  The list of all the hardware
2260 * IPs that make up the asic is walked, clockgating is disabled and the
2261 * suspend callbacks are run.  suspend puts the hardware and software state
2262 * in each IP into a state suitable for suspend.
2263 * Returns 0 on success, negative error code on failure.
2264 */
2265int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2266{
2267	int r;
2268
2269	if (amdgpu_sriov_vf(adev))
2270		amdgpu_virt_request_full_gpu(adev, false);
2271
2272	r = amdgpu_device_ip_suspend_phase1(adev);
2273	if (r)
2274		return r;
2275	r = amdgpu_device_ip_suspend_phase2(adev);
2276
2277	if (amdgpu_sriov_vf(adev))
2278		amdgpu_virt_release_full_gpu(adev, false);
2279
2280	return r;
2281}
2282
2283static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2284{
2285	int i, r;
2286
2287	static enum amd_ip_block_type ip_order[] = {
2288		AMD_IP_BLOCK_TYPE_GMC,
2289		AMD_IP_BLOCK_TYPE_COMMON,
2290		AMD_IP_BLOCK_TYPE_PSP,
2291		AMD_IP_BLOCK_TYPE_IH,
2292	};
2293
2294	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2295		int j;
2296		struct amdgpu_ip_block *block;
2297
2298		for (j = 0; j < adev->num_ip_blocks; j++) {
2299			block = &adev->ip_blocks[j];
2300
2301			block->status.hw = false;
2302			if (block->version->type != ip_order[i] ||
2303				!block->status.valid)
2304				continue;
2305
2306			r = block->version->funcs->hw_init(adev);
2307			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
2308			if (r)
2309				return r;
2310			block->status.hw = true;
2311		}
2312	}
2313
2314	return 0;
2315}
2316
2317static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2318{
2319	int i, r;
2320
2321	static enum amd_ip_block_type ip_order[] = {
2322		AMD_IP_BLOCK_TYPE_SMC,
2323		AMD_IP_BLOCK_TYPE_DCE,
2324		AMD_IP_BLOCK_TYPE_GFX,
2325		AMD_IP_BLOCK_TYPE_SDMA,
2326		AMD_IP_BLOCK_TYPE_UVD,
2327		AMD_IP_BLOCK_TYPE_VCE
2328	};
2329
2330	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2331		int j;
2332		struct amdgpu_ip_block *block;
2333
2334		for (j = 0; j < adev->num_ip_blocks; j++) {
2335			block = &adev->ip_blocks[j];
2336
2337			if (block->version->type != ip_order[i] ||
2338				!block->status.valid ||
2339				block->status.hw)
2340				continue;
2341
2342			r = block->version->funcs->hw_init(adev);
2343			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
2344			if (r)
2345				return r;
2346			block->status.hw = true;
2347		}
2348	}
2349
2350	return 0;
2351}
2352
2353/**
2354 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2355 *
2356 * @adev: amdgpu_device pointer
2357 *
2358 * First resume function for hardware IPs.  The list of all the hardware
2359 * IPs that make up the asic is walked and the resume callbacks are run for
2360 * COMMON, GMC, and IH.  resume puts the hardware into a functional state
2361 * after a suspend and updates the software state as necessary.  This
2362 * function is also used for restoring the GPU after a GPU reset.
2363 * Returns 0 on success, negative error code on failure.
2364 */
2365static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2366{
2367	int i, r;
2368
2369	for (i = 0; i < adev->num_ip_blocks; i++) {
2370		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2371			continue;
2372		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2373		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2374		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2375
2376			r = adev->ip_blocks[i].version->funcs->resume(adev);
2377			if (r) {
2378				DRM_ERROR("resume of IP block <%s> failed %d\n",
2379					  adev->ip_blocks[i].version->funcs->name, r);
2380				return r;
2381			}
2382			adev->ip_blocks[i].status.hw = true;
2383		}
2384	}
2385
2386	return 0;
2387}
2388
2389/**
2390 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2391 *
2392 * @adev: amdgpu_device pointer
2393 *
2394 * First resume function for hardware IPs.  The list of all the hardware
2395 * IPs that make up the asic is walked and the resume callbacks are run for
2396 * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
2397 * functional state after a suspend and updates the software state as
2398 * necessary.  This function is also used for restoring the GPU after a GPU
2399 * reset.
2400 * Returns 0 on success, negative error code on failure.
2401 */
2402static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2403{
2404	int i, r;
2405
2406	for (i = 0; i < adev->num_ip_blocks; i++) {
2407		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
2408			continue;
2409		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2410		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2411		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2412		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
2413			continue;
2414		r = adev->ip_blocks[i].version->funcs->resume(adev);
2415		if (r) {
2416			DRM_ERROR("resume of IP block <%s> failed %d\n",
2417				  adev->ip_blocks[i].version->funcs->name, r);
2418			return r;
2419		}
2420		adev->ip_blocks[i].status.hw = true;
2421	}
2422
2423	return 0;
2424}
2425
/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs.  The hardware IPs are split into
 * two resume functions because they are also used in recovering from a GPU
 * reset, where some additional steps need to be taken between them.  In this
 * case (S3/S4) they are run sequentially, with firmware loading in between.
 * The sequence stops at the first step that fails.
 * Returns 0 on success, or the error code of the failing step.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int ret;

	ret = amdgpu_device_ip_resume_phase1(adev);
	if (!ret)
		ret = amdgpu_device_fw_loading(adev);
	if (!ret)
		ret = amdgpu_device_ip_resume_phase2(adev);

	return ret;
}
2454
2455/**
2456 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2457 *
2458 * @adev: amdgpu_device pointer
2459 *
2460 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2461 */
2462static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
2463{
2464	if (amdgpu_sriov_vf(adev)) {
2465		if (adev->is_atom_fw) {
2466			if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2467				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2468		} else {
2469			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2470				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2471		}
2472
2473		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2474			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
2475	}
2476}
2477
/**
 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
 *
 * @asic_type: AMD asic type
 *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * When CONFIG_DRM_AMD_DC is not defined, every asic falls through to the
 * default case and is reported as unsupported.
 * Returns true if DC has support, false if not.
 */
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
{
	switch (asic_type) {
#if defined(CONFIG_DRM_AMD_DC)
	case CHIP_BONAIRE:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		/*
		 * We have systems in the wild with these ASICs that require
		 * LVDS and VGA support which is not supported with DC.
		 *
		 * Fallback to the non-DC driver here by default so as not to
		 * cause regressions.
		 */
		/* DC is used on these ASICs only when amdgpu_dc is positive */
		return amdgpu_dc > 0;
	case CHIP_HAWAII:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	/* the case labels below exist only when the matching DCN option is set */
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
	case CHIP_RAVEN:
#endif
#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
#endif
#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
	case CHIP_RENOIR:
#endif
		/* DC is used on these ASICs whenever amdgpu_dc is non-zero */
		return amdgpu_dc != 0;
#endif
	default:
		return false;
	}
}
2531
2532/**
2533 * amdgpu_device_has_dc_support - check if dc is supported
2534 *
2535 * @adev: amdgpu_device_pointer
2536 *
2537 * Returns true for supported, false for not supported
2538 */
2539bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2540{
2541	if (amdgpu_sriov_vf(adev))
2542		return false;
2543
2544	return amdgpu_device_asic_has_dc_support(adev->asic_type);
2545}
2546
2547
2548static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2549{
2550	struct amdgpu_device *adev =
2551		container_of(__work, struct amdgpu_device, xgmi_reset_work);
2552
2553	adev->asic_reset_res =  amdgpu_asic_reset(adev);
2554	if (adev->asic_reset_res)
2555		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
2556			 adev->asic_reset_res, adev->ddev->unique);
2557}
2558
2559
/**
 * amdgpu_device_init - initialize the driver
 *
 * @adev: amdgpu_device pointer
 * @ddev: drm dev pointer
 * @pdev: pci dev pointer
 * @flags: driver flags
 *
 * Initializes the driver info and hw (all asics).  In order, this fills in
 * the basic device fields and default register accessors, initializes locks
 * and work items, maps the MMIO and I/O BARs, runs IP early init, registers
 * with the VGA arbiter and vga_switcheroo, resets/posts the card if needed,
 * reads clock info, brings up the fence driver and all IP blocks, registers
 * sysfs/debugfs entries and finally runs IP late init.
 * Returns 0 for success or an error on failure.
 * Called at driver startup.
 */
int amdgpu_device_init(struct amdgpu_device *adev,
		       struct drm_device *ddev,
		       struct pci_dev *pdev,
		       uint32_t flags)
{
	int r, i;
	bool runtime = false;
	u32 max_MBps;

	adev->shutdown = false;
	adev->dev = &pdev->dev;
	adev->ddev = ddev;
	adev->pdev = pdev;
	adev->flags = flags;
	/* the asic type is carried in the low bits of the driver flags */
	adev->asic_type = flags & AMD_ASIC_MASK;
	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
	/* the register timeout is doubled in emulation mode */
	if (amdgpu_emu_mode == 1)
		adev->usec_timeout *= 2;
	adev->gmc.gart_size = 512 * 1024 * 1024;
	adev->accel_working = false;
	adev->num_rings = 0;
	adev->mman.buffer_funcs = NULL;
	adev->mman.buffer_funcs_ring = NULL;
	adev->vm_manager.vm_pte_funcs = NULL;
	adev->vm_manager.vm_pte_num_rqs = 0;
	adev->gmc.gmc_funcs = NULL;
	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/*
	 * Point every indirect register accessor at the amdgpu_invalid_*
	 * handlers; presumably ASIC-specific setup installs the real ones
	 * later (not visible in this function).
	 */
	adev->smc_rreg = &amdgpu_invalid_rreg;
	adev->smc_wreg = &amdgpu_invalid_wreg;
	adev->pcie_rreg = &amdgpu_invalid_rreg;
	adev->pcie_wreg = &amdgpu_invalid_wreg;
	adev->pciep_rreg = &amdgpu_invalid_rreg;
	adev->pciep_wreg = &amdgpu_invalid_wreg;
	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
	adev->didt_rreg = &amdgpu_invalid_rreg;
	adev->didt_wreg = &amdgpu_invalid_wreg;
	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;

	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);

	/* mutex initialization are all done here so we
	 * can recall function without having locking issues */
	atomic_set(&adev->irq.ih.lock, 0);
	mutex_init(&adev->firmware.mutex);
	mutex_init(&adev->pm.mutex);
	mutex_init(&adev->gfx.gpu_clock_mutex);
	mutex_init(&adev->srbm_mutex);
	mutex_init(&adev->gfx.pipe_reserve_mutex);
	mutex_init(&adev->gfx.gfx_off_mutex);
	mutex_init(&adev->grbm_idx_mutex);
	mutex_init(&adev->mn_lock);
	mutex_init(&adev->virt.vf_errors.lock);
	hash_init(adev->mn_hash);
	mutex_init(&adev->lock_reset);
	mutex_init(&adev->virt.dpm_mutex);
	mutex_init(&adev->psp.mutex);

	r = amdgpu_device_check_arguments(adev);
	if (r)
		return r;

	spin_lock_init(&adev->mmio_idx_lock);
	spin_lock_init(&adev->smc_idx_lock);
	spin_lock_init(&adev->pcie_idx_lock);
	spin_lock_init(&adev->uvd_ctx_idx_lock);
	spin_lock_init(&adev->didt_idx_lock);
	spin_lock_init(&adev->gc_cac_idx_lock);
	spin_lock_init(&adev->se_cac_idx_lock);
	spin_lock_init(&adev->audio_endpt_idx_lock);
	spin_lock_init(&adev->mm_stats.lock);

	INIT_LIST_HEAD(&adev->shadow_list);
	mutex_init(&adev->shadow_list_lock);

	INIT_LIST_HEAD(&adev->ring_lru_list);
	spin_lock_init(&adev->ring_lru_list_lock);

	INIT_DELAYED_WORK(&adev->delayed_init_work,
			  amdgpu_device_delayed_init_work_handler);
	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
			  amdgpu_device_delay_enable_gfx_off);

	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);

	/* start with one outstanding request so the gfx-off worker does nothing yet */
	adev->gfx.gfx_off_req_count = 1;
	adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;

	/* Registers mapping */
	/* TODO: block userspace mapping of io register */
	/* the register BAR is 5 for CHIP_BONAIRE and later asic types, 2 otherwise */
	if (adev->asic_type >= CHIP_BONAIRE) {
		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
	} else {
		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
	}

	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
	if (adev->rmmio == NULL) {
		return -ENOMEM;
	}
	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);

	/* io port mapping */
	/* only the first BAR flagged as I/O space is mapped */
	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
		if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
			adev->rio_mem_size = pci_resource_len(adev->pdev, i);
			adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
			break;
		}
	}
	if (adev->rio_mem == NULL)
		DRM_INFO("PCI I/O BAR is not found.\n");

	/* enable PCIE atomic ops */
	r = pci_enable_atomic_ops_to_root(adev->pdev,
					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
	if (r) {
		adev->have_atomics_support = false;
		DRM_INFO("PCIE atomic ops is not supported\n");
	} else {
		adev->have_atomics_support = true;
	}

	amdgpu_device_get_pcie_info(adev);

	if (amdgpu_mcbp)
		DRM_INFO("MCBP is enabled\n");

	if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
		adev->enable_mes = true;

	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
		r = amdgpu_discovery_init(adev);
		if (r) {
			dev_err(adev->dev, "amdgpu_discovery_init failed\n");
			return r;
		}
	}

	/* early init functions */
	/*
	 * NOTE(review): this and the earlier direct returns leave without
	 * undoing the rmmio/rio_mem mappings made above; confirm the caller
	 * tears the device down on failure.
	 */
	r = amdgpu_device_ip_early_init(adev);
	if (r)
		return r;

	/* doorbell bar mapping and doorbell index init*/
	amdgpu_device_doorbell_init(adev);

	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
	/* this will fail for cards that aren't VGA class devices, just
	 * ignore it */
	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);

	if (amdgpu_device_is_px(ddev))
		runtime = true;
	if (!pci_is_thunderbolt_attached(adev->pdev))
		vga_switcheroo_register_client(adev->pdev,
					       &amdgpu_switcheroo_ops, runtime);
	if (runtime)
		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);

	/* emulation mode skips the SR-IOV vbios check, reset, post and clock setup */
	if (amdgpu_emu_mode == 1) {
		/* post the asic on emulation mode */
		emu_soc_asic_init(adev);
		goto fence_driver_init;
	}

	/* detect if we are with an SRIOV vbios */
	amdgpu_device_detect_sriov_bios(adev);

	/* check if we need to reset the asic
	 *  E.g., driver was not cleanly unloaded previously, etc.
	 */
	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
		r = amdgpu_asic_reset(adev);
		if (r) {
			dev_err(adev->dev, "asic reset on init failed\n");
			goto failed;
		}
	}

	/* Post card if necessary */
	if (amdgpu_device_need_post(adev)) {
		if (!adev->bios) {
			dev_err(adev->dev, "no vBIOS found\n");
			r = -EINVAL;
			goto failed;
		}
		DRM_INFO("GPU posting now...\n");
		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
		if (r) {
			dev_err(adev->dev, "gpu post error!\n");
			goto failed;
		}
	}

	if (adev->is_atom_fw) {
		/* Initialize clocks */
		r = amdgpu_atomfirmware_get_clock_info(adev);
		if (r) {
			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
			goto failed;
		}
	} else {
		/* Initialize clocks */
		r = amdgpu_atombios_get_clock_info(adev);
		if (r) {
			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
			goto failed;
		}
		/* init i2c buses */
		if (!amdgpu_device_has_dc_support(adev))
			amdgpu_atombios_i2c_init(adev);
	}

fence_driver_init:
	/* Fence driver */
	r = amdgpu_fence_driver_init(adev);
	if (r) {
		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
		goto failed;
	}

	/* init the mode config */
	drm_mode_config_init(adev->ddev);

	r = amdgpu_device_ip_init(adev);
	if (r) {
		/* failed in exclusive mode due to timeout */
		if (amdgpu_sriov_vf(adev) &&
		    !amdgpu_sriov_runtime(adev) &&
		    amdgpu_virt_mmio_blocked(adev) &&
		    !amdgpu_virt_wait_reset(adev)) {
			dev_err(adev->dev, "VF exclusive mode timeout\n");
			/* Don't send request since VF is inactive. */
			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
			adev->virt.ops = NULL;
			r = -EAGAIN;
			goto failed;
		}
		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
		/*
		 * NOTE(review): release is issued only when the request returns
		 * non-zero; confirm this polarity is intended.
		 */
		if (amdgpu_virt_request_full_gpu(adev, false))
			amdgpu_virt_release_full_gpu(adev, false);
		goto failed;
	}

	adev->accel_working = true;

	amdgpu_vm_check_compute_bug(adev);

	/* Initialize the buffer migration limit. */
	if (amdgpu_moverate >= 0)
		max_MBps = amdgpu_moverate;
	else
		max_MBps = 8; /* Allow 8 MB/s. */
	/* Get a log2 for easy divisions. */
	/* clamp to at least 1 so ilog2() never sees 0 */
	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));

	amdgpu_fbdev_init(adev);

	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
		amdgpu_pm_virt_sysfs_init(adev);

	/* the sysfs/debugfs registrations below only log on failure */
	r = amdgpu_pm_sysfs_init(adev);
	if (r)
		DRM_ERROR("registering pm debugfs failed (%d).\n", r);

	r = amdgpu_ucode_sysfs_init(adev);
	if (r)
		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);

	r = amdgpu_debugfs_gem_init(adev);
	if (r)
		DRM_ERROR("registering gem debugfs failed (%d).\n", r);

	r = amdgpu_debugfs_regs_init(adev);
	if (r)
		DRM_ERROR("registering register debugfs failed (%d).\n", r);

	r = amdgpu_debugfs_firmware_init(adev);
	if (r)
		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);

	r = amdgpu_debugfs_init(adev);
	if (r)
		DRM_ERROR("Creating debugfs files failed (%d).\n", r);

	/* bit 0 of amdgpu_testing selects the buffer move tests */
	if ((amdgpu_testing & 1)) {
		if (adev->accel_working)
			amdgpu_test_moves(adev);
		else
			DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
	}
	if (amdgpu_benchmarking) {
		if (adev->accel_working)
			amdgpu_benchmark(adev, amdgpu_benchmarking);
		else
			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
	}

	/*
	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
	 * Otherwise the mgpu fan boost feature will be skipped due to the
	 * gpu instance is counted less.
	 */
	amdgpu_register_gpu_instance(adev);

	/* enable clockgating, etc. after ib tests, etc. since some blocks require
	 * explicit gating rather than handling it automatically.
	 */
	r = amdgpu_device_ip_late_init(adev);
	if (r) {
		dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
		goto failed;
	}

	/* must succeed. */
	amdgpu_ras_resume(adev);

	/* the delayed work item runs the IB ring tests after AMDGPU_RESUME_MS */
	queue_delayed_work(system_wq, &adev->delayed_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));

	r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
	if (r) {
		dev_err(adev->dev, "Could not create pcie_replay_count");
		/* NOTE(review): returns directly instead of going through "failed" */
		return r;
	}

	/* r is 0 here, so the error below can only come from amdgpu_pmu_init */
	if (IS_ENABLED(CONFIG_PERF_EVENTS))
		r = amdgpu_pmu_init(adev);
	if (r)
		dev_err(adev->dev, "amdgpu_pmu_init failed\n");

	return 0;

failed:
	/* report collected VF errors and undo only the switcheroo PM domain setup */
	amdgpu_vf_error_trans_all(adev);
	if (runtime)
		vga_switcheroo_fini_domain_pm_ops(adev->dev);

	return r;
}
2931
/**
 * amdgpu_device_fini - tear down the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down the driver info (all asics): disables interrupts and displays,
 * shuts down the fence driver, sysfs/fbdev and all IP blocks, then releases
 * firmware, the BIOS copy, the VGA/switcheroo registrations and the MMIO,
 * I/O and doorbell mappings.
 * Called at driver shutdown.
 */
void amdgpu_device_fini(struct amdgpu_device *adev)
{
	int r;

	DRM_INFO("amdgpu: finishing device.\n");
	adev->shutdown = true;
	/* disable all interrupts */
	amdgpu_irq_disable_all(adev);
	/* shut the displays down through the legacy or the atomic helper */
	if (adev->mode_info.mode_config_initialized){
		if (!amdgpu_device_has_dc_support(adev))
			drm_helper_force_disable_all(adev->ddev);
		else
			drm_atomic_helper_shutdown(adev->ddev);
	}
	amdgpu_fence_driver_fini(adev);
	amdgpu_pm_sysfs_fini(adev);
	amdgpu_fbdev_fini(adev);
	/* the result is stored but not acted upon */
	r = amdgpu_device_ip_fini(adev);
	if (adev->firmware.gpu_info_fw) {
		release_firmware(adev->firmware.gpu_info_fw);
		adev->firmware.gpu_info_fw = NULL;
	}
	adev->accel_working = false;
	/* make sure the delayed IB-test work is neither pending nor running */
	cancel_delayed_work_sync(&adev->delayed_init_work);
	/* free i2c buses */
	if (!amdgpu_device_has_dc_support(adev))
		amdgpu_i2c_fini(adev);

	if (amdgpu_emu_mode != 1)
		amdgpu_atombios_fini(adev);

	kfree(adev->bios);
	adev->bios = NULL;
	if (!pci_is_thunderbolt_attached(adev->pdev))
		vga_switcheroo_unregister_client(adev->pdev);
	if (adev->flags & AMD_IS_PX)
		vga_switcheroo_fini_domain_pm_ops(adev->dev);
	/* registering NULL callbacks drops the VGA arbiter client */
	vga_client_register(adev->pdev, NULL, NULL, NULL);
	if (adev->rio_mem)
		pci_iounmap(adev->pdev, adev->rio_mem);
	adev->rio_mem = NULL;
	iounmap(adev->rmmio);
	adev->rmmio = NULL;
	amdgpu_device_doorbell_fini(adev);
	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
		amdgpu_pm_virt_sysfs_fini(adev);

	amdgpu_debugfs_regs_cleanup(adev);
	device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
	amdgpu_ucode_sysfs_fini(adev);
	if (IS_ENABLED(CONFIG_PERF_EVENTS))
		amdgpu_pmu_fini(adev);
	amdgpu_debugfs_preempt_cleanup(adev);
	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
		amdgpu_discovery_fini(adev);
}
2996
2997
2998/*
2999 * Suspend & resume.
3000 */
3001/**
3002 * amdgpu_device_suspend - initiate device suspend
3003 *
3004 * @dev: drm dev pointer
3005 * @suspend: suspend state
3006 * @fbcon : notify the fbdev of suspend
3007 *
3008 * Puts the hw in the suspend state (all asics).
3009 * Returns 0 for success or an error on failure.
3010 * Called at driver suspend.
3011 */
3012int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
3013{
3014	struct amdgpu_device *adev;
3015	struct drm_crtc *crtc;
3016	struct drm_connector *connector;
3017	int r;
3018
3019	if (dev == NULL || dev->dev_private == NULL) {
3020		return -ENODEV;
3021	}
3022
3023	adev = dev->dev_private;
3024
3025	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3026		return 0;
3027
3028	adev->in_suspend = true;
3029	drm_kms_helper_poll_disable(dev);
3030
3031	if (fbcon)
3032		amdgpu_fbdev_set_suspend(adev, 1);
3033
3034	cancel_delayed_work_sync(&adev->delayed_init_work);
3035
3036	if (!amdgpu_device_has_dc_support(adev)) {
3037		/* turn off display hw */
3038		drm_modeset_lock_all(dev);
3039		list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3040			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
3041		}
3042		drm_modeset_unlock_all(dev);
3043			/* unpin the front buffers and cursors */
3044		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3045			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3046			struct drm_framebuffer *fb = crtc->primary->fb;
3047			struct amdgpu_bo *robj;
3048
3049			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3050				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3051				r = amdgpu_bo_reserve(aobj, true);
3052				if (r == 0) {
3053					amdgpu_bo_unpin(aobj);
3054					amdgpu_bo_unreserve(aobj);
3055				}
3056			}
3057
3058			if (fb == NULL || fb->obj[0] == NULL) {
3059				continue;
3060			}
3061			robj = gem_to_amdgpu_bo(fb->obj[0]);
3062			/* don't unpin kernel fb objects */
3063			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3064				r = amdgpu_bo_reserve(robj, true);
3065				if (r == 0) {
3066					amdgpu_bo_unpin(robj);
3067					amdgpu_bo_unreserve(robj);
3068				}
3069			}
3070		}
3071	}
3072
3073	amdgpu_amdkfd_suspend(adev);
3074
3075	amdgpu_ras_suspend(adev);
3076
3077	r = amdgpu_device_ip_suspend_phase1(adev);
3078
3079	/* evict vram memory */
3080	amdgpu_bo_evict_vram(adev);
3081
3082	amdgpu_fence_driver_suspend(adev);
3083
3084	r = amdgpu_device_ip_suspend_phase2(adev);
3085
3086	/* evict remaining vram memory
3087	 * This second call to evict vram is to evict the gart page table
3088	 * using the CPU.
3089	 */
3090	amdgpu_bo_evict_vram(adev);
3091
3092	pci_save_state(dev->pdev);
3093	if (suspend) {
3094		/* Shut down the device */
3095		pci_disable_device(dev->pdev);
3096		pci_set_power_state(dev->pdev, PCI_D3hot);
3097	} else {
3098		r = amdgpu_asic_reset(adev);
3099		if (r)
3100			DRM_ERROR("amdgpu asic reset failed\n");
3101	}
3102
3103	return 0;
3104}
3105
3106/**
3107 * amdgpu_device_resume - initiate device resume
3108 *
3109 * @dev: drm dev pointer
3110 * @resume: resume state
3111 * @fbcon : notify the fbdev of resume
3112 *
3113 * Bring the hw back to operating state (all asics).
3114 * Returns 0 for success or an error on failure.
3115 * Called at driver resume.
3116 */
3117int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
3118{
3119	struct drm_connector *connector;
3120	struct amdgpu_device *adev = dev->dev_private;
3121	struct drm_crtc *crtc;
3122	int r = 0;
3123
3124	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3125		return 0;
3126
3127	if (resume) {
3128		pci_set_power_state(dev->pdev, PCI_D0);
3129		pci_restore_state(dev->pdev);
3130		r = pci_enable_device(dev->pdev);
3131		if (r)
3132			return r;
3133	}
3134
3135	/* post card */
3136	if (amdgpu_device_need_post(adev)) {
3137		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3138		if (r)
3139			DRM_ERROR("amdgpu asic init failed\n");
3140	}
3141
3142	r = amdgpu_device_ip_resume(adev);
3143	if (r) {
3144		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
3145		return r;
3146	}
3147	amdgpu_fence_driver_resume(adev);
3148
3149
3150	r = amdgpu_device_ip_late_init(adev);
3151	if (r)
3152		return r;
3153
3154	queue_delayed_work(system_wq, &adev->delayed_init_work,
3155			   msecs_to_jiffies(AMDGPU_RESUME_MS));
3156
3157	if (!amdgpu_device_has_dc_support(adev)) {
3158		/* pin cursors */
3159		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3160			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3161
3162			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3163				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3164				r = amdgpu_bo_reserve(aobj, true);
3165				if (r == 0) {
3166					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3167					if (r != 0)
3168						DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3169					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3170					amdgpu_bo_unreserve(aobj);
3171				}
3172			}
3173		}
3174	}
3175	r = amdgpu_amdkfd_resume(adev);
3176	if (r)
3177		return r;
3178
3179	/* Make sure IB tests flushed */
3180	flush_delayed_work(&adev->delayed_init_work);
3181
3182	/* blat the mode back in */
3183	if (fbcon) {
3184		if (!amdgpu_device_has_dc_support(adev)) {
3185			/* pre DCE11 */
3186			drm_helper_resume_force_mode(dev);
3187
3188			/* turn on display hw */
3189			drm_modeset_lock_all(dev);
3190			list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3191				drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3192			}
3193			drm_modeset_unlock_all(dev);
3194		}
3195		amdgpu_fbdev_set_suspend(adev, 0);
3196	}
3197
3198	drm_kms_helper_poll_enable(dev);
3199
3200	amdgpu_ras_resume(adev);
3201
3202	/*
3203	 * Most of the connector probing functions try to acquire runtime pm
3204	 * refs to ensure that the GPU is powered on when connector polling is
3205	 * performed. Since we're calling this from a runtime PM callback,
3206	 * trying to acquire rpm refs will cause us to deadlock.
3207	 *
3208	 * Since we're guaranteed to be holding the rpm lock, it's safe to
3209	 * temporarily disable the rpm helpers so this doesn't deadlock us.
3210	 */
3211#ifdef CONFIG_PM
3212	dev->dev->power.disable_depth++;
3213#endif
3214	if (!amdgpu_device_has_dc_support(adev))
3215		drm_helper_hpd_irq_event(dev);
3216	else
3217		drm_kms_helper_hotplug_event(dev);
3218#ifdef CONFIG_PM
3219	dev->dev->power.disable_depth--;
3220#endif
3221	adev->in_suspend = false;
3222
3223	return 0;
3224}
3225
3226/**
3227 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3228 *
3229 * @adev: amdgpu_device pointer
3230 *
3231 * The list of all the hardware IPs that make up the asic is walked and
3232 * the check_soft_reset callbacks are run.  check_soft_reset determines
3233 * if the asic is still hung or not.
3234 * Returns true if any of the IPs are still in a hung state, false if not.
3235 */
3236static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3237{
3238	int i;
3239	bool asic_hang = false;
3240
3241	if (amdgpu_sriov_vf(adev))
3242		return true;
3243
3244	if (amdgpu_asic_need_full_reset(adev))
3245		return true;
3246
3247	for (i = 0; i < adev->num_ip_blocks; i++) {
3248		if (!adev->ip_blocks[i].status.valid)
3249			continue;
3250		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3251			adev->ip_blocks[i].status.hang =
3252				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3253		if (adev->ip_blocks[i].status.hang) {
3254			DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3255			asic_hang = true;
3256		}
3257	}
3258	return asic_hang;
3259}
3260
3261/**
3262 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3263 *
3264 * @adev: amdgpu_device pointer
3265 *
3266 * The list of all the hardware IPs that make up the asic is walked and the
3267 * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
3268 * handles any IP specific hardware or software state changes that are
3269 * necessary for a soft reset to succeed.
3270 * Returns 0 on success, negative error code on failure.
3271 */
3272static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3273{
3274	int i, r = 0;
3275
3276	for (i = 0; i < adev->num_ip_blocks; i++) {
3277		if (!adev->ip_blocks[i].status.valid)
3278			continue;
3279		if (adev->ip_blocks[i].status.hang &&
3280		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3281			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3282			if (r)
3283				return r;
3284		}
3285	}
3286
3287	return 0;
3288}
3289
3290/**
3291 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3292 *
3293 * @adev: amdgpu_device pointer
3294 *
3295 * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
3296 * reset is necessary to recover.
3297 * Returns true if a full asic reset is required, false if not.
3298 */
3299static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
3300{
3301	int i;
3302
3303	if (amdgpu_asic_need_full_reset(adev))
3304		return true;
3305
3306	for (i = 0; i < adev->num_ip_blocks; i++) {
3307		if (!adev->ip_blocks[i].status.valid)
3308			continue;
3309		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3310		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3311		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3312		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3313		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3314			if (adev->ip_blocks[i].status.hang) {
3315				DRM_INFO("Some block need full reset!\n");
3316				return true;
3317			}
3318		}
3319	}
3320	return false;
3321}
3322
3323/**
3324 * amdgpu_device_ip_soft_reset - do a soft reset
3325 *
3326 * @adev: amdgpu_device pointer
3327 *
3328 * The list of all the hardware IPs that make up the asic is walked and the
3329 * soft_reset callbacks are run if the block is hung.  soft_reset handles any
3330 * IP specific hardware or software state changes that are necessary to soft
3331 * reset the IP.
3332 * Returns 0 on success, negative error code on failure.
3333 */
3334static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3335{
3336	int i, r = 0;
3337
3338	for (i = 0; i < adev->num_ip_blocks; i++) {
3339		if (!adev->ip_blocks[i].status.valid)
3340			continue;
3341		if (adev->ip_blocks[i].status.hang &&
3342		    adev->ip_blocks[i].version->funcs->soft_reset) {
3343			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3344			if (r)
3345				return r;
3346		}
3347	}
3348
3349	return 0;
3350}
3351
3352/**
3353 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3354 *
3355 * @adev: amdgpu_device pointer
3356 *
3357 * The list of all the hardware IPs that make up the asic is walked and the
3358 * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
3359 * handles any IP specific hardware or software state changes that are
3360 * necessary after the IP has been soft reset.
3361 * Returns 0 on success, negative error code on failure.
3362 */
3363static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3364{
3365	int i, r = 0;
3366
3367	for (i = 0; i < adev->num_ip_blocks; i++) {
3368		if (!adev->ip_blocks[i].status.valid)
3369			continue;
3370		if (adev->ip_blocks[i].status.hang &&
3371		    adev->ip_blocks[i].version->funcs->post_soft_reset)
3372			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3373		if (r)
3374			return r;
3375	}
3376
3377	return 0;
3378}
3379
3380/**
3381 * amdgpu_device_recover_vram - Recover some VRAM contents
3382 *
3383 * @adev: amdgpu_device pointer
3384 *
3385 * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
3386 * restore things like GPUVM page tables after a GPU reset where
3387 * the contents of VRAM might be lost.
3388 *
3389 * Returns:
3390 * 0 on success, negative error code on failure.
3391 */
3392static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
3393{
3394	struct dma_fence *fence = NULL, *next = NULL;
3395	struct amdgpu_bo *shadow;
3396	long r = 1, tmo;
3397
3398	if (amdgpu_sriov_runtime(adev))
3399		tmo = msecs_to_jiffies(8000);
3400	else
3401		tmo = msecs_to_jiffies(100);
3402
3403	DRM_INFO("recover vram bo from shadow start\n");
3404	mutex_lock(&adev->shadow_list_lock);
3405	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3406
3407		/* No need to recover an evicted BO */
3408		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3409		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
3410		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3411			continue;
3412
3413		r = amdgpu_bo_restore_shadow(shadow, &next);
3414		if (r)
3415			break;
3416
3417		if (fence) {
3418			tmo = dma_fence_wait_timeout(fence, false, tmo);
3419			dma_fence_put(fence);
3420			fence = next;
3421			if (tmo == 0) {
3422				r = -ETIMEDOUT;
3423				break;
3424			} else if (tmo < 0) {
3425				r = tmo;
3426				break;
3427			}
3428		} else {
3429			fence = next;
3430		}
3431	}
3432	mutex_unlock(&adev->shadow_list_lock);
3433
3434	if (fence)
3435		tmo = dma_fence_wait_timeout(fence, false, tmo);
3436	dma_fence_put(fence);
3437
3438	if (r < 0 || tmo <= 0) {
3439		DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
3440		return -EIO;
3441	}
3442
3443	DRM_INFO("recover vram bo from shadow done\n");
3444	return 0;
3445}
3446
3447
3448/**
3449 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3450 *
3451 * @adev: amdgpu device pointer
3452 * @from_hypervisor: request from hypervisor
3453 *
3454 * do VF FLR and reinitialize Asic
3455 * return 0 means succeeded otherwise failed
3456 */
3457static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3458				     bool from_hypervisor)
3459{
3460	int r;
3461
3462	if (from_hypervisor)
3463		r = amdgpu_virt_request_full_gpu(adev, true);
3464	else
3465		r = amdgpu_virt_reset_gpu(adev);
3466	if (r)
3467		return r;
3468
3469	amdgpu_amdkfd_pre_reset(adev);
3470
3471	/* Resume IP prior to SMC */
3472	r = amdgpu_device_ip_reinit_early_sriov(adev);
3473	if (r)
3474		goto error;
3475
3476	/* we need recover gart prior to run SMC/CP/SDMA resume */
3477	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
3478
3479	r = amdgpu_device_fw_loading(adev);
3480	if (r)
3481		return r;
3482
3483	/* now we are okay to resume SMC/CP/SDMA */
3484	r = amdgpu_device_ip_reinit_late_sriov(adev);
3485	if (r)
3486		goto error;
3487
3488	amdgpu_irq_gpu_reset_resume_helper(adev);
3489	r = amdgpu_ib_ring_tests(adev);
3490	amdgpu_amdkfd_post_reset(adev);
3491
3492error:
3493	amdgpu_virt_init_data_exchange(adev);
3494	amdgpu_virt_release_full_gpu(adev, true);
3495	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3496		amdgpu_inc_vram_lost(adev);
3497		r = amdgpu_device_recover_vram(adev);
3498	}
3499
3500	return r;
3501}
3502
3503/**
3504 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3505 *
3506 * @adev: amdgpu device pointer
3507 *
3508 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3509 * a hung GPU.
3510 */
3511bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3512{
3513	if (!amdgpu_device_ip_check_soft_reset(adev)) {
3514		DRM_INFO("Timeout, but no hardware hang detected.\n");
3515		return false;
3516	}
3517
3518	if (amdgpu_gpu_recovery == 0)
3519		goto disabled;
3520
3521	if (amdgpu_sriov_vf(adev))
3522		return true;
3523
3524	if (amdgpu_gpu_recovery == -1) {
3525		switch (adev->asic_type) {
3526		case CHIP_BONAIRE:
3527		case CHIP_HAWAII:
3528		case CHIP_TOPAZ:
3529		case CHIP_TONGA:
3530		case CHIP_FIJI:
3531		case CHIP_POLARIS10:
3532		case CHIP_POLARIS11:
3533		case CHIP_POLARIS12:
3534		case CHIP_VEGAM:
3535		case CHIP_VEGA20:
3536		case CHIP_VEGA10:
3537		case CHIP_VEGA12:
3538		case CHIP_RAVEN:
3539			break;
3540		default:
3541			goto disabled;
3542		}
3543	}
3544
3545	return true;
3546
3547disabled:
3548		DRM_INFO("GPU recovery disabled.\n");
3549		return false;
3550}
3551
3552
3553static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3554					struct amdgpu_job *job,
3555					bool *need_full_reset_arg)
3556{
3557	int i, r = 0;
3558	bool need_full_reset  = *need_full_reset_arg;
3559
3560	/* block all schedulers and reset given job's ring */
3561	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3562		struct amdgpu_ring *ring = adev->rings[i];
3563
3564		if (!ring || !ring->sched.thread)
3565			continue;
3566
3567		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3568		amdgpu_fence_driver_force_completion(ring);
3569	}
3570
3571	if(job)
3572		drm_sched_increase_karma(&job->base);
3573
3574	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
3575	if (!amdgpu_sriov_vf(adev)) {
3576
3577		if (!need_full_reset)
3578			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3579
3580		if (!need_full_reset) {
3581			amdgpu_device_ip_pre_soft_reset(adev);
3582			r = amdgpu_device_ip_soft_reset(adev);
3583			amdgpu_device_ip_post_soft_reset(adev);
3584			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3585				DRM_INFO("soft reset failed, will fallback to full reset!\n");
3586				need_full_reset = true;
3587			}
3588		}
3589
3590		if (need_full_reset)
3591			r = amdgpu_device_ip_suspend(adev);
3592
3593		*need_full_reset_arg = need_full_reset;
3594	}
3595
3596	return r;
3597}
3598
3599static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3600			       struct list_head *device_list_handle,
3601			       bool *need_full_reset_arg)
3602{
3603	struct amdgpu_device *tmp_adev = NULL;
3604	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3605	int r = 0;
3606
3607	/*
3608	 * ASIC reset has to be done on all HGMI hive nodes ASAP
3609	 * to allow proper links negotiation in FW (within 1 sec)
3610	 */
3611	if (need_full_reset) {
3612		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3613			/* For XGMI run all resets in parallel to speed up the process */
3614			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3615				if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3616					r = -EALREADY;
3617			} else
3618				r = amdgpu_asic_reset(tmp_adev);
3619
3620			if (r) {
3621				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3622					 r, tmp_adev->ddev->unique);
3623				break;
3624			}
3625		}
3626
3627		/* For XGMI wait for all PSP resets to complete before proceed */
3628		if (!r) {
3629			list_for_each_entry(tmp_adev, device_list_handle,
3630					    gmc.xgmi.head) {
3631				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3632					flush_work(&tmp_adev->xgmi_reset_work);
3633					r = tmp_adev->asic_reset_res;
3634					if (r)
3635						break;
3636				}
3637			}
3638
3639			list_for_each_entry(tmp_adev, device_list_handle,
3640					gmc.xgmi.head) {
3641				amdgpu_ras_reserve_bad_pages(tmp_adev);
3642			}
3643		}
3644	}
3645
3646
3647	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3648		if (need_full_reset) {
3649			/* post card */
3650			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3651				DRM_WARN("asic atom init failed!");
3652
3653			if (!r) {
3654				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3655				r = amdgpu_device_ip_resume_phase1(tmp_adev);
3656				if (r)
3657					goto out;
3658
3659				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3660				if (vram_lost) {
3661					DRM_INFO("VRAM is lost due to GPU reset!\n");
3662					amdgpu_inc_vram_lost(tmp_adev);
3663				}
3664
3665				r = amdgpu_gtt_mgr_recover(
3666					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
3667				if (r)
3668					goto out;
3669
3670				r = amdgpu_device_fw_loading(tmp_adev);
3671				if (r)
3672					return r;
3673
3674				r = amdgpu_device_ip_resume_phase2(tmp_adev);
3675				if (r)
3676					goto out;
3677
3678				if (vram_lost)
3679					amdgpu_device_fill_reset_magic(tmp_adev);
3680
3681				/*
3682				 * Add this ASIC as tracked as reset was already
3683				 * complete successfully.
3684				 */
3685				amdgpu_register_gpu_instance(tmp_adev);
3686
3687				r = amdgpu_device_ip_late_init(tmp_adev);
3688				if (r)
3689					goto out;
3690
3691				/* must succeed. */
3692				amdgpu_ras_resume(tmp_adev);
3693
3694				/* Update PSP FW topology after reset */
3695				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3696					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3697			}
3698		}
3699
3700
3701out:
3702		if (!r) {
3703			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3704			r = amdgpu_ib_ring_tests(tmp_adev);
3705			if (r) {
3706				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3707				r = amdgpu_device_ip_suspend(tmp_adev);
3708				need_full_reset = true;
3709				r = -EAGAIN;
3710				goto end;
3711			}
3712		}
3713
3714		if (!r)
3715			r = amdgpu_device_recover_vram(tmp_adev);
3716		else
3717			tmp_adev->asic_reset_res = r;
3718	}
3719
3720end:
3721	*need_full_reset_arg = need_full_reset;
3722	return r;
3723}
3724
3725static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
3726{
3727	if (trylock) {
3728		if (!mutex_trylock(&adev->lock_reset))
3729			return false;
3730	} else
3731		mutex_lock(&adev->lock_reset);
3732
3733	atomic_inc(&adev->gpu_reset_counter);
3734	adev->in_gpu_reset = 1;
3735	switch (amdgpu_asic_reset_method(adev)) {
3736	case AMD_RESET_METHOD_MODE1:
3737		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
3738		break;
3739	case AMD_RESET_METHOD_MODE2:
3740		adev->mp1_state = PP_MP1_STATE_RESET;
3741		break;
3742	default:
3743		adev->mp1_state = PP_MP1_STATE_NONE;
3744		break;
3745	}
3746	/* Block kfd: SRIOV would do it separately */
3747	if (!amdgpu_sriov_vf(adev))
3748                amdgpu_amdkfd_pre_reset(adev);
3749
3750	return true;
3751}
3752
3753static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3754{
3755	/*unlock kfd: SRIOV would do it separately */
3756	if (!amdgpu_sriov_vf(adev))
3757                amdgpu_amdkfd_post_reset(adev);
3758	amdgpu_vf_error_trans_all(adev);
3759	adev->mp1_state = PP_MP1_STATE_NONE;
3760	adev->in_gpu_reset = 0;
3761	mutex_unlock(&adev->lock_reset);
3762}
3763
3764
/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu device pointer
 * @job: the job that triggered the hang, or NULL when no job is associated
 *       with the request
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize Asic.
 *
 * The sequence is: take the hive/device reset locks, stop all schedulers of
 * the affected devices, run the pre-reset step on every device, perform the
 * actual reset (SR-IOV or bare metal), then resubmit jobs, restart the
 * schedulers and unlock every device.  If the guilty job's hw fence has
 * already signaled, the hw reset is skipped and only the schedulers are
 * restarted.
 *
 * Returns 0 for success (including when another reset is already in
 * progress and this call bails out) or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job)
{
	struct list_head device_list, *device_list_handle =  NULL;
	bool need_full_reset, job_signaled;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;

	need_full_reset = job_signaled = false;
	INIT_LIST_HEAD(&device_list);

	dev_info(adev->dev, "GPU reset begin!\n");

	cancel_delayed_work_sync(&adev->delayed_init_work);

	hive = amdgpu_get_xgmi_hive(adev, false);

	/*
	 * Here we trylock to avoid chain of resets executing from
	 * either trigger by jobs on different adevs in XGMI hive or jobs on
	 * different schedulers for same device while this TO handler is running.
	 * We always reset all schedulers for device and all devices for XGMI
	 * hive so that should take care of them too.
	 */

	if (hive && !mutex_trylock(&hive->reset_lock)) {
		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
			  job ? job->base.id : -1, hive->hive_id);
		return 0;
	}

	/*
	 * Start with adev pre asic reset first for soft reset check.
	 * Without a hive the device lock is only tried; with a hive (whose
	 * reset_lock is already held at this point) it is taken blocking.
	 */
	if (!amdgpu_device_lock_adev(adev, !hive)) {
		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
			  job ? job->base.id : -1);
		return 0;
	}

	/* Build list of devices to reset */
	if  (adev->gmc.xgmi.num_physical_nodes > 1) {
		if (!hive) {
			amdgpu_device_unlock_adev(adev);
			return -ENODEV;
		}

		/*
		 * In case we are in XGMI hive mode device reset is done for all the
		 * nodes in the hive to retrain all XGMI links and hence the reset
		 * sequence is executed in loop on all nodes.
		 */
		device_list_handle = &hive->device_list;
	} else {
		/* single device: reset only adev via the on-stack list */
		list_add_tail(&adev->gmc.xgmi.head, &device_list);
		device_list_handle = &device_list;
	}

	/*
	 * Mark the ASICs that are about to be reset as untracked first
	 * and add them back after the reset has completed
	 */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
		amdgpu_unregister_gpu_instance(tmp_adev);

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		/* disable ras on ALL IPs */
		if (amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
		}
	}


	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && job->base.s_fence->parent &&
	    dma_fence_is_signaled(job->base.s_fence->parent))
		job_signaled = true;

	/*
	 * No full reset needed: restrict the remaining steps to the local list.
	 * NOTE(review): in the XGMI branch above adev is never added to
	 * device_list, so for a hive member this switches to an empty list and
	 * the loops below would not visit (or unlock) any device - confirm this
	 * combination cannot occur.
	 */
	if (!amdgpu_device_ip_need_full_reset(adev))
		device_list_handle = &device_list;

	if (job_signaled) {
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}


	/* Guilty job will be freed after this*/
	r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
	if (r) {
		/*TODO Should we stop ?*/
		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
			  r, adev->ddev->unique);
		adev->asic_reset_res = r;
	}

	/*
	 * NOTE(review): when amdgpu_do_asic_reset() returns -EAGAIN this loop
	 * runs again and calls amdgpu_device_lock_adev(tmp_adev, false), i.e. a
	 * blocking mutex_lock, for devices that were already locked on the
	 * first pass - confirm lock_reset cannot be taken twice here.
	 */
retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

		if (tmp_adev == adev)
			continue;

		amdgpu_device_lock_adev(tmp_adev, false);
		r = amdgpu_device_pre_asic_reset(tmp_adev,
						 NULL,
						 &need_full_reset);
		/*TODO Should we stop ?*/
		if (r) {
			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
				  r, tmp_adev->ddev->unique);
			tmp_adev->asic_reset_res = r;
		}
	}

	/* Actual ASIC resets if needed.*/
	/* TODO Implement XGMI hive reset logic for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		/* a reset without a guilty job is treated as hypervisor-requested */
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
		if (r)
			adev->asic_reset_res = r;
	} else {
		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
		/* -EAGAIN: need_full_reset was raised, redo the pre-reset step */
		if (r && r == -EAGAIN)
			goto retry;
	}

skip_hw_reset:

	/* Post ASIC reset for all devs .*/
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			/* No point to resubmit jobs if we didn't HW reset*/
			if (!tmp_adev->asic_reset_res && !job_signaled)
				drm_sched_resubmit_jobs(&ring->sched);

			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
		}

		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
			drm_helper_resume_force_mode(tmp_adev->ddev);
		}

		/* per-device result has been consumed above, clear it */
		tmp_adev->asic_reset_res = 0;

		/* note: the counter printed below is read from adev, not tmp_adev */
		if (r) {
			/* bad news, how to tell it to userspace ? */
			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
		}

		amdgpu_device_unlock_adev(tmp_adev);
	}

	if (hive)
		mutex_unlock(&hive->reset_lock);

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
	return r;
}
3956
3957/**
3958 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
3959 *
3960 * @adev: amdgpu_device pointer
3961 *
3962 * Fetchs and stores in the driver the PCIE capabilities (gen speed
3963 * and lanes) of the slot the device is in. Handles APUs and
3964 * virtualized environments where PCIE config space may not be available.
3965 */
3966static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
3967{
3968	struct pci_dev *pdev;
3969	enum pci_bus_speed speed_cap, platform_speed_cap;
3970	enum pcie_link_width platform_link_width;
3971
3972	if (amdgpu_pcie_gen_cap)
3973		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
3974
3975	if (amdgpu_pcie_lane_cap)
3976		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
3977
3978	/* covers APUs as well */
3979	if (pci_is_root_bus(adev->pdev->bus)) {
3980		if (adev->pm.pcie_gen_mask == 0)
3981			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3982		if (adev->pm.pcie_mlw_mask == 0)
3983			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
3984		return;
3985	}
3986
3987	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
3988		return;
3989
3990	pcie_bandwidth_available(adev->pdev, NULL,
3991				 &platform_speed_cap, &platform_link_width);
3992
3993	if (adev->pm.pcie_gen_mask == 0) {
3994		/* asic caps */
3995		pdev = adev->pdev;
3996		speed_cap = pcie_get_speed_cap(pdev);
3997		if (speed_cap == PCI_SPEED_UNKNOWN) {
3998			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3999						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4000						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4001		} else {
4002			if (speed_cap == PCIE_SPEED_16_0GT)
4003				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4004							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4005							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4006							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4007			else if (speed_cap == PCIE_SPEED_8_0GT)
4008				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4009							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4010							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4011			else if (speed_cap == PCIE_SPEED_5_0GT)
4012				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4013							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4014			else
4015				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4016		}
4017		/* platform caps */
4018		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
4019			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4020						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4021		} else {
4022			if (platform_speed_cap == PCIE_SPEED_16_0GT)
4023				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4024							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4025							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4026							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
4027			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
4028				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4029							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4030							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
4031			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
4032				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4033							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4034			else
4035				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4036
4037		}
4038	}
4039	if (adev->pm.pcie_mlw_mask == 0) {
4040		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
4041			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4042		} else {
4043			switch (platform_link_width) {
4044			case PCIE_LNK_X32:
4045				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4046							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4047							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4048							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4049							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4050							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4051							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4052				break;
4053			case PCIE_LNK_X16:
4054				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4055							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4056							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4057							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4058							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4059							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4060				break;
4061			case PCIE_LNK_X12:
4062				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4063							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4064							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4065							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4066							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4067				break;
4068			case PCIE_LNK_X8:
4069				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4070							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4071							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4072							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4073				break;
4074			case PCIE_LNK_X4:
4075				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4076							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4077							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4078				break;
4079			case PCIE_LNK_X2:
4080				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4081							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4082				break;
4083			case PCIE_LNK_X1:
4084				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4085				break;
4086			default:
4087				break;
4088			}
4089		}
4090	}
4091}
4092