xe_device.c - drivers/gpu/drm/xe/xe_device.c - Linux source code v6.13.7

Note: File does not exist in v5.9.
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2021 Intel Corporation
   4 */
   5
   6#include "xe_device.h"
   7
   8#include <linux/aperture.h>
   9#include <linux/delay.h>
  10#include <linux/fault-inject.h>
  11#include <linux/units.h>
  12
  13#include <drm/drm_atomic_helper.h>
  14#include <drm/drm_client.h>
  15#include <drm/drm_gem_ttm_helper.h>
  16#include <drm/drm_ioctl.h>
  17#include <drm/drm_managed.h>
  18#include <drm/drm_print.h>
  19#include <uapi/drm/xe_drm.h>
  20
  21#include "display/xe_display.h"
  22#include "instructions/xe_gpu_commands.h"
  23#include "regs/xe_gt_regs.h"
  24#include "regs/xe_regs.h"
  25#include "xe_bo.h"
  26#include "xe_debugfs.h"
  27#include "xe_devcoredump.h"
  28#include "xe_dma_buf.h"
  29#include "xe_drm_client.h"
  30#include "xe_drv.h"
  31#include "xe_exec.h"
  32#include "xe_exec_queue.h"
  33#include "xe_force_wake.h"
  34#include "xe_ggtt.h"
  35#include "xe_gsc_proxy.h"
  36#include "xe_gt.h"
  37#include "xe_gt_mcr.h"
  38#include "xe_gt_printk.h"
  39#include "xe_gt_sriov_vf.h"
  40#include "xe_guc.h"
  41#include "xe_hw_engine_group.h"
  42#include "xe_hwmon.h"
  43#include "xe_irq.h"
  44#include "xe_memirq.h"
  45#include "xe_mmio.h"
  46#include "xe_module.h"
  47#include "xe_observation.h"
  48#include "xe_pat.h"
  49#include "xe_pcode.h"
  50#include "xe_pm.h"
  51#include "xe_query.h"
  52#include "xe_sriov.h"
  53#include "xe_tile.h"
  54#include "xe_ttm_stolen_mgr.h"
  55#include "xe_ttm_sys_mgr.h"
  56#include "xe_vm.h"
  57#include "xe_vram.h"
  58#include "xe_wait_user_fence.h"
  59#include "xe_wa.h"
  60
  61#include <generated/xe_wa_oob.h>
  62
  63static int xe_file_open(struct drm_device *dev, struct drm_file *file)
  64{
  65	struct xe_device *xe = to_xe_device(dev);
  66	struct xe_drm_client *client;
  67	struct xe_file *xef;
  68	int ret = -ENOMEM;
  69	struct task_struct *task = NULL;
  70
  71	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
  72	if (!xef)
  73		return ret;
  74
  75	client = xe_drm_client_alloc();
  76	if (!client) {
  77		kfree(xef);
  78		return ret;
  79	}
  80
  81	xef->drm = file;
  82	xef->client = client;
  83	xef->xe = xe;
  84
  85	mutex_init(&xef->vm.lock);
  86	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);
  87
  88	mutex_init(&xef->exec_queue.lock);
  89	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
  90
  91	file->driver_priv = xef;
  92	kref_init(&xef->refcount);
  93
  94	task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
  95	if (task) {
  96		xef->process_name = kstrdup(task->comm, GFP_KERNEL);
  97		xef->pid = task->pid;
  98		put_task_struct(task);
  99	}
 100
 101	return 0;
 102}
 103
 104static void xe_file_destroy(struct kref *ref)
 105{
 106	struct xe_file *xef = container_of(ref, struct xe_file, refcount);
 107
 108	xa_destroy(&xef->exec_queue.xa);
 109	mutex_destroy(&xef->exec_queue.lock);
 110	xa_destroy(&xef->vm.xa);
 111	mutex_destroy(&xef->vm.lock);
 112
 113	xe_drm_client_put(xef->client);
 114	kfree(xef->process_name);
 115	kfree(xef);
 116}
 117
 118/**
 119 * xe_file_get() - Take a reference to the xe file object
 120 * @xef: Pointer to the xe file
 121 *
 122 * Anyone with a pointer to xef must take a reference to the xe file
 123 * object using this call.
 124 *
 125 * Return: xe file pointer
 126 */
 127struct xe_file *xe_file_get(struct xe_file *xef)
 128{
 129	kref_get(&xef->refcount);
 130	return xef;
 131}
 132
 133/**
 134 * xe_file_put() - Drop a reference to the xe file object
 135 * @xef: Pointer to the xe file
 136 *
 137 * Used to drop reference to the xef object
 138 */
 139void xe_file_put(struct xe_file *xef)
 140{
 141	kref_put(&xef->refcount, xe_file_destroy);
 142}
 143
 144static void xe_file_close(struct drm_device *dev, struct drm_file *file)
 145{
 146	struct xe_device *xe = to_xe_device(dev);
 147	struct xe_file *xef = file->driver_priv;
 148	struct xe_vm *vm;
 149	struct xe_exec_queue *q;
 150	unsigned long idx;
 151
 152	xe_pm_runtime_get(xe);
 153
 154	/*
 155	 * No need for exec_queue.lock here as there is no contention for it
 156	 * when FD is closing as IOCTLs presumably can't be modifying the
 157	 * xarray. Taking exec_queue.lock here causes undue dependency on
 158	 * vm->lock taken during xe_exec_queue_kill().
 159	 */
 160	xa_for_each(&xef->exec_queue.xa, idx, q) {
 161		if (q->vm && q->hwe->hw_engine_group)
 162			xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
 163		xe_exec_queue_kill(q);
 164		xe_exec_queue_put(q);
 165	}
 166	xa_for_each(&xef->vm.xa, idx, vm)
 167		xe_vm_close_and_put(vm);
 168
 169	xe_file_put(xef);
 170
 171	xe_pm_runtime_put(xe);
 172}
 173
 174static const struct drm_ioctl_desc xe_ioctls[] = {
 175	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
 176	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
 177	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
 178			  DRM_RENDER_ALLOW),
 179	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
 180	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 181	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
 182	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
 183	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
 184			  DRM_RENDER_ALLOW),
 185	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
 186			  DRM_RENDER_ALLOW),
 187	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
 188			  DRM_RENDER_ALLOW),
 189	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
 190			  DRM_RENDER_ALLOW),
 191	DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
 192};
 193
 194static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 195{
 196	struct drm_file *file_priv = file->private_data;
 197	struct xe_device *xe = to_xe_device(file_priv->minor->dev);
 198	long ret;
 199
 200	if (xe_device_wedged(xe))
 201		return -ECANCELED;
 202
 203	ret = xe_pm_runtime_get_ioctl(xe);
 204	if (ret >= 0)
 205		ret = drm_ioctl(file, cmd, arg);
 206	xe_pm_runtime_put(xe);
 207
 208	return ret;
 209}
 210
 211#ifdef CONFIG_COMPAT
 212static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 213{
 214	struct drm_file *file_priv = file->private_data;
 215	struct xe_device *xe = to_xe_device(file_priv->minor->dev);
 216	long ret;
 217
 218	if (xe_device_wedged(xe))
 219		return -ECANCELED;
 220
 221	ret = xe_pm_runtime_get_ioctl(xe);
 222	if (ret >= 0)
 223		ret = drm_compat_ioctl(file, cmd, arg);
 224	xe_pm_runtime_put(xe);
 225
 226	return ret;
 227}
 228#else
 229/* similarly to drm_compat_ioctl, let's it be assigned to .compat_ioct unconditionally */
 230#define xe_drm_compat_ioctl NULL
 231#endif
 232
 233static const struct file_operations xe_driver_fops = {
 234	.owner = THIS_MODULE,
 235	.open = drm_open,
 236	.release = drm_release_noglobal,
 237	.unlocked_ioctl = xe_drm_ioctl,
 238	.mmap = drm_gem_mmap,
 239	.poll = drm_poll,
 240	.read = drm_read,
 241	.compat_ioctl = xe_drm_compat_ioctl,
 242	.llseek = noop_llseek,
 243#ifdef CONFIG_PROC_FS
 244	.show_fdinfo = drm_show_fdinfo,
 245#endif
 246	.fop_flags = FOP_UNSIGNED_OFFSET,
 247};
 248
 249static struct drm_driver driver = {
 250	/* Don't use MTRRs here; the Xserver or userspace app should
 251	 * deal with them for Intel hardware.
 252	 */
 253	.driver_features =
 254	    DRIVER_GEM |
 255	    DRIVER_RENDER | DRIVER_SYNCOBJ |
 256	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
 257	.open = xe_file_open,
 258	.postclose = xe_file_close,
 259
 260	.gem_prime_import = xe_gem_prime_import,
 261
 262	.dumb_create = xe_bo_dumb_create,
 263	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
 264#ifdef CONFIG_PROC_FS
 265	.show_fdinfo = xe_drm_client_fdinfo,
 266#endif
 267	.ioctls = xe_ioctls,
 268	.num_ioctls = ARRAY_SIZE(xe_ioctls),
 269	.fops = &xe_driver_fops,
 270	.name = DRIVER_NAME,
 271	.desc = DRIVER_DESC,
 272	.date = DRIVER_DATE,
 273	.major = DRIVER_MAJOR,
 274	.minor = DRIVER_MINOR,
 275	.patchlevel = DRIVER_PATCHLEVEL,
 276};
 277
 278static void xe_device_destroy(struct drm_device *dev, void *dummy)
 279{
 280	struct xe_device *xe = to_xe_device(dev);
 281
 282	if (xe->preempt_fence_wq)
 283		destroy_workqueue(xe->preempt_fence_wq);
 284
 285	if (xe->ordered_wq)
 286		destroy_workqueue(xe->ordered_wq);
 287
 288	if (xe->unordered_wq)
 289		destroy_workqueue(xe->unordered_wq);
 290
 291	if (xe->destroy_wq)
 292		destroy_workqueue(xe->destroy_wq);
 293
 294	ttm_device_fini(&xe->ttm);
 295}
 296
 297struct xe_device *xe_device_create(struct pci_dev *pdev,
 298				   const struct pci_device_id *ent)
 299{
 300	struct xe_device *xe;
 301	int err;
 302
 303	xe_display_driver_set_hooks(&driver);
 304
 305	err = aperture_remove_conflicting_pci_devices(pdev, driver.name);
 306	if (err)
 307		return ERR_PTR(err);
 308
 309	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
 310	if (IS_ERR(xe))
 311		return xe;
 312
 313	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
 314			      xe->drm.anon_inode->i_mapping,
 315			      xe->drm.vma_offset_manager, false, false);
 316	if (WARN_ON(err))
 317		goto err;
 318
 319	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
 320	if (err)
 321		goto err;
 322
 323	xe->info.devid = pdev->device;
 324	xe->info.revid = pdev->revision;
 325	xe->info.force_execlist = xe_modparam.force_execlist;
 326
 327	err = xe_irq_init(xe);
 328	if (err)
 329		goto err;
 330
 331	init_waitqueue_head(&xe->ufence_wq);
 332
 333	init_rwsem(&xe->usm.lock);
 334
 335	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
 336
 337	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
 338		/* Trigger a large asid and an early asid wrap. */
 339		u32 asid;
 340
 341		BUILD_BUG_ON(XE_MAX_ASID < 2);
 342		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
 343				      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
 344				      &xe->usm.next_asid, GFP_KERNEL);
 345		drm_WARN_ON(&xe->drm, err);
 346		if (err >= 0)
 347			xa_erase(&xe->usm.asid_to_vm, asid);
 348	}
 349
 350	spin_lock_init(&xe->pinned.lock);
 351	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
 352	INIT_LIST_HEAD(&xe->pinned.external_vram);
 353	INIT_LIST_HEAD(&xe->pinned.evicted);
 354
 355	xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
 356						       WQ_MEM_RECLAIM);
 357	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
 358	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
 359	xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
 360	if (!xe->ordered_wq || !xe->unordered_wq ||
 361	    !xe->preempt_fence_wq || !xe->destroy_wq) {
 362		/*
 363		 * Cleanup done in xe_device_destroy via
 364		 * drmm_add_action_or_reset register above
 365		 */
 366		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
 367		err = -ENOMEM;
 368		goto err;
 369	}
 370
 371	err = xe_display_create(xe);
 372	if (WARN_ON(err))
 373		goto err;
 374
 375	return xe;
 376
 377err:
 378	return ERR_PTR(err);
 379}
 380ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */
 381
 382static bool xe_driver_flr_disabled(struct xe_device *xe)
 383{
 384	return xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS;
 385}
 386
 387/*
 388 * The driver-initiated FLR is the highest level of reset that we can trigger
 389 * from within the driver. It is different from the PCI FLR in that it doesn't
 390 * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 391 * it doesn't require a re-enumeration of the PCI BARs. However, the
 392 * driver-initiated FLR does still cause a reset of both GT and display and a
 393 * memory wipe of local and stolen memory, so recovery would require a full HW
 394 * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 395 * perform the FLR as the very last action before releasing access to the HW
 396 * during the driver release flow, we don't attempt recovery at all, because
 397 * if/when a new instance of i915 is bound to the device it will do a full
 398 * re-init anyway.
 399 */
 400static void __xe_driver_flr(struct xe_device *xe)
 401{
 402	const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
 403	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
 404	int ret;
 405
 406	drm_dbg(&xe->drm, "Triggering Driver-FLR\n");
 407
 408	/*
 409	 * Make sure any pending FLR requests have cleared by waiting for the
 410	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
 411	 * to make sure it's not still set from a prior attempt (it's a write to
 412	 * clear bit).
 413	 * Note that we should never be in a situation where a previous attempt
 414	 * is still pending (unless the HW is totally dead), but better to be
 415	 * safe in case something unexpected happens
 416	 */
 417	ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
 418	if (ret) {
 419		drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
 420		return;
 421	}
 422	xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
 423
 424	/* Trigger the actual Driver-FLR */
 425	xe_mmio_rmw32(mmio, GU_CNTL, 0, DRIVERFLR);
 426
 427	/* Wait for hardware teardown to complete */
 428	ret = xe_mmio_wait32(mmio, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
 429	if (ret) {
 430		drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
 431		return;
 432	}
 433
 434	/* Wait for hardware/firmware re-init to complete */
 435	ret = xe_mmio_wait32(mmio, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
 436			     flr_timeout, NULL, false);
 437	if (ret) {
 438		drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
 439		return;
 440	}
 441
 442	/* Clear sticky completion status */
 443	xe_mmio_write32(mmio, GU_DEBUG, DRIVERFLR_STATUS);
 444}
 445
 446static void xe_driver_flr(struct xe_device *xe)
 447{
 448	if (xe_driver_flr_disabled(xe)) {
 449		drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
 450		return;
 451	}
 452
 453	__xe_driver_flr(xe);
 454}
 455
 456static void xe_driver_flr_fini(void *arg)
 457{
 458	struct xe_device *xe = arg;
 459
 460	if (xe->needs_flr_on_fini)
 461		xe_driver_flr(xe);
 462}
 463
 464static void xe_device_sanitize(void *arg)
 465{
 466	struct xe_device *xe = arg;
 467	struct xe_gt *gt;
 468	u8 id;
 469
 470	for_each_gt(gt, xe, id)
 471		xe_gt_sanitize(gt);
 472}
 473
 474static int xe_set_dma_info(struct xe_device *xe)
 475{
 476	unsigned int mask_size = xe->info.dma_mask_size;
 477	int err;
 478
 479	dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));
 480
 481	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
 482	if (err)
 483		goto mask_err;
 484
 485	err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
 486	if (err)
 487		goto mask_err;
 488
 489	return 0;
 490
 491mask_err:
 492	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
 493	return err;
 494}
 495
 496static bool verify_lmem_ready(struct xe_device *xe)
 497{
 498	u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;
 499
 500	return !!val;
 501}
 502
 503static int wait_for_lmem_ready(struct xe_device *xe)
 504{
 505	unsigned long timeout, start;
 506
 507	if (!IS_DGFX(xe))
 508		return 0;
 509
 510	if (IS_SRIOV_VF(xe))
 511		return 0;
 512
 513	if (verify_lmem_ready(xe))
 514		return 0;
 515
 516	drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
 517
 518	start = jiffies;
 519	timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
 520
 521	do {
 522		if (signal_pending(current))
 523			return -EINTR;
 524
 525		/*
 526		 * The boot firmware initializes local memory and
 527		 * assesses its health. If memory training fails,
 528		 * the punit will have been instructed to keep the GT powered
 529		 * down.we won't be able to communicate with it
 530		 *
 531		 * If the status check is done before punit updates the register,
 532		 * it can lead to the system being unusable.
 533		 * use a timeout and defer the probe to prevent this.
 534		 */
 535		if (time_after(jiffies, timeout)) {
 536			drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
 537			return -EPROBE_DEFER;
 538		}
 539
 540		msleep(20);
 541
 542	} while (!verify_lmem_ready(xe));
 543
 544	drm_dbg(&xe->drm, "lmem ready after %ums",
 545		jiffies_to_msecs(jiffies - start));
 546
 547	return 0;
 548}
 549ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
 550
 551static void update_device_info(struct xe_device *xe)
 552{
 553	/* disable features that are not available/applicable to VFs */
 554	if (IS_SRIOV_VF(xe)) {
 555		xe->info.probe_display = 0;
 556		xe->info.has_heci_gscfi = 0;
 557		xe->info.skip_guc_pc = 1;
 558		xe->info.skip_pcode = 1;
 559	}
 560}
 561
 562/**
 563 * xe_device_probe_early: Device early probe
 564 * @xe: xe device instance
 565 *
 566 * Initialize MMIO resources that don't require any
 567 * knowledge about tile count. Also initialize pcode and
 568 * check vram initialization on root tile.
 569 *
 570 * Return: 0 on success, error code on failure
 571 */
 572int xe_device_probe_early(struct xe_device *xe)
 573{
 574	int err;
 575
 576	err = xe_mmio_init(xe);
 577	if (err)
 578		return err;
 579
 580	xe_sriov_probe_early(xe);
 581
 582	update_device_info(xe);
 583
 584	err = xe_pcode_probe_early(xe);
 585	if (err)
 586		return err;
 587
 588	err = wait_for_lmem_ready(xe);
 589	if (err)
 590		return err;
 591
 592	xe->wedged.mode = xe_modparam.wedged_mode;
 593
 594	return 0;
 595}
 596
 597static int probe_has_flat_ccs(struct xe_device *xe)
 598{
 599	struct xe_gt *gt;
 600	unsigned int fw_ref;
 601	u32 reg;
 602
 603	/* Always enabled/disabled, no runtime check to do */
 604	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
 605		return 0;
 606
 607	gt = xe_root_mmio_gt(xe);
 608
 609	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 610	if (!fw_ref)
 611		return -ETIMEDOUT;
 612
 613	reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
 614	xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);
 615
 616	if (!xe->info.has_flat_ccs)
 617		drm_dbg(&xe->drm,
 618			"Flat CCS has been disabled in bios, May lead to performance impact");
 619
 620	xe_force_wake_put(gt_to_fw(gt), fw_ref);
 621	return 0;
 622}
 623
 624int xe_device_probe(struct xe_device *xe)
 625{
 626	struct xe_tile *tile;
 627	struct xe_gt *gt;
 628	int err;
 629	u8 last_gt;
 630	u8 id;
 631
 632	xe_pat_init_early(xe);
 633
 634	err = xe_sriov_init(xe);
 635	if (err)
 636		return err;
 637
 638	xe->info.mem_region_mask = 1;
 639	err = xe_display_init_nommio(xe);
 640	if (err)
 641		return err;
 642
 643	err = xe_set_dma_info(xe);
 644	if (err)
 645		return err;
 646
 647	err = xe_mmio_probe_tiles(xe);
 648	if (err)
 649		return err;
 650
 651	xe_ttm_sys_mgr_init(xe);
 652
 653	for_each_gt(gt, xe, id) {
 654		err = xe_gt_init_early(gt);
 655		if (err)
 656			return err;
 657
 658		/*
 659		 * Only after this point can GT-specific MMIO operations
 660		 * (including things like communication with the GuC)
 661		 * be performed.
 662		 */
 663		xe_gt_mmio_init(gt);
 664	}
 665
 666	for_each_tile(tile, xe, id) {
 667		if (IS_SRIOV_VF(xe)) {
 668			xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
 669			err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
 670			if (err)
 671				return err;
 672			err = xe_gt_sriov_vf_query_config(tile->primary_gt);
 673			if (err)
 674				return err;
 675		}
 676		err = xe_ggtt_init_early(tile->mem.ggtt);
 677		if (err)
 678			return err;
 679		err = xe_memirq_init(&tile->memirq);
 680		if (err)
 681			return err;
 682	}
 683
 684	for_each_gt(gt, xe, id) {
 685		err = xe_gt_init_hwconfig(gt);
 686		if (err)
 687			return err;
 688	}
 689
 690	err = xe_devcoredump_init(xe);
 691	if (err)
 692		return err;
 693	err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe);
 694	if (err)
 695		return err;
 696
 697	err = xe_display_init_noirq(xe);
 698	if (err)
 699		return err;
 700
 701	err = xe_irq_install(xe);
 702	if (err)
 703		goto err;
 704
 705	err = probe_has_flat_ccs(xe);
 706	if (err)
 707		goto err;
 708
 709	err = xe_vram_probe(xe);
 710	if (err)
 711		goto err;
 712
 713	for_each_tile(tile, xe, id) {
 714		err = xe_tile_init_noalloc(tile);
 715		if (err)
 716			goto err;
 717	}
 718
 719	/* Allocate and map stolen after potential VRAM resize */
 720	xe_ttm_stolen_mgr_init(xe);
 721
 722	/*
 723	 * Now that GT is initialized (TTM in particular),
 724	 * we can try to init display, and inherit the initial fb.
 725	 * This is the reason the first allocation needs to be done
 726	 * inside display.
 727	 */
 728	err = xe_display_init_noaccel(xe);
 729	if (err)
 730		goto err;
 731
 732	for_each_gt(gt, xe, id) {
 733		last_gt = id;
 734
 735		err = xe_gt_init(gt);
 736		if (err)
 737			goto err_fini_gt;
 738	}
 739
 740	xe_heci_gsc_init(xe);
 741
 742	err = xe_oa_init(xe);
 743	if (err)
 744		goto err_fini_gt;
 745
 746	err = xe_display_init(xe);
 747	if (err)
 748		goto err_fini_oa;
 749
 750	err = drm_dev_register(&xe->drm, 0);
 751	if (err)
 752		goto err_fini_display;
 753
 754	xe_display_register(xe);
 755
 756	xe_oa_register(xe);
 757
 758	xe_debugfs_register(xe);
 759
 760	xe_hwmon_register(xe);
 761
 762	for_each_gt(gt, xe, id)
 763		xe_gt_sanitize_freq(gt);
 764
 765	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
 766
 767err_fini_display:
 768	xe_display_driver_remove(xe);
 769
 770err_fini_oa:
 771	xe_oa_fini(xe);
 772
 773err_fini_gt:
 774	for_each_gt(gt, xe, id) {
 775		if (id < last_gt)
 776			xe_gt_remove(gt);
 777		else
 778			break;
 779	}
 780
 781err:
 782	xe_display_fini(xe);
 783	return err;
 784}
 785
 786static void xe_device_remove_display(struct xe_device *xe)
 787{
 788	xe_display_unregister(xe);
 789
 790	drm_dev_unplug(&xe->drm);
 791	xe_display_driver_remove(xe);
 792}
 793
 794void xe_device_remove(struct xe_device *xe)
 795{
 796	struct xe_gt *gt;
 797	u8 id;
 798
 799	xe_oa_unregister(xe);
 800
 801	xe_device_remove_display(xe);
 802
 803	xe_display_fini(xe);
 804
 805	xe_oa_fini(xe);
 806
 807	xe_heci_gsc_fini(xe);
 808
 809	for_each_gt(gt, xe, id)
 810		xe_gt_remove(gt);
 811}
 812
 813void xe_device_shutdown(struct xe_device *xe)
 814{
 815	struct xe_gt *gt;
 816	u8 id;
 817
 818	drm_dbg(&xe->drm, "Shutting down device\n");
 819
 820	if (xe_driver_flr_disabled(xe)) {
 821		xe_display_pm_shutdown(xe);
 822
 823		xe_irq_suspend(xe);
 824
 825		for_each_gt(gt, xe, id)
 826			xe_gt_shutdown(gt);
 827
 828		xe_display_pm_shutdown_late(xe);
 829	} else {
 830		/* BOOM! */
 831		__xe_driver_flr(xe);
 832	}
 833}
 834
 835/**
 836 * xe_device_wmb() - Device specific write memory barrier
 837 * @xe: the &xe_device
 838 *
 839 * While wmb() is sufficient for a barrier if we use system memory, on discrete
 840 * platforms with device memory we additionally need to issue a register write.
 841 * Since it doesn't matter which register we write to, use the read-only VF_CAP
 842 * register that is also marked as accessible by the VFs.
 843 */
 844void xe_device_wmb(struct xe_device *xe)
 845{
 846	wmb();
 847	if (IS_DGFX(xe))
 848		xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
 849}
 850
 851/**
 852 * xe_device_td_flush() - Flush transient L3 cache entries
 853 * @xe: The device
 854 *
 855 * Display engine has direct access to memory and is never coherent with L3/L4
 856 * caches (or CPU caches), however KMD is responsible for specifically flushing
 857 * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
 858 * can happen from such a surface without seeing corruption.
 859 *
 860 * Display surfaces can be tagged as transient by mapping it using one of the
 861 * various L3:XD PAT index modes on Xe2.
 862 *
 863 * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
 864 * at the end of each submission via PIPE_CONTROL for compute/render, since SA
 865 * Media is not coherent with L3 and we want to support render-vs-media
 866 * usescases. For other engines like copy/blt the HW internally forces uncached
 867 * behaviour, hence why we can skip the TDF on such platforms.
 868 */
 869void xe_device_td_flush(struct xe_device *xe)
 870{
 871	struct xe_gt *gt;
 872	unsigned int fw_ref;
 873	u8 id;
 874
 875	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
 876		return;
 877
 878	if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
 879		xe_device_l2_flush(xe);
 880		return;
 881	}
 882
 883	for_each_gt(gt, xe, id) {
 884		if (xe_gt_is_media_type(gt))
 885			continue;
 886
 887		fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 888		if (!fw_ref)
 889			return;
 890
 891		xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
 892		/*
 893		 * FIXME: We can likely do better here with our choice of
 894		 * timeout. Currently we just assume the worst case, i.e. 150us,
 895		 * which is believed to be sufficient to cover the worst case
 896		 * scenario on current platforms if all cache entries are
 897		 * transient and need to be flushed..
 898		 */
 899		if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
 900				   150, NULL, false))
 901			xe_gt_err_once(gt, "TD flush timeout\n");
 902
 903		xe_force_wake_put(gt_to_fw(gt), fw_ref);
 904	}
 905}
 906
 907void xe_device_l2_flush(struct xe_device *xe)
 908{
 909	struct xe_gt *gt;
 910	unsigned int fw_ref;
 911
 912	gt = xe_root_mmio_gt(xe);
 913
 914	if (!XE_WA(gt, 16023588340))
 915		return;
 916
 917	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 918	if (!fw_ref)
 919		return;
 920
 921	spin_lock(&gt->global_invl_lock);
 922	xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
 923
 924	if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
 925		xe_gt_err_once(gt, "Global invalidation timeout\n");
 926	spin_unlock(&gt->global_invl_lock);
 927
 928	xe_force_wake_put(gt_to_fw(gt), fw_ref);
 929}
 930
 931u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
 932{
 933	return xe_device_has_flat_ccs(xe) ?
 934		DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
 935}
 936
 937/**
 938 * xe_device_assert_mem_access - Inspect the current runtime_pm state.
 939 * @xe: xe device instance
 940 *
 941 * To be used before any kind of memory access. It will splat a debug warning
 942 * if the device is currently sleeping. But it doesn't guarantee in any way
 943 * that the device is going to remain awake. Xe PM runtime get and put
 944 * functions might be added to the outer bound of the memory access, while
 945 * this check is intended for inner usage to splat some warning if the worst
 946 * case has just happened.
 947 */
 948void xe_device_assert_mem_access(struct xe_device *xe)
 949{
 950	xe_assert(xe, !xe_pm_runtime_suspended(xe));
 951}
 952
 953void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
 954{
 955	struct xe_gt *gt;
 956	u8 id;
 957
 958	drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
 959	drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
 960
 961	for_each_gt(gt, xe, id) {
 962		drm_printf(p, "GT id: %u\n", id);
 963		drm_printf(p, "\tTile: %u\n", gt->tile->id);
 964		drm_printf(p, "\tType: %s\n",
 965			   gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
 966		drm_printf(p, "\tIP ver: %u.%u.%u\n",
 967			   REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
 968			   REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
 969			   REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
 970		drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
 971	}
 972}
 973
 974u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
 975{
 976	return sign_extend64(address, xe->info.va_bits - 1);
 977}
 978
 979u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
 980{
 981	return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
 982}
 983
 984static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
 985{
 986	struct xe_device *xe = arg;
 987
 988	xe_pm_runtime_put(xe);
 989}
 990
 991/**
 992 * xe_device_declare_wedged - Declare device wedged
 993 * @xe: xe device instance
 994 *
 995 * This is a final state that can only be cleared with a mudule
 996 * re-probe (unbind + bind).
 997 * In this state every IOCTL will be blocked so the GT cannot be used.
 998 * In general it will be called upon any critical error such as gt reset
 999 * failure or guc loading failure.
1000 * If xe.wedged module parameter is set to 2, this function will be called
1001 * on every single execution timeout (a.k.a. GPU hang) right after devcoredump
1002 * snapshot capture. In this mode, GT reset won't be attempted so the state of
1003 * the issue is preserved for further debugging.
1004 */
1005void xe_device_declare_wedged(struct xe_device *xe)
1006{
1007	struct xe_gt *gt;
1008	u8 id;
1009
1010	if (xe->wedged.mode == 0) {
1011		drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n");
1012		return;
1013	}
1014
1015	xe_pm_runtime_get_noresume(xe);
1016
1017	if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
1018		drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
1019		return;
1020	}
1021
1022	if (!atomic_xchg(&xe->wedged.flag, 1)) {
1023		xe->needs_flr_on_fini = true;
1024		drm_err(&xe->drm,
1025			"CRITICAL: Xe has declared device %s as wedged.\n"
1026			"IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
1027			"Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
1028			dev_name(xe->drm.dev));
1029	}
1030
1031	for_each_gt(gt, xe, id)
1032		xe_gt_declare_wedged(gt);
1033}