   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2021 Intel Corporation
   4 */
   5
   6#include "xe_bo.h"
   7
   8#include <linux/dma-buf.h>
   9
  10#include <drm/drm_drv.h>
  11#include <drm/drm_gem_ttm_helper.h>
  12#include <drm/drm_managed.h>
  13#include <drm/ttm/ttm_device.h>
  14#include <drm/ttm/ttm_placement.h>
  15#include <drm/ttm/ttm_tt.h>
  16#include <uapi/drm/xe_drm.h>
  17
  18#include "xe_device.h"
  19#include "xe_dma_buf.h"
  20#include "xe_drm_client.h"
  21#include "xe_ggtt.h"
  22#include "xe_gt.h"
  23#include "xe_map.h"
  24#include "xe_migrate.h"
  25#include "xe_pm.h"
  26#include "xe_preempt_fence.h"
  27#include "xe_res_cursor.h"
  28#include "xe_trace_bo.h"
  29#include "xe_ttm_stolen_mgr.h"
  30#include "xe_vm.h"
  31
  32const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
  33	[XE_PL_SYSTEM] = "system",
  34	[XE_PL_TT] = "gtt",
  35	[XE_PL_VRAM0] = "vram0",
  36	[XE_PL_VRAM1] = "vram1",
  37	[XE_PL_STOLEN] = "stolen"
  38};
  39
  40static const struct ttm_place sys_placement_flags = {
  41	.fpfn = 0,
  42	.lpfn = 0,
  43	.mem_type = XE_PL_SYSTEM,
  44	.flags = 0,
  45};
  46
  47static struct ttm_placement sys_placement = {
  48	.num_placement = 1,
  49	.placement = &sys_placement_flags,
  50};
  51
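/*
 * Placement used by xe_evict_flags() when evicting xe BOs out of VRAM or
 * stolen memory: prefer XE_PL_TT (system pages mapped through the GTT),
 * falling back to plain XE_PL_SYSTEM when no TT space is available.
 */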
  52static const struct ttm_place tt_placement_flags[] = {
  53	{
  54		.fpfn = 0,
  55		.lpfn = 0,
  56		.mem_type = XE_PL_TT,
  57		.flags = TTM_PL_FLAG_DESIRED,
  58	},
  59	{
  60		.fpfn = 0,
  61		.lpfn = 0,
  62		.mem_type = XE_PL_SYSTEM,
  63		.flags = TTM_PL_FLAG_FALLBACK,
  64	}
  65};
  66
  67static struct ttm_placement tt_placement = {
  68	.num_placement = 2,
  69	.placement = tt_placement_flags,
  70};
  71
  72bool mem_type_is_vram(u32 mem_type)
  73{
  74	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
  75}
  76
  77static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
  78{
  79	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
  80}
  81
  82static bool resource_is_vram(struct ttm_resource *res)
  83{
  84	return mem_type_is_vram(res->mem_type);
  85}
  86
  87bool xe_bo_is_vram(struct xe_bo *bo)
  88{
  89	return resource_is_vram(bo->ttm.resource) ||
  90		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
  91}
  92
  93bool xe_bo_is_stolen(struct xe_bo *bo)
  94{
  95	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
  96}
  97
  98/**
  99 * xe_bo_has_single_placement - check if BO is placed only in one memory location
 100 * @bo: The BO
 101 *
 102 * This function checks whether a given BO is placed in only one memory location.
 103 *
 104 * Returns: true if the BO is placed in a single memory location, false otherwise.
 105 *
 106 */
 107bool xe_bo_has_single_placement(struct xe_bo *bo)
 108{
 109	return bo->placement.num_placement == 1;
 110}
 111
 112/**
 113 * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
 114 * @bo: The BO
 115 *
 116 * The stolen memory is accessed through the PCI BAR for both DGFX and some
 117 * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
 118 *
 119 * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
 120 */
 121bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
 122{
 123	return xe_bo_is_stolen(bo) &&
 124		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
 125}
 126
 127static bool xe_bo_is_user(struct xe_bo *bo)
 128{
 129	return bo->flags & XE_BO_FLAG_USER;
 130}
 131
 132static struct xe_migrate *
 133mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
 134{
 135	struct xe_tile *tile;
 136
 137	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
 138	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
 139	return tile->migrate;
 140}
 141
 142static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
 143{
 144	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
 145	struct ttm_resource_manager *mgr;
 146
 147	xe_assert(xe, resource_is_vram(res));
 148	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
 149	return to_xe_ttm_vram_mgr(mgr)->vram;
 150}
 151
 152static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
 153			   u32 bo_flags, u32 *c)
 154{
 155	if (bo_flags & XE_BO_FLAG_SYSTEM) {
 156		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 157
 158		bo->placements[*c] = (struct ttm_place) {
 159			.mem_type = XE_PL_TT,
 160		};
 161		*c += 1;
 162	}
 163}
 164
 165static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 166		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
 167{
 168	struct ttm_place place = { .mem_type = mem_type };
 169	struct xe_mem_region *vram;
 170	u64 io_size;
 171
 172	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 173
 174	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
 175	xe_assert(xe, vram && vram->usable_size);
 176	io_size = vram->io_size;
 177
  178	/*
  179	 * For eviction / restore on suspend / resume, objects
  180	 * pinned in VRAM must be contiguous.
  181	 */
 182	if (bo_flags & (XE_BO_FLAG_PINNED |
 183			XE_BO_FLAG_GGTT))
 184		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
 185
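
	/*
	 * Small-BAR case: only io_size bytes of VRAM are CPU-visible. BOs
	 * that need CPU access are restricted to that window via lpfn;
	 * everything else is allocated top-down, away from the window.
	 */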
 186	if (io_size < vram->usable_size) {
 187		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
 188			place.fpfn = 0;
 189			place.lpfn = io_size >> PAGE_SHIFT;
 190		} else {
 191			place.flags |= TTM_PL_FLAG_TOPDOWN;
 192		}
 193	}
 194	places[*c] = place;
 195	*c += 1;
 196}
 197
 198static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
 199			 u32 bo_flags, u32 *c)
 200{
 201	if (bo_flags & XE_BO_FLAG_VRAM0)
 202		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
 203	if (bo_flags & XE_BO_FLAG_VRAM1)
 204		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
 205}
 206
 207static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
 208			   u32 bo_flags, u32 *c)
 209{
 210	if (bo_flags & XE_BO_FLAG_STOLEN) {
 211		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 212
 213		bo->placements[*c] = (struct ttm_place) {
 214			.mem_type = XE_PL_STOLEN,
 215			.flags = bo_flags & (XE_BO_FLAG_PINNED |
 216					     XE_BO_FLAG_GGTT) ?
 217				TTM_PL_FLAG_CONTIGUOUS : 0,
 218		};
 219		*c += 1;
 220	}
 221}
 222
 223static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 224				       u32 bo_flags)
 225{
 226	u32 c = 0;
 227
 228	try_add_vram(xe, bo, bo_flags, &c);
 229	try_add_system(xe, bo, bo_flags, &c);
 230	try_add_stolen(xe, bo, bo_flags, &c);
 231
 232	if (!c)
 233		return -EINVAL;
 234
 235	bo->placement = (struct ttm_placement) {
 236		.num_placement = c,
 237		.placement = bo->placements,
 238	};
 239
 240	return 0;
 241}
 242
 243int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 244			      u32 bo_flags)
 245{
 246	xe_bo_assert_held(bo);
 247	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
 248}
 249
 250static void xe_evict_flags(struct ttm_buffer_object *tbo,
 251			   struct ttm_placement *placement)
 252{
 253	if (!xe_bo_is_xe_bo(tbo)) {
 254		/* Don't handle scatter gather BOs */
 255		if (tbo->type == ttm_bo_type_sg) {
 256			placement->num_placement = 0;
 257			return;
 258		}
 259
 260		*placement = sys_placement;
 261		return;
 262	}
 263
 264	/*
  265	 * For xe, sg bos that are evicted to system just trigger a
  266	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
 267	 */
 268	switch (tbo->resource->mem_type) {
 269	case XE_PL_VRAM0:
 270	case XE_PL_VRAM1:
 271	case XE_PL_STOLEN:
 272		*placement = tt_placement;
 273		break;
 274	case XE_PL_TT:
 275	default:
 276		*placement = sys_placement;
 277		break;
 278	}
 279}
 280
 281struct xe_ttm_tt {
 282	struct ttm_tt ttm;
 283	struct device *dev;
 284	struct sg_table sgt;
 285	struct sg_table *sg;
 286	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
 287	bool purgeable;
 288};
 289
 290static int xe_tt_map_sg(struct ttm_tt *tt)
 291{
 292	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
 293	unsigned long num_pages = tt->num_pages;
 294	int ret;
 295
 296	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
 297
 298	if (xe_tt->sg)
 299		return 0;
 300
 301	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
 302						num_pages, 0,
 303						(u64)num_pages << PAGE_SHIFT,
 304						xe_sg_segment_size(xe_tt->dev),
 305						GFP_KERNEL);
 306	if (ret)
 307		return ret;
 308
 309	xe_tt->sg = &xe_tt->sgt;
 310	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
 311			      DMA_ATTR_SKIP_CPU_SYNC);
 312	if (ret) {
 313		sg_free_table(xe_tt->sg);
 314		xe_tt->sg = NULL;
 315		return ret;
 316	}
 317
 318	return 0;
 319}
 320
 321static void xe_tt_unmap_sg(struct ttm_tt *tt)
 322{
 323	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
 324
 325	if (xe_tt->sg) {
 326		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
 327				  DMA_BIDIRECTIONAL, 0);
 328		sg_free_table(xe_tt->sg);
 329		xe_tt->sg = NULL;
 330	}
 331}
 332
 333struct sg_table *xe_bo_sg(struct xe_bo *bo)
 334{
 335	struct ttm_tt *tt = bo->ttm.ttm;
 336	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
 337
 338	return xe_tt->sg;
 339}
 340
 341static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 342				       u32 page_flags)
 343{
 344	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
 345	struct xe_device *xe = xe_bo_device(bo);
 346	struct xe_ttm_tt *tt;
 347	unsigned long extra_pages;
 348	enum ttm_caching caching = ttm_cached;
 349	int err;
 350
 351	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
 352	if (!tt)
 353		return NULL;
 354
 355	tt->dev = xe->drm.dev;
 356
 357	extra_pages = 0;
 358	if (xe_bo_needs_ccs_pages(bo))
 359		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
 360					   PAGE_SIZE);
 361
 362	/*
 363	 * DGFX system memory is always WB / ttm_cached, since
 364	 * other caching modes are only supported on x86. DGFX
 365	 * GPU system memory accesses are always coherent with the
 366	 * CPU.
 367	 */
 368	if (!IS_DGFX(xe)) {
 369		switch (bo->cpu_caching) {
 370		case DRM_XE_GEM_CPU_CACHING_WC:
 371			caching = ttm_write_combined;
 372			break;
 373		default:
 374			caching = ttm_cached;
 375			break;
 376		}
 377
 378		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
 379
 380		/*
 381		 * Display scanout is always non-coherent with the CPU cache.
 382		 *
 383		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
 384		 * non-coherent and require a CPU:WC mapping.
 385		 */
 386		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
 387		    (xe->info.graphics_verx100 >= 1270 &&
 388		     bo->flags & XE_BO_FLAG_PAGETABLE))
 389			caching = ttm_write_combined;
 390	}
 391
 392	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
 393		/*
  394	 * Valid only for internally-created buffers, for
 395		 * which cpu_caching is never initialized.
 396		 */
 397		xe_assert(xe, bo->cpu_caching == 0);
 398		caching = ttm_uncached;
 399	}
 400
 401	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
 402	if (err) {
 403		kfree(tt);
 404		return NULL;
 405	}
 406
 407	return &tt->ttm;
 408}
 409
 410static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
 411			      struct ttm_operation_ctx *ctx)
 412{
 413	int err;
 414
 415	/*
 416	 * dma-bufs are not populated with pages, and the dma-
 417	 * addresses are set up when moved to XE_PL_TT.
 418	 */
 419	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
 420		return 0;
 421
 422	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
 423	if (err)
 424		return err;
 425
 426	return err;
 427}
 428
 429static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
 430{
 431	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
 432		return;
 433
 434	xe_tt_unmap_sg(tt);
 435
 436	return ttm_pool_free(&ttm_dev->pool, tt);
 437}
 438
 439static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
 440{
 441	ttm_tt_fini(tt);
 442	kfree(tt);
 443}
 444
 445static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 446				 struct ttm_resource *mem)
 447{
 448	struct xe_device *xe = ttm_to_xe_device(bdev);
 449
 450	switch (mem->mem_type) {
 451	case XE_PL_SYSTEM:
 452	case XE_PL_TT:
 453		return 0;
 454	case XE_PL_VRAM0:
 455	case XE_PL_VRAM1: {
 456		struct xe_ttm_vram_mgr_resource *vres =
 457			to_xe_ttm_vram_mgr_resource(mem);
 458		struct xe_mem_region *vram = res_to_mem_region(mem);
 459
 460		if (vres->used_visible_size < mem->size)
 461			return -EINVAL;
 462
 463		mem->bus.offset = mem->start << PAGE_SHIFT;
 464
 465		if (vram->mapping &&
 466		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
 467			mem->bus.addr = (u8 __force *)vram->mapping +
 468				mem->bus.offset;
 469
 470		mem->bus.offset += vram->io_start;
 471		mem->bus.is_iomem = true;
 472
 473#if  !IS_ENABLED(CONFIG_X86)
 474		mem->bus.caching = ttm_write_combined;
 475#endif
 476		return 0;
 477	} case XE_PL_STOLEN:
 478		return xe_ttm_stolen_io_mem_reserve(xe, mem);
 479	default:
 480		return -EINVAL;
 481	}
 482}
 483
 484static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 485				const struct ttm_operation_ctx *ctx)
 486{
 487	struct dma_resv_iter cursor;
 488	struct dma_fence *fence;
 489	struct drm_gem_object *obj = &bo->ttm.base;
 490	struct drm_gpuvm_bo *vm_bo;
 491	bool idle = false;
 492	int ret = 0;
 493
 494	dma_resv_assert_held(bo->ttm.base.resv);
 495
 496	if (!list_empty(&bo->ttm.base.gpuva.list)) {
 497		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
 498				    DMA_RESV_USAGE_BOOKKEEP);
 499		dma_resv_for_each_fence_unlocked(&cursor, fence)
 500			dma_fence_enable_sw_signaling(fence);
 501		dma_resv_iter_end(&cursor);
 502	}
 503
 504	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
 505		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
 506		struct drm_gpuva *gpuva;
 507
 508		if (!xe_vm_in_fault_mode(vm)) {
 509			drm_gpuvm_bo_evict(vm_bo, true);
 510			continue;
 511		}
 512
 513		if (!idle) {
 514			long timeout;
 515
 516			if (ctx->no_wait_gpu &&
 517			    !dma_resv_test_signaled(bo->ttm.base.resv,
 518						    DMA_RESV_USAGE_BOOKKEEP))
 519				return -EBUSY;
 520
 521			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
 522							DMA_RESV_USAGE_BOOKKEEP,
 523							ctx->interruptible,
 524							MAX_SCHEDULE_TIMEOUT);
 525			if (!timeout)
 526				return -ETIME;
 527			if (timeout < 0)
 528				return timeout;
 529
 530			idle = true;
 531		}
 532
 533		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
 534			struct xe_vma *vma = gpuva_to_vma(gpuva);
 535
 536			trace_xe_vma_evict(vma);
 537			ret = xe_vm_invalidate_vma(vma);
 538			if (XE_WARN_ON(ret))
 539				return ret;
 540		}
 541	}
 542
 543	return ret;
 544}
 545
 546/*
 547 * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
 548 * Note that unmapping the attachment is deferred to the next
 549 * map_attachment time, or to bo destroy (after idling), whichever comes first.
 550 * This is to avoid syncing before unmap_attachment(), assuming that the
 551 * caller relies on idling the reservation object before moving the
 552 * backing store out. Should that assumption not hold, then we will be able
 553 * to unconditionally call unmap_attachment() when moving out to system.
 554 */
 555static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
 556			     struct ttm_resource *new_res)
 557{
 558	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
 559	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
 560					       ttm);
 561	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 562	struct sg_table *sg;
 563
 564	xe_assert(xe, attach);
 565	xe_assert(xe, ttm_bo->ttm);
 566
 567	if (new_res->mem_type == XE_PL_SYSTEM)
 568		goto out;
 569
 570	if (ttm_bo->sg) {
 571		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
 572		ttm_bo->sg = NULL;
 573	}
 574
 575	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
 576	if (IS_ERR(sg))
 577		return PTR_ERR(sg);
 578
 579	ttm_bo->sg = sg;
 580	xe_tt->sg = sg;
 581
 582out:
 583	ttm_bo_move_null(ttm_bo, new_res);
 584
 585	return 0;
 586}
 587
 588/**
 589 * xe_bo_move_notify - Notify subsystems of a pending move
 590 * @bo: The buffer object
 591 * @ctx: The struct ttm_operation_ctx controlling locking and waits.
 592 *
 593 * This function notifies subsystems of an upcoming buffer move.
 594 * Upon receiving such a notification, subsystems should schedule
 595 * halting access to the underlying pages and optionally add a fence
  596 * to the buffer object's dma_resv object that signals when access is
 597 * stopped. The caller will wait on all dma_resv fences before
 598 * starting the move.
 599 *
 600 * A subsystem may commence access to the object after obtaining
 601 * bindings to the new backing memory under the object lock.
 602 *
 603 * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
 604 * negative error code on error.
 605 */
 606static int xe_bo_move_notify(struct xe_bo *bo,
 607			     const struct ttm_operation_ctx *ctx)
 608{
 609	struct ttm_buffer_object *ttm_bo = &bo->ttm;
 610	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 611	struct ttm_resource *old_mem = ttm_bo->resource;
 612	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
 613	int ret;
 614
 615	/*
 616	 * If this starts to call into many components, consider
 617	 * using a notification chain here.
 618	 */
 619
 620	if (xe_bo_is_pinned(bo))
 621		return -EINVAL;
 622
 623	xe_bo_vunmap(bo);
 624	ret = xe_bo_trigger_rebind(xe, bo, ctx);
 625	if (ret)
 626		return ret;
 627
 628	/* Don't call move_notify() for imported dma-bufs. */
 629	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
 630		dma_buf_move_notify(ttm_bo->base.dma_buf);
 631
 632	/*
 633	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
 634	 * so if we moved from VRAM make sure to unlink this from the userfault
 635	 * tracking.
 636	 */
 637	if (mem_type_is_vram(old_mem_type)) {
 638		mutex_lock(&xe->mem_access.vram_userfault.lock);
 639		if (!list_empty(&bo->vram_userfault_link))
 640			list_del_init(&bo->vram_userfault_link);
 641		mutex_unlock(&xe->mem_access.vram_userfault.lock);
 642	}
 643
 644	return 0;
 645}
 646
 647static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 648		      struct ttm_operation_ctx *ctx,
 649		      struct ttm_resource *new_mem,
 650		      struct ttm_place *hop)
 651{
 652	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 653	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
 654	struct ttm_resource *old_mem = ttm_bo->resource;
 655	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
 656	struct ttm_tt *ttm = ttm_bo->ttm;
 657	struct xe_migrate *migrate = NULL;
 658	struct dma_fence *fence;
 659	bool move_lacks_source;
 660	bool tt_has_data;
 661	bool needs_clear;
 662	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
 663				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
 664	int ret = 0;
 665
 666	/* Bo creation path, moving to system or TT. */
 667	if ((!old_mem && ttm) && !handle_system_ccs) {
 668		if (new_mem->mem_type == XE_PL_TT)
 669			ret = xe_tt_map_sg(ttm);
 670		if (!ret)
 671			ttm_bo_move_null(ttm_bo, new_mem);
 672		goto out;
 673	}
 674
 675	if (ttm_bo->type == ttm_bo_type_sg) {
 676		ret = xe_bo_move_notify(bo, ctx);
 677		if (!ret)
 678			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
 679		return ret;
 680	}
 681
 682	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
 683			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
 684
 685	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
 686					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
 687
 688	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
 689		(!ttm && ttm_bo->type == ttm_bo_type_device);
 690
 691	if (new_mem->mem_type == XE_PL_TT) {
 692		ret = xe_tt_map_sg(ttm);
 693		if (ret)
 694			goto out;
 695	}
 696
 697	if ((move_lacks_source && !needs_clear)) {
 698		ttm_bo_move_null(ttm_bo, new_mem);
 699		goto out;
 700	}
 701
 702	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
 703		ttm_bo_move_null(ttm_bo, new_mem);
 704		goto out;
 705	}
 706
 707	/*
 708	 * Failed multi-hop where the old_mem is still marked as
  709	 * TTM_PL_FLAG_TEMPORARY; this should just be a dummy move.
 710	 */
 711	if (old_mem_type == XE_PL_TT &&
 712	    new_mem->mem_type == XE_PL_TT) {
 713		ttm_bo_move_null(ttm_bo, new_mem);
 714		goto out;
 715	}
 716
 717	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
 718		ret = xe_bo_move_notify(bo, ctx);
 719		if (ret)
 720			goto out;
 721	}
 722
 723	if (old_mem_type == XE_PL_TT &&
 724	    new_mem->mem_type == XE_PL_SYSTEM) {
 725		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
 726						     DMA_RESV_USAGE_BOOKKEEP,
 727						     false,
 728						     MAX_SCHEDULE_TIMEOUT);
 729		if (timeout < 0) {
 730			ret = timeout;
 731			goto out;
 732		}
 733
 734		if (!handle_system_ccs) {
 735			ttm_bo_move_null(ttm_bo, new_mem);
 736			goto out;
 737		}
 738	}
 739
 740	if (!move_lacks_source &&
 741	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
 742	     (mem_type_is_vram(old_mem_type) &&
 743	      new_mem->mem_type == XE_PL_SYSTEM))) {
 744		hop->fpfn = 0;
 745		hop->lpfn = 0;
 746		hop->mem_type = XE_PL_TT;
 747		hop->flags = TTM_PL_FLAG_TEMPORARY;
 748		ret = -EMULTIHOP;
 749		goto out;
 750	}
 751
 752	if (bo->tile)
 753		migrate = bo->tile->migrate;
 754	else if (resource_is_vram(new_mem))
 755		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
 756	else if (mem_type_is_vram(old_mem_type))
 757		migrate = mem_type_to_migrate(xe, old_mem_type);
 758	else
 759		migrate = xe->tiles[0].migrate;
 760
 761	xe_assert(xe, migrate);
 762	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
 763	if (xe_rpm_reclaim_safe(xe)) {
 764		/*
 765		 * We might be called through swapout in the validation path of
 766		 * another TTM device, so acquire rpm here.
 767		 */
 768		xe_pm_runtime_get(xe);
 769	} else {
 770		drm_WARN_ON(&xe->drm, handle_system_ccs);
 771		xe_pm_runtime_get_noresume(xe);
 772	}
 773
 774	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
 775		/*
 776		 * Kernel memory that is pinned should only be moved on suspend
  777		 * / resume; some of the pinned memory is required for the
  778		 * device to resume / to use the GPU to move other evicted memory
  779		 * (user memory) around. This could likely be optimized a bit
  780		 * further by finding the minimum set of pinned memory
  781		 * required for resume, but for simplicity we do a memcpy for all
  782		 * pinned memory.
 783		 */
 784		ret = xe_bo_vmap(bo);
 785		if (!ret) {
 786			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
 787
  788			/* Create a new VMAP once the kernel BO is back in VRAM */
 789			if (!ret && resource_is_vram(new_mem)) {
 790				struct xe_mem_region *vram = res_to_mem_region(new_mem);
 791				void __iomem *new_addr = vram->mapping +
 792					(new_mem->start << PAGE_SHIFT);
 793
 794				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
 795					ret = -EINVAL;
 796					xe_pm_runtime_put(xe);
 797					goto out;
 798				}
 799
 800				xe_assert(xe, new_mem->start ==
 801					  bo->placements->fpfn);
 802
 803				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
 804			}
 805		}
 806	} else {
 807		if (move_lacks_source) {
 808			u32 flags = 0;
 809
 810			if (mem_type_is_vram(new_mem->mem_type))
 811				flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
 812			else if (handle_system_ccs)
 813				flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
 814
 815			fence = xe_migrate_clear(migrate, bo, new_mem, flags);
 816		}
 817		else
 818			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
 819						new_mem, handle_system_ccs);
 820		if (IS_ERR(fence)) {
 821			ret = PTR_ERR(fence);
 822			xe_pm_runtime_put(xe);
 823			goto out;
 824		}
 825		if (!move_lacks_source) {
 826			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
 827							true, new_mem);
 828			if (ret) {
 829				dma_fence_wait(fence, false);
 830				ttm_bo_move_null(ttm_bo, new_mem);
 831				ret = 0;
 832			}
 833		} else {
 834			/*
 835			 * ttm_bo_move_accel_cleanup() may blow up if
 836			 * bo->resource == NULL, so just attach the
 837			 * fence and set the new resource.
 838			 */
 839			dma_resv_add_fence(ttm_bo->base.resv, fence,
 840					   DMA_RESV_USAGE_KERNEL);
 841			ttm_bo_move_null(ttm_bo, new_mem);
 842		}
 843
 844		dma_fence_put(fence);
 845	}
 846
 847	xe_pm_runtime_put(xe);
 848
 849out:
 850	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
 851	    ttm_bo->ttm) {
 852		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
 853						     DMA_RESV_USAGE_KERNEL,
 854						     false,
 855						     MAX_SCHEDULE_TIMEOUT);
 856		if (timeout < 0)
 857			ret = timeout;
 858
 859		xe_tt_unmap_sg(ttm_bo->ttm);
 860	}
 861
 862	return ret;
 863}
 864
 865/**
 866 * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
 867 * @bo: The buffer object to move.
 868 *
  869 * On successful completion, the object memory will be moved to system memory.
  870 *
  871 * This is needed for special handling of pinned VRAM objects during
 872 * suspend-resume.
 873 *
 874 * Return: 0 on success. Negative error code on failure.
 875 */
 876int xe_bo_evict_pinned(struct xe_bo *bo)
 877{
 878	struct ttm_place place = {
 879		.mem_type = XE_PL_TT,
 880	};
 881	struct ttm_placement placement = {
 882		.placement = &place,
 883		.num_placement = 1,
 884	};
 885	struct ttm_operation_ctx ctx = {
 886		.interruptible = false,
 887	};
 888	struct ttm_resource *new_mem;
 889	int ret;
 890
 891	xe_bo_assert_held(bo);
 892
 893	if (WARN_ON(!bo->ttm.resource))
 894		return -EINVAL;
 895
 896	if (WARN_ON(!xe_bo_is_pinned(bo)))
 897		return -EINVAL;
 898
 899	if (!xe_bo_is_vram(bo))
 900		return 0;
 901
 902	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
 903	if (ret)
 904		return ret;
 905
 906	if (!bo->ttm.ttm) {
 907		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
 908		if (!bo->ttm.ttm) {
 909			ret = -ENOMEM;
 910			goto err_res_free;
 911		}
 912	}
 913
 914	ret = ttm_bo_populate(&bo->ttm, &ctx);
 915	if (ret)
 916		goto err_res_free;
 917
 918	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
 919	if (ret)
 920		goto err_res_free;
 921
 922	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
 923	if (ret)
 924		goto err_res_free;
 925
 926	return 0;
 927
 928err_res_free:
 929	ttm_resource_free(&bo->ttm, &new_mem);
 930	return ret;
 931}
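
/*
 * Illustrative sketch (an assumption, not code from this file): the suspend
 * path is expected to call this per pinned VRAM BO with the BO lock held,
 * along the lines of:
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_evict_pinned(bo);
 *	xe_bo_unlock(bo);
 */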
 932
 933/**
 934 * xe_bo_restore_pinned() - Restore a pinned VRAM object
 935 * @bo: The buffer object to move.
 936 *
 937 * On successful completion, the object memory will be moved back to VRAM.
 938 *
  939 * This is needed for special handling of pinned VRAM objects during
 940 * suspend-resume.
 941 *
 942 * Return: 0 on success. Negative error code on failure.
 943 */
 944int xe_bo_restore_pinned(struct xe_bo *bo)
 945{
 946	struct ttm_operation_ctx ctx = {
 947		.interruptible = false,
 948	};
 949	struct ttm_resource *new_mem;
 950	struct ttm_place *place = &bo->placements[0];
 951	int ret;
 952
 953	xe_bo_assert_held(bo);
 954
 955	if (WARN_ON(!bo->ttm.resource))
 956		return -EINVAL;
 957
 958	if (WARN_ON(!xe_bo_is_pinned(bo)))
 959		return -EINVAL;
 960
 961	if (WARN_ON(xe_bo_is_vram(bo)))
 962		return -EINVAL;
 963
 964	if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
 965		return -EINVAL;
 966
 967	if (!mem_type_is_vram(place->mem_type))
 968		return 0;
 969
 970	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
 971	if (ret)
 972		return ret;
 973
 974	ret = ttm_bo_populate(&bo->ttm, &ctx);
 975	if (ret)
 976		goto err_res_free;
 977
 978	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
 979	if (ret)
 980		goto err_res_free;
 981
 982	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
 983	if (ret)
 984		goto err_res_free;
 985
 986	return 0;
 987
 988err_res_free:
 989	ttm_resource_free(&bo->ttm, &new_mem);
 990	return ret;
 991}
 992
 993static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
 994				       unsigned long page_offset)
 995{
 996	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
 997	struct xe_res_cursor cursor;
 998	struct xe_mem_region *vram;
 999
1000	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1001		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1002
1003	vram = res_to_mem_region(ttm_bo->resource);
1004	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1005	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1006}
1007
1008static void __xe_bo_vunmap(struct xe_bo *bo);
1009
1010/*
1011 * TODO: Move this function to TTM so we don't rely on how TTM does its
1012 * locking, thereby abusing TTM internals.
1013 */
1014static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1015{
1016	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1017	bool locked;
1018
1019	xe_assert(xe, !kref_read(&ttm_bo->kref));
1020
1021	/*
1022	 * We can typically only race with TTM trylocking under the
1023	 * lru_lock, which will immediately be unlocked again since
1024	 * the ttm_bo refcount is zero at this point. So trylocking *should*
1025	 * always succeed here, as long as we hold the lru lock.
1026	 */
1027	spin_lock(&ttm_bo->bdev->lru_lock);
1028	locked = dma_resv_trylock(ttm_bo->base.resv);
1029	spin_unlock(&ttm_bo->bdev->lru_lock);
1030	xe_assert(xe, locked);
1031
1032	return locked;
1033}
1034
1035static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1036{
1037	struct dma_resv_iter cursor;
1038	struct dma_fence *fence;
1039	struct dma_fence *replacement = NULL;
1040	struct xe_bo *bo;
1041
1042	if (!xe_bo_is_xe_bo(ttm_bo))
1043		return;
1044
1045	bo = ttm_to_xe_bo(ttm_bo);
1046	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1047
1048	/*
 1049	 * Corner case where TTM fails to allocate memory and this BO's resv
 1050	 * still points to the VM's resv.
1051	 */
1052	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1053		return;
1054
1055	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1056		return;
1057
1058	/*
1059	 * Scrub the preempt fences if any. The unbind fence is already
1060	 * attached to the resv.
1061	 * TODO: Don't do this for external bos once we scrub them after
1062	 * unbind.
1063	 */
1064	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1065				DMA_RESV_USAGE_BOOKKEEP, fence) {
1066		if (xe_fence_is_xe_preempt(fence) &&
1067		    !dma_fence_is_signaled(fence)) {
1068			if (!replacement)
1069				replacement = dma_fence_get_stub();
1070
1071			dma_resv_replace_fences(ttm_bo->base.resv,
1072						fence->context,
1073						replacement,
1074						DMA_RESV_USAGE_BOOKKEEP);
1075		}
1076	}
1077	dma_fence_put(replacement);
1078
1079	dma_resv_unlock(ttm_bo->base.resv);
1080}
1081
1082static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1083{
1084	if (!xe_bo_is_xe_bo(ttm_bo))
1085		return;
1086
1087	/*
1088	 * Object is idle and about to be destroyed. Release the
1089	 * dma-buf attachment.
1090	 */
1091	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1092		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1093						       struct xe_ttm_tt, ttm);
1094
1095		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1096					 DMA_BIDIRECTIONAL);
1097		ttm_bo->sg = NULL;
1098		xe_tt->sg = NULL;
1099	}
1100}
1101
1102static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1103{
1104	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1105
1106	if (ttm_bo->ttm) {
1107		struct ttm_placement place = {};
1108		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1109
1110		drm_WARN_ON(&xe->drm, ret);
1111	}
1112}
1113
1114static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1115{
1116	struct ttm_operation_ctx ctx = {
1117		.interruptible = false
1118	};
1119
1120	if (ttm_bo->ttm) {
1121		struct xe_ttm_tt *xe_tt =
1122			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1123
1124		if (xe_tt->purgeable)
1125			xe_ttm_bo_purge(ttm_bo, &ctx);
1126	}
1127}
1128
1129const struct ttm_device_funcs xe_ttm_funcs = {
1130	.ttm_tt_create = xe_ttm_tt_create,
1131	.ttm_tt_populate = xe_ttm_tt_populate,
1132	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1133	.ttm_tt_destroy = xe_ttm_tt_destroy,
1134	.evict_flags = xe_evict_flags,
1135	.move = xe_bo_move,
1136	.io_mem_reserve = xe_ttm_io_mem_reserve,
1137	.io_mem_pfn = xe_ttm_io_mem_pfn,
1138	.release_notify = xe_ttm_bo_release_notify,
1139	.eviction_valuable = ttm_bo_eviction_valuable,
1140	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1141	.swap_notify = xe_ttm_bo_swap_notify,
1142};
1143
1144static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1145{
1146	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1147	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1148
1149	if (bo->ttm.base.import_attach)
1150		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1151	drm_gem_object_release(&bo->ttm.base);
1152
1153	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1154
1155	if (bo->ggtt_node && bo->ggtt_node->base.size)
1156		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
1157
1158#ifdef CONFIG_PROC_FS
1159	if (bo->client)
1160		xe_drm_client_remove_bo(bo);
1161#endif
1162
1163	if (bo->vm && xe_bo_is_user(bo))
1164		xe_vm_put(bo->vm);
1165
1166	mutex_lock(&xe->mem_access.vram_userfault.lock);
1167	if (!list_empty(&bo->vram_userfault_link))
1168		list_del(&bo->vram_userfault_link);
1169	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1170
1171	kfree(bo);
1172}
1173
1174static void xe_gem_object_free(struct drm_gem_object *obj)
1175{
1176	/* Our BO reference counting scheme works as follows:
1177	 *
1178	 * The gem object kref is typically used throughout the driver,
1179	 * and the gem object holds a ttm_buffer_object refcount, so
1180	 * that when the last gem object reference is put, which is when
1181	 * we end up in this function, we put also that ttm_buffer_object
1182	 * refcount. Anything using gem interfaces is then no longer
1183	 * allowed to access the object in a way that requires a gem
1184	 * refcount, including locking the object.
1185	 *
 1186	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1187	 * refcount directly if needed.
1188	 */
1189	__xe_bo_vunmap(gem_to_xe_bo(obj));
1190	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1191}
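
/*
 * Illustrative sketch (an assumption, not code from this file): per the
 * scheme described above, normal driver code only takes and drops the
 * gem-level reference, e.g. through the xe_bo_get()/xe_bo_put() helpers,
 * and never touches the underlying ttm_buffer_object refcount directly:
 *
 *	xe_bo_get(bo);
 *	... access bo, taking its dma_resv lock as needed ...
 *	xe_bo_put(bo);
 */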
1192
1193static void xe_gem_object_close(struct drm_gem_object *obj,
1194				struct drm_file *file_priv)
1195{
1196	struct xe_bo *bo = gem_to_xe_bo(obj);
1197
1198	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1199		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1200
1201		xe_bo_lock(bo, false);
1202		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1203		xe_bo_unlock(bo);
1204	}
1205}
1206
1207static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1208{
1209	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1210	struct drm_device *ddev = tbo->base.dev;
1211	struct xe_device *xe = to_xe_device(ddev);
1212	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1213	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1214	vm_fault_t ret;
1215	int idx;
1216
1217	if (needs_rpm)
1218		xe_pm_runtime_get(xe);
1219
1220	ret = ttm_bo_vm_reserve(tbo, vmf);
1221	if (ret)
1222		goto out;
1223
1224	if (drm_dev_enter(ddev, &idx)) {
1225		trace_xe_bo_cpu_fault(bo);
1226
1227		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1228					       TTM_BO_VM_NUM_PREFAULT);
1229		drm_dev_exit(idx);
1230	} else {
1231		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1232	}
1233
1234	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1235		goto out;
1236	/*
1237	 * ttm_bo_vm_reserve() already has dma_resv_lock.
1238	 */
1239	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1240		mutex_lock(&xe->mem_access.vram_userfault.lock);
1241		if (list_empty(&bo->vram_userfault_link))
1242			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1243		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1244	}
1245
1246	dma_resv_unlock(tbo->base.resv);
1247out:
1248	if (needs_rpm)
1249		xe_pm_runtime_put(xe);
1250
1251	return ret;
1252}
1253
1254static const struct vm_operations_struct xe_gem_vm_ops = {
1255	.fault = xe_gem_fault,
1256	.open = ttm_bo_vm_open,
1257	.close = ttm_bo_vm_close,
1258	.access = ttm_bo_vm_access
1259};
1260
1261static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1262	.free = xe_gem_object_free,
1263	.close = xe_gem_object_close,
1264	.mmap = drm_gem_ttm_mmap,
1265	.export = xe_gem_prime_export,
1266	.vm_ops = &xe_gem_vm_ops,
1267};
1268
1269/**
1270 * xe_bo_alloc - Allocate storage for a struct xe_bo
1271 *
 1272 * This function is intended to allocate storage to be used for input
1273 * to __xe_bo_create_locked(), in the case a pointer to the bo to be
1274 * created is needed before the call to __xe_bo_create_locked().
 1275 * If __xe_bo_create_locked() ends up never being called, then the
1276 * storage allocated with this function needs to be freed using
1277 * xe_bo_free().
1278 *
1279 * Return: A pointer to an uninitialized struct xe_bo on success,
1280 * ERR_PTR(-ENOMEM) on error.
1281 */
1282struct xe_bo *xe_bo_alloc(void)
1283{
1284	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1285
1286	if (!bo)
1287		return ERR_PTR(-ENOMEM);
1288
1289	return bo;
1290}
1291
1292/**
1293 * xe_bo_free - Free storage allocated using xe_bo_alloc()
1294 * @bo: The buffer object storage.
1295 *
1296 * Refer to xe_bo_alloc() documentation for valid use-cases.
1297 */
1298void xe_bo_free(struct xe_bo *bo)
1299{
1300	kfree(bo);
1301}
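
/*
 * Illustrative sketch (an assumption, not code from this file) of the
 * two-phase pattern xe_bo_alloc()/xe_bo_free() exists for: a caller that
 * needs the bo pointer up front but may bail out before the storage is
 * ever handed to ___xe_bo_create_locked(). "some_precondition_failed" is
 * a hypothetical placeholder:
 *
 *	struct xe_bo *bo = xe_bo_alloc();
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 *	if (some_precondition_failed) {
 *		xe_bo_free(bo);
 *		return -EINVAL;
 *	}
 *
 *	bo = ___xe_bo_create_locked(xe, bo, tile, resv, bulk, size,
 *				    cpu_caching, type, flags);
 */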
1302
1303struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1304				     struct xe_tile *tile, struct dma_resv *resv,
1305				     struct ttm_lru_bulk_move *bulk, size_t size,
1306				     u16 cpu_caching, enum ttm_bo_type type,
1307				     u32 flags)
1308{
1309	struct ttm_operation_ctx ctx = {
1310		.interruptible = true,
1311		.no_wait_gpu = false,
1312	};
1313	struct ttm_placement *placement;
1314	uint32_t alignment;
1315	size_t aligned_size;
1316	int err;
1317
 1318	/* Only kernel objects should set a tile */
1319	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1320
1321	if (XE_WARN_ON(!size)) {
1322		xe_bo_free(bo);
1323		return ERR_PTR(-EINVAL);
1324	}
1325
1326	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1327	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1328	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1329	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
1330		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
1331
1332		aligned_size = ALIGN(size, align);
1333		if (type != ttm_bo_type_device)
1334			size = ALIGN(size, align);
1335		flags |= XE_BO_FLAG_INTERNAL_64K;
1336		alignment = align >> PAGE_SHIFT;
1337	} else {
1338		aligned_size = ALIGN(size, SZ_4K);
1339		flags &= ~XE_BO_FLAG_INTERNAL_64K;
1340		alignment = SZ_4K >> PAGE_SHIFT;
1341	}
1342
1343	if (type == ttm_bo_type_device && aligned_size != size)
1344		return ERR_PTR(-EINVAL);
1345
1346	if (!bo) {
1347		bo = xe_bo_alloc();
1348		if (IS_ERR(bo))
1349			return bo;
1350	}
1351
1352	bo->ccs_cleared = false;
1353	bo->tile = tile;
1354	bo->size = size;
1355	bo->flags = flags;
1356	bo->cpu_caching = cpu_caching;
1357	bo->ttm.base.funcs = &xe_gem_object_funcs;
1358	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1359	INIT_LIST_HEAD(&bo->pinned_link);
1360#ifdef CONFIG_PROC_FS
1361	INIT_LIST_HEAD(&bo->client_link);
1362#endif
1363	INIT_LIST_HEAD(&bo->vram_userfault_link);
1364
1365	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1366
1367	if (resv) {
1368		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1369		ctx.resv = resv;
1370	}
1371
1372	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1373		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1374		if (WARN_ON(err)) {
1375			xe_ttm_bo_destroy(&bo->ttm);
1376			return ERR_PTR(err);
1377		}
1378	}
1379
1380	/* Defer populating type_sg bos */
1381	placement = (type == ttm_bo_type_sg ||
1382		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1383		&bo->placement;
1384	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1385				   placement, alignment,
1386				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1387	if (err)
1388		return ERR_PTR(err);
1389
1390	/*
1391	 * The VRAM pages underneath are potentially still being accessed by the
1392	 * GPU, as per async GPU clearing and async evictions. However TTM makes
 1393	 * sure to add any corresponding move/clear fences into the object's
1394	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1395	 *
 1396	 * For KMD internal buffers we don't care about GPU clearing; however, we
1397	 * still need to handle async evictions, where the VRAM is still being
1398	 * accessed by the GPU. Most internal callers are not expecting this,
1399	 * since they are missing the required synchronisation before accessing
1400	 * the memory. To keep things simple just sync wait any kernel fences
1401	 * here, if the buffer is designated KMD internal.
1402	 *
1403	 * For normal userspace objects we should already have the required
1404	 * pipelining or sync waiting elsewhere, since we already have to deal
1405	 * with things like async GPU clearing.
1406	 */
1407	if (type == ttm_bo_type_kernel) {
1408		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1409						     DMA_RESV_USAGE_KERNEL,
1410						     ctx.interruptible,
1411						     MAX_SCHEDULE_TIMEOUT);
1412
1413		if (timeout < 0) {
1414			if (!resv)
1415				dma_resv_unlock(bo->ttm.base.resv);
1416			xe_bo_put(bo);
1417			return ERR_PTR(timeout);
1418		}
1419	}
1420
1421	bo->created = true;
1422	if (bulk)
1423		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1424	else
1425		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1426
1427	return bo;
1428}
1429
1430static int __xe_bo_fixed_placement(struct xe_device *xe,
1431				   struct xe_bo *bo,
1432				   u32 flags,
1433				   u64 start, u64 end, u64 size)
1434{
1435	struct ttm_place *place = bo->placements;
1436
1437	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
1438		return -EINVAL;
1439
1440	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1441	place->fpfn = start >> PAGE_SHIFT;
1442	place->lpfn = end >> PAGE_SHIFT;
1443
1444	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
1445	case XE_BO_FLAG_VRAM0:
1446		place->mem_type = XE_PL_VRAM0;
1447		break;
1448	case XE_BO_FLAG_VRAM1:
1449		place->mem_type = XE_PL_VRAM1;
1450		break;
1451	case XE_BO_FLAG_STOLEN:
1452		place->mem_type = XE_PL_STOLEN;
1453		break;
1454
1455	default:
1456		/* 0 or multiple of the above set */
1457		return -EINVAL;
1458	}
1459
1460	bo->placement = (struct ttm_placement) {
1461		.num_placement = 1,
1462		.placement = place,
1463	};
1464
1465	return 0;
1466}
1467
1468static struct xe_bo *
1469__xe_bo_create_locked(struct xe_device *xe,
1470		      struct xe_tile *tile, struct xe_vm *vm,
1471		      size_t size, u64 start, u64 end,
1472		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
1473		      u64 alignment)
1474{
1475	struct xe_bo *bo = NULL;
1476	int err;
1477
1478	if (vm)
1479		xe_vm_assert_held(vm);
1480
1481	if (start || end != ~0ULL) {
1482		bo = xe_bo_alloc();
1483		if (IS_ERR(bo))
1484			return bo;
1485
1486		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
1487		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1488		if (err) {
1489			xe_bo_free(bo);
1490			return ERR_PTR(err);
1491		}
1492	}
1493
1494	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1495				    vm && !xe_vm_in_fault_mode(vm) &&
1496				    flags & XE_BO_FLAG_USER ?
1497				    &vm->lru_bulk_move : NULL, size,
1498				    cpu_caching, type, flags);
1499	if (IS_ERR(bo))
1500		return bo;
1501
1502	bo->min_align = alignment;
1503
1504	/*
 1505	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
1506	 * to ensure the shared resv doesn't disappear under the bo, the bo
1507	 * will keep a reference to the vm, and avoid circular references
 1508	 * by having all the vm's bo references released at vm close
1509	 * time.
1510	 */
1511	if (vm && xe_bo_is_user(bo))
1512		xe_vm_get(vm);
1513	bo->vm = vm;
1514
1515	if (bo->flags & XE_BO_FLAG_GGTT) {
1516		if (!tile && flags & XE_BO_FLAG_STOLEN)
1517			tile = xe_device_get_root_tile(xe);
1518
1519		xe_assert(xe, tile);
1520
1521		if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
1522			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
1523						   start + bo->size, U64_MAX);
1524		} else {
1525			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
1526		}
1527		if (err)
1528			goto err_unlock_put_bo;
1529	}
1530
1531	return bo;
1532
1533err_unlock_put_bo:
1534	__xe_bo_unset_bulk_move(bo);
1535	xe_bo_unlock_vm_held(bo);
1536	xe_bo_put(bo);
1537	return ERR_PTR(err);
1538}
1539
1540struct xe_bo *
1541xe_bo_create_locked_range(struct xe_device *xe,
1542			  struct xe_tile *tile, struct xe_vm *vm,
1543			  size_t size, u64 start, u64 end,
1544			  enum ttm_bo_type type, u32 flags, u64 alignment)
1545{
1546	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
1547				     flags, alignment);
1548}
1549
1550struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1551				  struct xe_vm *vm, size_t size,
1552				  enum ttm_bo_type type, u32 flags)
1553{
1554	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
1555				     flags, 0);
1556}
1557
1558struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1559				struct xe_vm *vm, size_t size,
1560				u16 cpu_caching,
1561				u32 flags)
1562{
1563	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1564						 cpu_caching, ttm_bo_type_device,
1565						 flags | XE_BO_FLAG_USER, 0);
1566	if (!IS_ERR(bo))
1567		xe_bo_unlock_vm_held(bo);
1568
1569	return bo;
1570}
1571
1572struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1573			   struct xe_vm *vm, size_t size,
1574			   enum ttm_bo_type type, u32 flags)
1575{
1576	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1577
1578	if (!IS_ERR(bo))
1579		xe_bo_unlock_vm_held(bo);
1580
1581	return bo;
1582}
1583
1584struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1585				      struct xe_vm *vm,
1586				      size_t size, u64 offset,
1587				      enum ttm_bo_type type, u32 flags)
1588{
1589	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
1590					       type, flags, 0);
1591}
1592
1593struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
1594					      struct xe_tile *tile,
1595					      struct xe_vm *vm,
1596					      size_t size, u64 offset,
1597					      enum ttm_bo_type type, u32 flags,
1598					      u64 alignment)
1599{
1600	struct xe_bo *bo;
1601	int err;
1602	u64 start = offset == ~0ull ? 0 : offset;
1603	u64 end = offset == ~0ull ? offset : start + size;
1604
1605	if (flags & XE_BO_FLAG_STOLEN &&
1606	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1607		flags |= XE_BO_FLAG_GGTT;
1608
1609	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1610				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
1611				       alignment);
1612	if (IS_ERR(bo))
1613		return bo;
1614
1615	err = xe_bo_pin(bo);
1616	if (err)
1617		goto err_put;
1618
1619	err = xe_bo_vmap(bo);
1620	if (err)
1621		goto err_unpin;
1622
1623	xe_bo_unlock_vm_held(bo);
1624
1625	return bo;
1626
1627err_unpin:
1628	xe_bo_unpin(bo);
1629err_put:
1630	xe_bo_unlock_vm_held(bo);
1631	xe_bo_put(bo);
1632	return ERR_PTR(err);
1633}
1634
1635struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1636				   struct xe_vm *vm, size_t size,
1637				   enum ttm_bo_type type, u32 flags)
1638{
1639	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1640}
1641
1642struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1643				     const void *data, size_t size,
1644				     enum ttm_bo_type type, u32 flags)
1645{
1646	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1647						ALIGN(size, PAGE_SIZE),
1648						type, flags);
1649	if (IS_ERR(bo))
1650		return bo;
1651
1652	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1653
1654	return bo;
1655}
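
/*
 * Illustrative sketch (an assumption, not code from this file): staging a
 * small caller-provided blob (blob/blob_size are placeholders) into a
 * pinned, CPU-mapped kernel BO, placed in VRAM on discrete parts and bound
 * into the GGTT:
 *
 *	bo = xe_bo_create_from_data(xe, tile, blob, blob_size,
 *				    ttm_bo_type_kernel,
 *				    XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 *				    XE_BO_FLAG_GGTT);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 */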
1656
1657static void __xe_bo_unpin_map_no_vm(void *arg)
1658{
1659	xe_bo_unpin_map_no_vm(arg);
1660}
1661
1662struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1663					   size_t size, u32 flags)
1664{
1665	struct xe_bo *bo;
1666	int ret;
1667
1668	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1669	if (IS_ERR(bo))
1670		return bo;
1671
1672	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
1673	if (ret)
1674		return ERR_PTR(ret);
1675
1676	return bo;
1677}
1678
1679struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1680					     const void *data, size_t size, u32 flags)
1681{
1682	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1683
1684	if (IS_ERR(bo))
1685		return bo;
1686
1687	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1688
1689	return bo;
1690}
1691
1692/**
 1693 * xe_managed_bo_reinit_in_vram - Replace a managed system-memory BO with a VRAM copy
1694 * @xe: xe device
1695 * @tile: Tile where the new buffer will be created
1696 * @src: Managed buffer object allocated in system memory
1697 *
1698 * Replace a managed src buffer object allocated in system memory with a new
1699 * one allocated in vram, copying the data between them.
 1700 * The buffer object in VRAM is not going to have the same GGTT address; the caller
1701 * is responsible for making sure that any old references to it are updated.
1702 *
1703 * Returns 0 for success, negative error code otherwise.
1704 */
1705int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
1706{
1707	struct xe_bo *bo;
1708	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
1709
1710	dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
1711
1712	xe_assert(xe, IS_DGFX(xe));
1713	xe_assert(xe, !(*src)->vmap.is_iomem);
1714
1715	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
1716					    (*src)->size, dst_flags);
1717	if (IS_ERR(bo))
1718		return PTR_ERR(bo);
1719
1720	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
1721	*src = bo;
1722
1723	return 0;
1724}
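
/*
 * Illustrative sketch (an assumption, not code from this file): a caller
 * that staged a managed BO in system memory at probe time and migrates it
 * to VRAM once the device is up, where "obj" is a placeholder for whatever
 * structure holds the managed BO pointer:
 *
 *	err = xe_managed_bo_reinit_in_vram(xe, tile, &obj->bo);
 *	if (err)
 *		return err;
 *
 * obj->bo then points at the VRAM copy, and any stored GGTT address must
 * be refreshed by the caller as documented above.
 */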
1725
1726/*
1727 * XXX: This is in the VM bind data path, likely should calculate this once and
1728 * store, with a recalculation if the BO is moved.
1729 */
1730uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1731{
1732	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1733
1734	if (res->mem_type == XE_PL_STOLEN)
1735		return xe_ttm_stolen_gpu_offset(xe);
1736
1737	return res_to_mem_region(res)->dpa_base;
1738}
1739
1740/**
1741 * xe_bo_pin_external - pin an external BO
1742 * @bo: buffer object to be pinned
1743 *
1744 * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
 1745 * BO. Unique call compared to xe_bo_pin as this function has its own set of
1746 * asserts and code to ensure evict / restore on suspend / resume.
1747 *
1748 * Returns 0 for success, negative error code otherwise.
1749 */
1750int xe_bo_pin_external(struct xe_bo *bo)
1751{
1752	struct xe_device *xe = xe_bo_device(bo);
1753	int err;
1754
1755	xe_assert(xe, !bo->vm);
1756	xe_assert(xe, xe_bo_is_user(bo));
1757
1758	if (!xe_bo_is_pinned(bo)) {
1759		err = xe_bo_validate(bo, NULL, false);
1760		if (err)
1761			return err;
1762
1763		if (xe_bo_is_vram(bo)) {
1764			spin_lock(&xe->pinned.lock);
1765			list_add_tail(&bo->pinned_link,
1766				      &xe->pinned.external_vram);
1767			spin_unlock(&xe->pinned.lock);
1768		}
1769	}
1770
1771	ttm_bo_pin(&bo->ttm);
1772
1773	/*
1774	 * FIXME: If we always use the reserve / unreserve functions for locking
1775	 * we do not need this.
1776	 */
1777	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1778
1779	return 0;
1780}
1781
1782int xe_bo_pin(struct xe_bo *bo)
1783{
1784	struct ttm_place *place = &bo->placements[0];
1785	struct xe_device *xe = xe_bo_device(bo);
1786	int err;
1787
1788	/* We currently don't expect user BO to be pinned */
1789	xe_assert(xe, !xe_bo_is_user(bo));
1790
1791	/* Pinned object must be in GGTT or have pinned flag */
1792	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
1793				   XE_BO_FLAG_GGTT));
1794
1795	/*
 1796	 * No reason we can't support pinning imported dma-bufs; we just don't
1797	 * expect to pin an imported dma-buf.
1798	 */
1799	xe_assert(xe, !bo->ttm.base.import_attach);
1800
1801	/* We only expect at most 1 pin */
1802	xe_assert(xe, !xe_bo_is_pinned(bo));
1803
1804	err = xe_bo_validate(bo, NULL, false);
1805	if (err)
1806		return err;
1807
1808	/*
 1809	 * For pinned objects on DGFX, which are also in VRAM, we expect
 1810	 * these to be in contiguous VRAM memory. This is required for eviction /
 1811	 * restore during suspend / resume (force restore to the same physical address).
1812	 */
1813	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1814	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
1815		if (mem_type_is_vram(place->mem_type)) {
1816			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1817
1818			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1819				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1820			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1821		}
1822	}
1823
1824	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
1825		spin_lock(&xe->pinned.lock);
1826		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1827		spin_unlock(&xe->pinned.lock);
1828	}
1829
1830	ttm_bo_pin(&bo->ttm);
1831
1832	/*
1833	 * FIXME: If we always use the reserve / unreserve functions for locking
1834	 * we do not need this.
1835	 */
1836	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1837
1838	return 0;
1839}
1840
1841/**
1842 * xe_bo_unpin_external - unpin an external BO
1843 * @bo: buffer object to be unpinned
1844 *
1845 * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
 1846 * BO. Unique call compared to xe_bo_unpin as this function has its own set of
1847 * asserts and code to ensure evict / restore on suspend / resume.
1850 */
1851void xe_bo_unpin_external(struct xe_bo *bo)
1852{
1853	struct xe_device *xe = xe_bo_device(bo);
1854
1855	xe_assert(xe, !bo->vm);
1856	xe_assert(xe, xe_bo_is_pinned(bo));
1857	xe_assert(xe, xe_bo_is_user(bo));
1858
1859	spin_lock(&xe->pinned.lock);
1860	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
1861		list_del_init(&bo->pinned_link);
1862	spin_unlock(&xe->pinned.lock);
1863
1864	ttm_bo_unpin(&bo->ttm);
1865
1866	/*
1867	 * FIXME: If we always use the reserve / unreserve functions for locking
1868	 * we do not need this.
1869	 */
1870	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1871}
1872
1873void xe_bo_unpin(struct xe_bo *bo)
1874{
1875	struct ttm_place *place = &bo->placements[0];
1876	struct xe_device *xe = xe_bo_device(bo);
1877
1878	xe_assert(xe, !bo->ttm.base.import_attach);
1879	xe_assert(xe, xe_bo_is_pinned(bo));
1880
1881	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
1882		spin_lock(&xe->pinned.lock);
1883		xe_assert(xe, !list_empty(&bo->pinned_link));
1884		list_del_init(&bo->pinned_link);
1885		spin_unlock(&xe->pinned.lock);
1886	}
1887	ttm_bo_unpin(&bo->ttm);
1888}
1889
1890/**
1891 * xe_bo_validate() - Make sure the bo is in an allowed placement
1892 * @bo: The bo,
 1893 * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
1894 *      NULL. Used together with @allow_res_evict.
1895 * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
1896 *                   reservation object.
1897 *
1898 * Make sure the bo is in allowed placement, migrating it if necessary. If
 1899 * needed, other bos will be evicted. If bos selected for eviction share
1900 * the @vm's reservation object, they can be evicted iff @allow_res_evict is
1901 * set to true, otherwise they will be bypassed.
1902 *
1903 * Return: 0 on success, negative error code on failure. May return
1904 * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
1905 */
1906int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
1907{
1908	struct ttm_operation_ctx ctx = {
1909		.interruptible = true,
1910		.no_wait_gpu = false,
1911	};
1912
1913	if (vm) {
1914		lockdep_assert_held(&vm->lock);
1915		xe_vm_assert_held(vm);
1916
1917		ctx.allow_res_evict = allow_res_evict;
1918		ctx.resv = xe_vm_resv(vm);
1919	}
1920
1921	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
1922}
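
/*
 * Illustrative usage sketch (editor's addition, not part of the upstream
 * file): revalidating a vm-private bo. The caller is assumed to already hold
 * vm->lock as required by the asserts above; example_revalidate_bo() is a
 * hypothetical helper name.
 *
 *	static int example_revalidate_bo(struct xe_vm *vm, struct xe_bo *bo)
 *	{
 *		int err;
 *
 *		err = xe_vm_lock(vm, true);
 *		if (err)
 *			return err;
 *
 *		err = xe_bo_validate(bo, vm, true);
 *		xe_vm_unlock(vm);
 *
 *		return err;
 *	}
 */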
1923
1924bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
1925{
1926	if (bo->destroy == &xe_ttm_bo_destroy)
1927		return true;
1928
1929	return false;
1930}
1931
1932/*
1933 * Resolve a BO address. There is no assert that the proper lock is held, so
1934 * this should only be used where getting the wrong address is not fatal, such
1935 * as printing debug information, and never where memory is written based on
1936 * the result.
1937 */
1938dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1939{
1940	struct xe_device *xe = xe_bo_device(bo);
1941	struct xe_res_cursor cur;
1942	u64 page;
1943
1944	xe_assert(xe, page_size <= PAGE_SIZE);
1945	page = offset >> PAGE_SHIFT;
1946	offset &= (PAGE_SIZE - 1);
1947
1948	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
1949		xe_assert(xe, bo->ttm.ttm);
1950
1951		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
1952				page_size, &cur);
1953		return xe_res_dma(&cur) + offset;
1954	} else {
1957		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
1958			     page_size, &cur);
1959		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
1960	}
1961}
1962
1963dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1964{
1965	if (!READ_ONCE(bo->ttm.pin_count))
1966		xe_bo_assert_held(bo);
1967	return __xe_bo_addr(bo, offset, page_size);
1968}
1969
1970int xe_bo_vmap(struct xe_bo *bo)
1971{
1972	void *virtual;
1973	bool is_iomem;
1974	int ret;
1975
1976	xe_bo_assert_held(bo);
1977
1978	if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS))
1979		return -EINVAL;
1980
1981	if (!iosys_map_is_null(&bo->vmap))
1982		return 0;
1983
1984	/*
1985	 * We use this more or less deprecated interface for now since
1986	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
1987	 * single page bos, which is done here.
1988	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
1989	 * to use struct iosys_map.
1990	 */
1991	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
1992	if (ret)
1993		return ret;
1994
1995	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
1996	if (is_iomem)
1997		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
1998	else
1999		iosys_map_set_vaddr(&bo->vmap, virtual);
2000
2001	return 0;
2002}
2003
2004static void __xe_bo_vunmap(struct xe_bo *bo)
2005{
2006	if (!iosys_map_is_null(&bo->vmap)) {
2007		iosys_map_clear(&bo->vmap);
2008		ttm_bo_kunmap(&bo->kmap);
2009	}
2010}
2011
2012void xe_bo_vunmap(struct xe_bo *bo)
2013{
2014	xe_bo_assert_held(bo);
2015	__xe_bo_vunmap(bo);
2016}
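
/*
 * Illustrative usage sketch (editor's addition, not part of the upstream
 * file): CPU-clearing a bo created with XE_BO_FLAG_NEEDS_CPU_ACCESS. The bo
 * is kept locked across the vmap / access / vunmap sequence;
 * example_clear_bo() is a hypothetical helper name.
 *
 *	static int example_clear_bo(struct xe_bo *bo)
 *	{
 *		int err;
 *
 *		xe_bo_lock(bo, false);
 *		err = xe_bo_vmap(bo);
 *		if (!err) {
 *			iosys_map_memset(&bo->vmap, 0, 0, bo->size);
 *			xe_bo_vunmap(bo);
 *		}
 *		xe_bo_unlock(bo);
 *
 *		return err;
 *	}
 */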
2017
2018int xe_gem_create_ioctl(struct drm_device *dev, void *data,
2019			struct drm_file *file)
2020{
2021	struct xe_device *xe = to_xe_device(dev);
2022	struct xe_file *xef = to_xe_file(file);
2023	struct drm_xe_gem_create *args = data;
2024	struct xe_vm *vm = NULL;
2025	struct xe_bo *bo;
2026	unsigned int bo_flags;
2027	u32 handle;
2028	int err;
2029
2030	if (XE_IOCTL_DBG(xe, args->extensions) ||
2031	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
2032	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2033		return -EINVAL;
2034
2035	/* at least one valid memory placement must be specified */
2036	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
2037			 !args->placement))
2038		return -EINVAL;
2039
2040	if (XE_IOCTL_DBG(xe, args->flags &
2041			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
2042			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
2043			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
2044		return -EINVAL;
2045
2046	if (XE_IOCTL_DBG(xe, args->handle))
2047		return -EINVAL;
2048
2049	if (XE_IOCTL_DBG(xe, !args->size))
2050		return -EINVAL;
2051
2052	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
2053		return -EINVAL;
2054
2055	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
2056		return -EINVAL;
2057
2058	bo_flags = 0;
2059	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
2060		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
2061
2062	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
2063		bo_flags |= XE_BO_FLAG_SCANOUT;
2064
2065	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
2066
2067	/* CCS formats need physical placement at a 64K alignment in VRAM. */
2068	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
2069	    (bo_flags & XE_BO_FLAG_SCANOUT) &&
2070	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
2071	    IS_ALIGNED(args->size, SZ_64K))
2072		bo_flags |= XE_BO_FLAG_NEEDS_64K;
2073
2074	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
2075		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
2076			return -EINVAL;
2077
2078		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
2079	}
2080
2081	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
2082			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
2083		return -EINVAL;
2084
2085	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
2086			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
2087		return -EINVAL;
2088
2089	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
2090			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
2091		return -EINVAL;
2092
2093	if (args->vm_id) {
2094		vm = xe_vm_lookup(xef, args->vm_id);
2095		if (XE_IOCTL_DBG(xe, !vm))
2096			return -ENOENT;
2097		err = xe_vm_lock(vm, true);
2098		if (err)
2099			goto out_vm;
2100	}
2101
2102	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
2103			       bo_flags);
2104
2105	if (vm)
2106		xe_vm_unlock(vm);
2107
2108	if (IS_ERR(bo)) {
2109		err = PTR_ERR(bo);
2110		goto out_vm;
2111	}
2112
2113	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
2114	if (err)
2115		goto out_bulk;
2116
2117	args->handle = handle;
2118	goto out_put;
2119
2120out_bulk:
2121	if (vm && !xe_vm_in_fault_mode(vm)) {
2122		xe_vm_lock(vm, false);
2123		__xe_bo_unset_bulk_move(bo);
2124		xe_vm_unlock(vm);
2125	}
2126out_put:
2127	xe_bo_put(bo);
2128out_vm:
2129	if (vm)
2130		xe_vm_put(vm);
2131
2132	return err;
2133}
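
/*
 * Illustrative userspace sketch (editor's addition, not part of the upstream
 * file): creating a gem object through this ioctl. The structure layout and
 * flag names follow uapi/drm/xe_drm.h; vram_region_bitmask is assumed to come
 * from a prior memory-regions query, and error handling is trimmed.
 *
 *	struct drm_xe_gem_create create = {
 *		.size = 0x10000,
 *		.placement = vram_region_bitmask,
 *		.flags = DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM,
 *		.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create))
 *		return -errno;
 *	bo_handle = create.handle;
 */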
2134
2135int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2136			     struct drm_file *file)
2137{
2138	struct xe_device *xe = to_xe_device(dev);
2139	struct drm_xe_gem_mmap_offset *args = data;
2140	struct drm_gem_object *gem_obj;
2141
2142	if (XE_IOCTL_DBG(xe, args->extensions) ||
2143	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2144		return -EINVAL;
2145
2146	if (XE_IOCTL_DBG(xe, args->flags))
2147		return -EINVAL;
2148
2149	gem_obj = drm_gem_object_lookup(file, args->handle);
2150	if (XE_IOCTL_DBG(xe, !gem_obj))
2151		return -ENOENT;
2152
2153	/* The mmap offset was set up at BO allocation time. */
2154	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2155
2156	xe_bo_put(gem_to_xe_bo(gem_obj));
2157	return 0;
2158}
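
/*
 * Illustrative userspace sketch (editor's addition, not part of the upstream
 * file): mapping a bo through the fake offset returned by this ioctl. Error
 * handling is trimmed.
 *
 *	struct drm_xe_gem_mmap_offset mmo = { .handle = bo_handle };
 *
 *	drmIoctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, mmo.offset);
 */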
2159
2160/**
2161 * xe_bo_lock() - Lock the buffer object's dma_resv object
2162 * @bo: The struct xe_bo whose lock is to be taken
2163 * @intr: Whether to perform any waits interruptibly
2164 *
2165 * Locks the buffer object's dma_resv object. If the buffer object is
2166 * pointing to a shared dma_resv object, that shared lock is locked.
2167 *
2168 * Return: 0 on success, -EINTR if @intr is true and the wait for a
2169 * contended lock was interrupted. If @intr is set to false, the
2170 * function always returns 0.
2171 */
2172int xe_bo_lock(struct xe_bo *bo, bool intr)
2173{
2174	if (intr)
2175		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2176
2177	dma_resv_lock(bo->ttm.base.resv, NULL);
2178
2179	return 0;
2180}
2181
2182/**
2183 * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2184 * @bo: The struct xe_bo whose lock is to be released.
2185 *
2186 * Unlock a buffer object lock that was locked by xe_bo_lock().
2187 */
2188void xe_bo_unlock(struct xe_bo *bo)
2189{
2190	dma_resv_unlock(bo->ttm.base.resv);
2191}
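
/*
 * Illustrative usage sketch (editor's addition, not part of the upstream
 * file): the typical interruptible lock / access / unlock pattern.
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *
 *	// ... access, validate or migrate the bo here ...
 *
 *	xe_bo_unlock(bo);
 */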
2192
2193/**
2194 * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2195 * @bo: The buffer object to migrate
2196 * @mem_type: The TTM memory type intended to migrate to
2197 *
2198 * Check whether the buffer object supports migration to the
2199 * given memory type. Note that a pinned bo may not be migratable even if
2200 * this function returns true.
2201 *
2202 * This function is primarily intended as a helper for checking the
2203 * possibility to migrate buffer objects and can be called without
2204 * the object lock held.
2205 *
2206 * Return: true if migration is possible, false otherwise.
2207 */
2208bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2209{
2210	unsigned int cur_place;
2211
2212	if (bo->ttm.type == ttm_bo_type_kernel)
2213		return true;
2214
2215	if (bo->ttm.type == ttm_bo_type_sg)
2216		return false;
2217
2218	for (cur_place = 0; cur_place < bo->placement.num_placement;
2219	     cur_place++) {
2220		if (bo->placements[cur_place].mem_type == mem_type)
2221			return true;
2222	}
2223
2224	return false;
2225}
2226
2227static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2228{
2229	memset(place, 0, sizeof(*place));
2230	place->mem_type = mem_type;
2231}
2232
2233/**
2234 * xe_bo_migrate - Migrate an object to the desired region id
2235 * @bo: The buffer object to migrate.
2236 * @mem_type: The TTM region type to migrate to.
2237 *
2238 * Attempt to migrate the buffer object to the desired memory region. The
2239 * buffer object may not be pinned, and must be locked.
2240 * On successful completion, the object memory type will be updated, but an
2241 * async migration task may not yet have completed. To wait for the migration
2242 * to finish, wait on the object's kernel fences with the object lock held.
2244 *
2245 * Return: 0 on success. Negative error code on failure. In particular may
2246 * return -EINTR or -ERESTARTSYS if signal pending.
2247 */
2248int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2249{
2250	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2251	struct ttm_operation_ctx ctx = {
2252		.interruptible = true,
2253		.no_wait_gpu = false,
2254	};
2255	struct ttm_placement placement;
2256	struct ttm_place requested;
2257
2258	xe_bo_assert_held(bo);
2259
2260	if (bo->ttm.resource->mem_type == mem_type)
2261		return 0;
2262
2263	if (xe_bo_is_pinned(bo))
2264		return -EBUSY;
2265
2266	if (!xe_bo_can_migrate(bo, mem_type))
2267		return -EINVAL;
2268
2269	xe_place_from_ttm_type(mem_type, &requested);
2270	placement.num_placement = 1;
2271	placement.placement = &requested;
2272
2273	/*
2274	 * Stolen needs to be handled like below VRAM handling if we ever need
2275	 * to support it.
2276	 */
2277	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2278
2279	if (mem_type_is_vram(mem_type)) {
2280		u32 c = 0;
2281
2282		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2283	}
2284
2285	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2286}
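
/*
 * Illustrative usage sketch (editor's addition, not part of the upstream
 * file): moving an unpinned bo to system memory and waiting for the async
 * migration under the object lock. Error handling is trimmed.
 *
 *	xe_bo_lock(bo, false);
 *	if (xe_bo_can_migrate(bo, XE_PL_TT)) {
 *		err = xe_bo_migrate(bo, XE_PL_TT);
 *		if (!err)
 *			dma_resv_wait_timeout(bo->ttm.base.resv,
 *					      DMA_RESV_USAGE_KERNEL, false,
 *					      MAX_SCHEDULE_TIMEOUT);
 *	}
 *	xe_bo_unlock(bo);
 */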
2287
2288/**
2289 * xe_bo_evict - Evict an object to evict placement
2290 * @bo: The buffer object to migrate.
2291 * @force_alloc: Set force_alloc in ttm_operation_ctx
2292 *
2293 * On successful completion, the object memory will be moved to the evict
2294 * placement. This function blocks until the object has been fully moved.
2295 *
2296 * Return: 0 on success. Negative error code on failure.
2297 */
2298int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2299{
2300	struct ttm_operation_ctx ctx = {
2301		.interruptible = false,
2302		.no_wait_gpu = false,
2303		.force_alloc = force_alloc,
2304	};
2305	struct ttm_placement placement;
2306	int ret;
2307
2308	xe_evict_flags(&bo->ttm, &placement);
2309	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2310	if (ret)
2311		return ret;
2312
2313	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2314			      false, MAX_SCHEDULE_TIMEOUT);
2315
2316	return 0;
2317}
2318
2319/**
2320 * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2321 * placed in system memory.
2322 * @bo: The xe_bo
2323 *
2324 * Return: true if extra pages need to be allocated, false otherwise.
2325 */
2326bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2327{
2328	struct xe_device *xe = xe_bo_device(bo);
2329
2330	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
2331		return false;
2332
2333	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2334		return false;
2335
2336	/* On discrete GPUs, if the GPU can access this buffer from
2337	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2338	 * can't be used since there's no CCS storage associated with
2339	 * non-VRAM addresses.
2340	 */
2341	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
2342		return false;
2343
2344	return true;
2345}
2346
2347/**
2348 * __xe_bo_release_dummy() - Dummy kref release function
2349 * @kref: The embedded struct kref.
2350 *
2351 * Dummy release function for xe_bo_put_deferred(). Keep off.
2352 */
2353void __xe_bo_release_dummy(struct kref *kref)
2354{
2355}
2356
2357/**
2358 * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2359 * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2360 *
2361 * Puts all bos whose put was deferred by xe_bo_put_deferred().
2362 * The @deferred list can be either an onstack local list or a global
2363 * shared list used by a workqueue.
2364 */
2365void xe_bo_put_commit(struct llist_head *deferred)
2366{
2367	struct llist_node *freed;
2368	struct xe_bo *bo, *next;
2369
2370	if (!deferred)
2371		return;
2372
2373	freed = llist_del_all(deferred);
2374	if (!freed)
2375		return;
2376
2377	llist_for_each_entry_safe(bo, next, freed, freed)
2378		drm_gem_object_free(&bo->ttm.base.refcount);
2379}
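
/*
 * Illustrative usage sketch (editor's addition, not part of the upstream
 * file): deferring puts from a context that must not take the locks the
 * final release may need, then committing them later with the companion
 * xe_bo_put_deferred() helper referenced above.
 *
 *	LLIST_HEAD(deferred);
 *
 *	xe_bo_put_deferred(bo, &deferred);
 *	...
 *	xe_bo_put_commit(&deferred);
 */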
2380
2381void xe_bo_put(struct xe_bo *bo)
2382{
2383	might_sleep();
2384	if (bo) {
2385#ifdef CONFIG_PROC_FS
2386		if (bo->client)
2387			might_lock(&bo->client->bos_lock);
2388#endif
2389		if (bo->ggtt_node && bo->ggtt_node->ggtt)
2390			might_lock(&bo->ggtt_node->ggtt->lock);
2391		drm_gem_object_put(&bo->ttm.base);
2392	}
2393}
2394
2395/**
2396 * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2397 * @file_priv: The drm file to create the gem handle for
2398 * @dev: The drm device
2399 * @args: The dumb buffer args; width/height/bpp in, handle/pitch/size out
2400 *
2401 * See dumb_create() hook in include/drm/drm_drv.h
2402 *
2403 * Return: 0 on success, negative error code on failure.
2404 */
2405int xe_bo_dumb_create(struct drm_file *file_priv,
2406		      struct drm_device *dev,
2407		      struct drm_mode_create_dumb *args)
2408{
2409	struct xe_device *xe = to_xe_device(dev);
2410	struct xe_bo *bo;
2411	uint32_t handle;
2412	int cpp = DIV_ROUND_UP(args->bpp, 8);
2413	int err;
2414	u32 page_size = max_t(u32, PAGE_SIZE,
2415		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2416
2417	args->pitch = ALIGN(args->width * cpp, 64);
2418	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2419			   page_size);
2420
2421	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2422			       DRM_XE_GEM_CPU_CACHING_WC,
2423			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2424			       XE_BO_FLAG_SCANOUT |
2425			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
2426	if (IS_ERR(bo))
2427		return PTR_ERR(bo);
2428
2429	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2430	/* drop reference from allocate - handle holds it now */
2431	drm_gem_object_put(&bo->ttm.base);
2432	if (!err)
2433		args->handle = handle;
2434	return err;
2435}
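
/*
 * Illustrative userspace sketch (editor's addition, not part of the upstream
 * file): allocating a dumb buffer, which ends up in the hook above. Error
 * handling is trimmed.
 *
 *	struct drm_mode_create_dumb dumb = {
 *		.width = 1920,
 *		.height = 1080,
 *		.bpp = 32,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &dumb);
 *	// dumb.handle, dumb.pitch and dumb.size are now filled in
 */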
2436
2437void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
2438{
2439	struct ttm_buffer_object *tbo = &bo->ttm;
2440	struct ttm_device *bdev = tbo->bdev;
2441
2442	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
2443
2444	list_del_init(&bo->vram_userfault_link);
2445}
2446
2447#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2448#include "tests/xe_bo.c"
2449#endif