   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2021 Intel Corporation
   4 */
   5
   6#include "xe_bo.h"
   7
   8#include <linux/dma-buf.h>
   9
  10#include <drm/drm_drv.h>
  11#include <drm/drm_gem_ttm_helper.h>
  12#include <drm/drm_managed.h>
  13#include <drm/ttm/ttm_device.h>
  14#include <drm/ttm/ttm_placement.h>
  15#include <drm/ttm/ttm_tt.h>
  16#include <drm/xe_drm.h>
  17
  18#include "xe_device.h"
  19#include "xe_dma_buf.h"
  20#include "xe_drm_client.h"
  21#include "xe_ggtt.h"
  22#include "xe_gt.h"
  23#include "xe_map.h"
  24#include "xe_migrate.h"
  25#include "xe_preempt_fence.h"
  26#include "xe_res_cursor.h"
  27#include "xe_trace.h"
  28#include "xe_ttm_stolen_mgr.h"
  29#include "xe_vm.h"
  30
  31const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
  32	[XE_PL_SYSTEM] = "system",
  33	[XE_PL_TT] = "gtt",
  34	[XE_PL_VRAM0] = "vram0",
  35	[XE_PL_VRAM1] = "vram1",
  36	[XE_PL_STOLEN] = "stolen"
  37};
  38
  39static const struct ttm_place sys_placement_flags = {
  40	.fpfn = 0,
  41	.lpfn = 0,
  42	.mem_type = XE_PL_SYSTEM,
  43	.flags = 0,
  44};
  45
  46static struct ttm_placement sys_placement = {
  47	.num_placement = 1,
  48	.placement = &sys_placement_flags,
  49	.num_busy_placement = 1,
  50	.busy_placement = &sys_placement_flags,
  51};
  52
  53static const struct ttm_place tt_placement_flags = {
  54	.fpfn = 0,
  55	.lpfn = 0,
  56	.mem_type = XE_PL_TT,
  57	.flags = 0,
  58};
  59
  60static struct ttm_placement tt_placement = {
  61	.num_placement = 1,
  62	.placement = &tt_placement_flags,
  63	.num_busy_placement = 1,
  64	.busy_placement = &sys_placement_flags,
  65};
  66
  67bool mem_type_is_vram(u32 mem_type)
  68{
  69	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
  70}
  71
  72static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
  73{
  74	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
  75}
  76
  77static bool resource_is_vram(struct ttm_resource *res)
  78{
  79	return mem_type_is_vram(res->mem_type);
  80}
  81
  82bool xe_bo_is_vram(struct xe_bo *bo)
  83{
  84	return resource_is_vram(bo->ttm.resource) ||
  85		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
  86}
  87
  88bool xe_bo_is_stolen(struct xe_bo *bo)
  89{
  90	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
  91}
  92
  93/**
  94 * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
  95 * @bo: The BO
  96 *
  97 * The stolen memory is accessed through the PCI BAR for both DGFX and some
  98 * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
  99 *
 100 * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
 101 */
 102bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
 103{
 104	return xe_bo_is_stolen(bo) &&
 105		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
 106}
 107
 108static bool xe_bo_is_user(struct xe_bo *bo)
 109{
 110	return bo->flags & XE_BO_CREATE_USER_BIT;
 111}
 112
 113static struct xe_migrate *
 114mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
 115{
 116	struct xe_tile *tile;
 117
 118	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
 119	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
 120	return tile->migrate;
 121}
 122
 123static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
 124{
 125	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
 126	struct ttm_resource_manager *mgr;
 127
 128	xe_assert(xe, resource_is_vram(res));
 129	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
 130	return to_xe_ttm_vram_mgr(mgr)->vram;
 131}
 132
 133static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
 134			   u32 bo_flags, u32 *c)
 135{
 136	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
 137		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 138
 139		bo->placements[*c] = (struct ttm_place) {
 140			.mem_type = XE_PL_TT,
 141		};
 142		*c += 1;
 143
 144		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
 145			bo->props.preferred_mem_type = XE_PL_TT;
 146	}
 147}
 148
 149static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 150		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
 151{
 152	struct ttm_place place = { .mem_type = mem_type };
 153	struct xe_mem_region *vram;
 154	u64 io_size;
 155
 156	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 157
 158	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
 159	xe_assert(xe, vram && vram->usable_size);
 160	io_size = vram->io_size;
 161
 162	/*
  163	 * For eviction / restore on suspend / resume, objects
  164	 * pinned in VRAM must be contiguous.
 165	 */
 166	if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
 167			XE_BO_CREATE_GGTT_BIT))
 168		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
 169
 170	if (io_size < vram->usable_size) {
 171		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
 172			place.fpfn = 0;
 173			place.lpfn = io_size >> PAGE_SHIFT;
 174		} else {
 175			place.flags |= TTM_PL_FLAG_TOPDOWN;
 176		}
 177	}
 178	places[*c] = place;
 179	*c += 1;
 180
 181	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
 182		bo->props.preferred_mem_type = mem_type;
 183}
 184
 185static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
 186			 u32 bo_flags, u32 *c)
 187{
 188	if (bo->props.preferred_gt == XE_GT1) {
 189		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
 190			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
 191		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
 192			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
 193	} else {
 194		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
 195			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
 196		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
 197			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
 198	}
 199}
 200
 201static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
 202			   u32 bo_flags, u32 *c)
 203{
 204	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
 205		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 206
 207		bo->placements[*c] = (struct ttm_place) {
 208			.mem_type = XE_PL_STOLEN,
 209			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
 210					     XE_BO_CREATE_GGTT_BIT) ?
 211				TTM_PL_FLAG_CONTIGUOUS : 0,
 212		};
 213		*c += 1;
 214	}
 215}
 216
 217static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 218				       u32 bo_flags)
 219{
 220	u32 c = 0;
 221
 222	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
 223
 224	/* The order of placements should indicate preferred location */
 225
 226	if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
 227		try_add_system(xe, bo, bo_flags, &c);
 228		try_add_vram(xe, bo, bo_flags, &c);
 229	} else {
 230		try_add_vram(xe, bo, bo_flags, &c);
 231		try_add_system(xe, bo, bo_flags, &c);
 232	}
 233	try_add_stolen(xe, bo, bo_flags, &c);
 234
 235	if (!c)
 236		return -EINVAL;
 237
 238	bo->placement = (struct ttm_placement) {
 239		.num_placement = c,
 240		.placement = bo->placements,
 241		.num_busy_placement = c,
 242		.busy_placement = bo->placements,
 243	};
 244
 245	return 0;
 246}
 247
 248int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 249			      u32 bo_flags)
 250{
 251	xe_bo_assert_held(bo);
 252	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
 253}
 254
 255static void xe_evict_flags(struct ttm_buffer_object *tbo,
 256			   struct ttm_placement *placement)
 257{
 258	if (!xe_bo_is_xe_bo(tbo)) {
 259		/* Don't handle scatter gather BOs */
 260		if (tbo->type == ttm_bo_type_sg) {
 261			placement->num_placement = 0;
 262			placement->num_busy_placement = 0;
 263			return;
 264		}
 265
 266		*placement = sys_placement;
 267		return;
 268	}
 269
 270	/*
  271	 * For xe, sg bos that are evicted to system just trigger a
 272	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
 273	 */
 274	switch (tbo->resource->mem_type) {
 275	case XE_PL_VRAM0:
 276	case XE_PL_VRAM1:
 277	case XE_PL_STOLEN:
 278		*placement = tt_placement;
 279		break;
 280	case XE_PL_TT:
 281	default:
 282		*placement = sys_placement;
 283		break;
 284	}
 285}
 286
 287struct xe_ttm_tt {
 288	struct ttm_tt ttm;
 289	struct device *dev;
 290	struct sg_table sgt;
 291	struct sg_table *sg;
 292};
 293
 294static int xe_tt_map_sg(struct ttm_tt *tt)
 295{
 296	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
 297	unsigned long num_pages = tt->num_pages;
 298	int ret;
 299
 300	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
 301
 302	if (xe_tt->sg)
 303		return 0;
 304
 305	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
 306						num_pages, 0,
 307						(u64)num_pages << PAGE_SHIFT,
 308						xe_sg_segment_size(xe_tt->dev),
 309						GFP_KERNEL);
 310	if (ret)
 311		return ret;
 312
 313	xe_tt->sg = &xe_tt->sgt;
 314	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
 315			      DMA_ATTR_SKIP_CPU_SYNC);
 316	if (ret) {
 317		sg_free_table(xe_tt->sg);
 318		xe_tt->sg = NULL;
 319		return ret;
 320	}
 321
 322	return 0;
 323}
 324
 325struct sg_table *xe_bo_sg(struct xe_bo *bo)
 326{
 327	struct ttm_tt *tt = bo->ttm.ttm;
 328	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
 329
 330	return xe_tt->sg;
 331}
 332
 333static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 334				       u32 page_flags)
 335{
 336	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
 337	struct xe_device *xe = xe_bo_device(bo);
 338	struct xe_ttm_tt *tt;
 339	unsigned long extra_pages;
 340	enum ttm_caching caching;
 341	int err;
 342
 343	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
 344	if (!tt)
 345		return NULL;
 346
 347	tt->dev = xe->drm.dev;
 348
 349	extra_pages = 0;
 350	if (xe_bo_needs_ccs_pages(bo))
 351		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
 352					   PAGE_SIZE);
 353
 354	switch (bo->cpu_caching) {
 355	case DRM_XE_GEM_CPU_CACHING_WC:
 356		caching = ttm_write_combined;
 357		break;
 358	default:
 359		caching = ttm_cached;
 360		break;
 361	}
 362
 363	WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
 364
 365	/*
 366	 * Display scanout is always non-coherent with the CPU cache.
 367	 *
 368	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
 369	 * require a CPU:WC mapping.
 370	 */
 371	if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
 372	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
 373		caching = ttm_write_combined;
 374
 375	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
 376	if (err) {
 377		kfree(tt);
 378		return NULL;
 379	}
 380
 381	return &tt->ttm;
 382}
 383
 384static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
 385			      struct ttm_operation_ctx *ctx)
 386{
 387	int err;
 388
 389	/*
 390	 * dma-bufs are not populated with pages, and the dma-
 391	 * addresses are set up when moved to XE_PL_TT.
 392	 */
 393	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
 394		return 0;
 395
 396	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
 397	if (err)
 398		return err;
 399
  400	/* A follow-up may move this to xe_bo_move() when the BO is moved to XE_PL_TT */
 401	err = xe_tt_map_sg(tt);
 402	if (err)
 403		ttm_pool_free(&ttm_dev->pool, tt);
 404
 405	return err;
 406}
 407
 408static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
 409{
 410	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
 411
 412	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
 413		return;
 414
 415	if (xe_tt->sg) {
 416		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
 417				  DMA_BIDIRECTIONAL, 0);
 418		sg_free_table(xe_tt->sg);
 419		xe_tt->sg = NULL;
 420	}
 421
 422	return ttm_pool_free(&ttm_dev->pool, tt);
 423}
 424
 425static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
 426{
 427	ttm_tt_fini(tt);
 428	kfree(tt);
 429}
 430
 431static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 432				 struct ttm_resource *mem)
 433{
 434	struct xe_device *xe = ttm_to_xe_device(bdev);
 435
 436	switch (mem->mem_type) {
 437	case XE_PL_SYSTEM:
 438	case XE_PL_TT:
 439		return 0;
 440	case XE_PL_VRAM0:
 441	case XE_PL_VRAM1: {
 442		struct xe_ttm_vram_mgr_resource *vres =
 443			to_xe_ttm_vram_mgr_resource(mem);
 444		struct xe_mem_region *vram = res_to_mem_region(mem);
 445
 446		if (vres->used_visible_size < mem->size)
 447			return -EINVAL;
 448
 449		mem->bus.offset = mem->start << PAGE_SHIFT;
 450
 451		if (vram->mapping &&
 452		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
 453			mem->bus.addr = (u8 __force *)vram->mapping +
 454				mem->bus.offset;
 455
 456		mem->bus.offset += vram->io_start;
 457		mem->bus.is_iomem = true;
 458
 459#if  !defined(CONFIG_X86)
 460		mem->bus.caching = ttm_write_combined;
 461#endif
 462		return 0;
 463	} case XE_PL_STOLEN:
 464		return xe_ttm_stolen_io_mem_reserve(xe, mem);
 465	default:
 466		return -EINVAL;
 467	}
 468}
 469
 470static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 471				const struct ttm_operation_ctx *ctx)
 472{
 473	struct dma_resv_iter cursor;
 474	struct dma_fence *fence;
 475	struct drm_gem_object *obj = &bo->ttm.base;
 476	struct drm_gpuvm_bo *vm_bo;
 477	bool idle = false;
 478	int ret = 0;
 479
 480	dma_resv_assert_held(bo->ttm.base.resv);
 481
 482	if (!list_empty(&bo->ttm.base.gpuva.list)) {
 483		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
 484				    DMA_RESV_USAGE_BOOKKEEP);
 485		dma_resv_for_each_fence_unlocked(&cursor, fence)
 486			dma_fence_enable_sw_signaling(fence);
 487		dma_resv_iter_end(&cursor);
 488	}
 489
 490	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
 491		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
 492		struct drm_gpuva *gpuva;
 493
 494		if (!xe_vm_in_fault_mode(vm)) {
 495			drm_gpuvm_bo_evict(vm_bo, true);
 496			continue;
 497		}
 498
 499		if (!idle) {
 500			long timeout;
 501
 502			if (ctx->no_wait_gpu &&
 503			    !dma_resv_test_signaled(bo->ttm.base.resv,
 504						    DMA_RESV_USAGE_BOOKKEEP))
 505				return -EBUSY;
 506
 507			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
 508							DMA_RESV_USAGE_BOOKKEEP,
 509							ctx->interruptible,
 510							MAX_SCHEDULE_TIMEOUT);
 511			if (!timeout)
 512				return -ETIME;
 513			if (timeout < 0)
 514				return timeout;
 515
 516			idle = true;
 517		}
 518
 519		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
 520			struct xe_vma *vma = gpuva_to_vma(gpuva);
 521
 522			trace_xe_vma_evict(vma);
 523			ret = xe_vm_invalidate_vma(vma);
 524			if (XE_WARN_ON(ret))
 525				return ret;
 526		}
 527	}
 528
 529	return ret;
 530}
 531
 532/*
 533 * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
 534 * Note that unmapping the attachment is deferred to the next
  535 * map_attachment time, or to bo destroy (after idling), whichever comes first.
 536 * This is to avoid syncing before unmap_attachment(), assuming that the
 537 * caller relies on idling the reservation object before moving the
 538 * backing store out. Should that assumption not hold, then we will be able
 539 * to unconditionally call unmap_attachment() when moving out to system.
 540 */
 541static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
 542			     struct ttm_resource *new_res)
 543{
 544	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
 545	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
 546					       ttm);
 547	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 548	struct sg_table *sg;
 549
 550	xe_assert(xe, attach);
 551	xe_assert(xe, ttm_bo->ttm);
 552
 553	if (new_res->mem_type == XE_PL_SYSTEM)
 554		goto out;
 555
 556	if (ttm_bo->sg) {
 557		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
 558		ttm_bo->sg = NULL;
 559	}
 560
 561	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
 562	if (IS_ERR(sg))
 563		return PTR_ERR(sg);
 564
 565	ttm_bo->sg = sg;
 566	xe_tt->sg = sg;
 567
 568out:
 569	ttm_bo_move_null(ttm_bo, new_res);
 570
 571	return 0;
 572}
 573
 574/**
 575 * xe_bo_move_notify - Notify subsystems of a pending move
 576 * @bo: The buffer object
 577 * @ctx: The struct ttm_operation_ctx controlling locking and waits.
 578 *
 579 * This function notifies subsystems of an upcoming buffer move.
 580 * Upon receiving such a notification, subsystems should schedule
 581 * halting access to the underlying pages and optionally add a fence
 582 * to the buffer object's dma_resv object, that signals when access is
 583 * stopped. The caller will wait on all dma_resv fences before
 584 * starting the move.
 585 *
 586 * A subsystem may commence access to the object after obtaining
 587 * bindings to the new backing memory under the object lock.
 588 *
 589 * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
 590 * negative error code on error.
 591 */
 592static int xe_bo_move_notify(struct xe_bo *bo,
 593			     const struct ttm_operation_ctx *ctx)
 594{
 595	struct ttm_buffer_object *ttm_bo = &bo->ttm;
 596	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 597	int ret;
 598
 599	/*
 600	 * If this starts to call into many components, consider
 601	 * using a notification chain here.
 602	 */
 603
 604	if (xe_bo_is_pinned(bo))
 605		return -EINVAL;
 606
 607	xe_bo_vunmap(bo);
 608	ret = xe_bo_trigger_rebind(xe, bo, ctx);
 609	if (ret)
 610		return ret;
 611
 612	/* Don't call move_notify() for imported dma-bufs. */
 613	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
 614		dma_buf_move_notify(ttm_bo->base.dma_buf);
 615
 616	return 0;
 617}
 618
 619static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 620		      struct ttm_operation_ctx *ctx,
 621		      struct ttm_resource *new_mem,
 622		      struct ttm_place *hop)
 623{
 624	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 625	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
 626	struct ttm_resource *old_mem = ttm_bo->resource;
 627	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
 628	struct ttm_tt *ttm = ttm_bo->ttm;
 629	struct xe_migrate *migrate = NULL;
 630	struct dma_fence *fence;
 631	bool move_lacks_source;
 632	bool tt_has_data;
 633	bool needs_clear;
 634	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
 635				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
 636	int ret = 0;
 637	/* Bo creation path, moving to system or TT. */
 638	if ((!old_mem && ttm) && !handle_system_ccs) {
 639		ttm_bo_move_null(ttm_bo, new_mem);
 640		return 0;
 641	}
 642
 643	if (ttm_bo->type == ttm_bo_type_sg) {
 644		ret = xe_bo_move_notify(bo, ctx);
 645		if (!ret)
 646			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
 647		goto out;
 648	}
 649
 650	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
 651			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
 652
 653	move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
 654						(!mem_type_is_vram(old_mem_type) && !tt_has_data);
 655
 656	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
 657		(!ttm && ttm_bo->type == ttm_bo_type_device);
 658
 659	if ((move_lacks_source && !needs_clear)) {
 660		ttm_bo_move_null(ttm_bo, new_mem);
 661		goto out;
 662	}
 663
 664	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
 665		ttm_bo_move_null(ttm_bo, new_mem);
 666		goto out;
 667	}
 668
 669	/*
 670	 * Failed multi-hop where the old_mem is still marked as
 671	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
 672	 */
 673	if (old_mem_type == XE_PL_TT &&
 674	    new_mem->mem_type == XE_PL_TT) {
 675		ttm_bo_move_null(ttm_bo, new_mem);
 676		goto out;
 677	}
 678
 679	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
 680		ret = xe_bo_move_notify(bo, ctx);
 681		if (ret)
 682			goto out;
 683	}
 684
 685	if (old_mem_type == XE_PL_TT &&
 686	    new_mem->mem_type == XE_PL_SYSTEM) {
 687		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
 688						     DMA_RESV_USAGE_BOOKKEEP,
 689						     true,
 690						     MAX_SCHEDULE_TIMEOUT);
 691		if (timeout < 0) {
 692			ret = timeout;
 693			goto out;
 694		}
 695
 696		if (!handle_system_ccs) {
 697			ttm_bo_move_null(ttm_bo, new_mem);
 698			goto out;
 699		}
 700	}
 701
 702	if (!move_lacks_source &&
 703	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
 704	     (mem_type_is_vram(old_mem_type) &&
 705	      new_mem->mem_type == XE_PL_SYSTEM))) {
 706		hop->fpfn = 0;
 707		hop->lpfn = 0;
 708		hop->mem_type = XE_PL_TT;
 709		hop->flags = TTM_PL_FLAG_TEMPORARY;
 710		ret = -EMULTIHOP;
 711		goto out;
 712	}
 713
 714	if (bo->tile)
 715		migrate = bo->tile->migrate;
 716	else if (resource_is_vram(new_mem))
 717		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
 718	else if (mem_type_is_vram(old_mem_type))
 719		migrate = mem_type_to_migrate(xe, old_mem_type);
 720	else
 721		migrate = xe->tiles[0].migrate;
 722
 723	xe_assert(xe, migrate);
 724	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
 725	xe_device_mem_access_get(xe);
 726
 727	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
 728		/*
  729		 * Kernel memory that is pinned should only be moved on suspend /
  730		 * resume; some of the pinned memory is required for the device to
  731		 * resume and to use the GPU to move other evicted memory (user
  732		 * memory) around. This could likely be optimized a bit further
  733		 * by finding the minimum set of pinned memory required for
  734		 * resume, but for simplicity we do a memcpy for all pinned
  735		 * memory.
  736		 */
 737		ret = xe_bo_vmap(bo);
 738		if (!ret) {
 739			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
 740
  741			/* Create a new VMAP once the kernel BO is back in VRAM */
 742			if (!ret && resource_is_vram(new_mem)) {
 743				struct xe_mem_region *vram = res_to_mem_region(new_mem);
 744				void __iomem *new_addr = vram->mapping +
 745					(new_mem->start << PAGE_SHIFT);
 746
 747				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
 748					ret = -EINVAL;
 749					xe_device_mem_access_put(xe);
 750					goto out;
 751				}
 752
 753				xe_assert(xe, new_mem->start ==
 754					  bo->placements->fpfn);
 755
 756				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
 757			}
 758		}
 759	} else {
 760		if (move_lacks_source)
 761			fence = xe_migrate_clear(migrate, bo, new_mem);
 762		else
 763			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
 764						new_mem, handle_system_ccs);
 765		if (IS_ERR(fence)) {
 766			ret = PTR_ERR(fence);
 767			xe_device_mem_access_put(xe);
 768			goto out;
 769		}
 770		if (!move_lacks_source) {
 771			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
 772							true, new_mem);
 773			if (ret) {
 774				dma_fence_wait(fence, false);
 775				ttm_bo_move_null(ttm_bo, new_mem);
 776				ret = 0;
 777			}
 778		} else {
 779			/*
 780			 * ttm_bo_move_accel_cleanup() may blow up if
 781			 * bo->resource == NULL, so just attach the
 782			 * fence and set the new resource.
 783			 */
 784			dma_resv_add_fence(ttm_bo->base.resv, fence,
 785					   DMA_RESV_USAGE_KERNEL);
 786			ttm_bo_move_null(ttm_bo, new_mem);
 787		}
 788
 789		dma_fence_put(fence);
 790	}
 791
 792	xe_device_mem_access_put(xe);
 793
 794out:
 795	return ret;
 796
 797}
 798
 799/**
 800 * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
 801 * @bo: The buffer object to move.
 802 *
  803 * On successful completion, the object memory will be moved to system memory.
 804 * This function blocks until the object has been fully moved.
 805 *
  806 * This is needed for special handling of pinned VRAM objects during
 807 * suspend-resume.
 808 *
 809 * Return: 0 on success. Negative error code on failure.
 810 */
 811int xe_bo_evict_pinned(struct xe_bo *bo)
 812{
 813	struct ttm_place place = {
 814		.mem_type = XE_PL_TT,
 815	};
 816	struct ttm_placement placement = {
 817		.placement = &place,
 818		.num_placement = 1,
 819	};
 820	struct ttm_operation_ctx ctx = {
 821		.interruptible = false,
 822	};
 823	struct ttm_resource *new_mem;
 824	int ret;
 825
 826	xe_bo_assert_held(bo);
 827
 828	if (WARN_ON(!bo->ttm.resource))
 829		return -EINVAL;
 830
 831	if (WARN_ON(!xe_bo_is_pinned(bo)))
 832		return -EINVAL;
 833
 834	if (WARN_ON(!xe_bo_is_vram(bo)))
 835		return -EINVAL;
 836
 837	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
 838	if (ret)
 839		return ret;
 840
 841	if (!bo->ttm.ttm) {
 842		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
 843		if (!bo->ttm.ttm) {
 844			ret = -ENOMEM;
 845			goto err_res_free;
 846		}
 847	}
 848
 849	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
 850	if (ret)
 851		goto err_res_free;
 852
 853	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
 854	if (ret)
 855		goto err_res_free;
 856
 857	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
 858	if (ret)
 859		goto err_res_free;
 860
 861	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
 862			      false, MAX_SCHEDULE_TIMEOUT);
 863
 864	return 0;
 865
 866err_res_free:
 867	ttm_resource_free(&bo->ttm, &new_mem);
 868	return ret;
 869}
 870
 871/**
 872 * xe_bo_restore_pinned() - Restore a pinned VRAM object
 873 * @bo: The buffer object to move.
 874 *
 875 * On successful completion, the object memory will be moved back to VRAM.
 876 * This function blocks until the object has been fully moved.
 877 *
  878 * This is needed for special handling of pinned VRAM objects during
 879 * suspend-resume.
 880 *
 881 * Return: 0 on success. Negative error code on failure.
 882 */
 883int xe_bo_restore_pinned(struct xe_bo *bo)
 884{
 885	struct ttm_operation_ctx ctx = {
 886		.interruptible = false,
 887	};
 888	struct ttm_resource *new_mem;
 889	int ret;
 890
 891	xe_bo_assert_held(bo);
 892
 893	if (WARN_ON(!bo->ttm.resource))
 894		return -EINVAL;
 895
 896	if (WARN_ON(!xe_bo_is_pinned(bo)))
 897		return -EINVAL;
 898
 899	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
 900		return -EINVAL;
 901
 902	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
 903	if (ret)
 904		return ret;
 905
 906	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
 907	if (ret)
 908		goto err_res_free;
 909
 910	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
 911	if (ret)
 912		goto err_res_free;
 913
 914	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
 915	if (ret)
 916		goto err_res_free;
 917
 918	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
 919			      false, MAX_SCHEDULE_TIMEOUT);
 920
 921	return 0;
 922
 923err_res_free:
 924	ttm_resource_free(&bo->ttm, &new_mem);
 925	return ret;
 926}
 927
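/*
 * Illustrative sketch, not part of the original file: how xe_bo_evict_pinned()
 * and xe_bo_restore_pinned() could be paired across a suspend / resume cycle
 * for a single pinned VRAM BO. Both helpers assert that the BO's dma_resv is
 * held, hence the xe_bo_lock() / xe_bo_unlock() bracketing. The example_*
 * names are hypothetical; the driver's real suspend / resume handling lives
 * elsewhere.
 */
#if 0
static int example_suspend_pinned(struct xe_bo *bo)
{
	int err;

	xe_bo_lock(bo, false);		/* non-interruptible, always returns 0 */
	err = xe_bo_evict_pinned(bo);	/* blocking move to XE_PL_TT */
	xe_bo_unlock(bo);

	return err;
}

static int example_resume_pinned(struct xe_bo *bo)
{
	int err;

	xe_bo_lock(bo, false);
	err = xe_bo_restore_pinned(bo);	/* blocking move back to VRAM */
	xe_bo_unlock(bo);

	return err;
}
#endif
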
 928static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
 929				       unsigned long page_offset)
 930{
 931	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
 932	struct xe_res_cursor cursor;
 933	struct xe_mem_region *vram;
 934
 935	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
 936		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
 937
 938	vram = res_to_mem_region(ttm_bo->resource);
 939	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
 940	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
 941}
 942
 943static void __xe_bo_vunmap(struct xe_bo *bo);
 944
 945/*
 946 * TODO: Move this function to TTM so we don't rely on how TTM does its
 947 * locking, thereby abusing TTM internals.
 948 */
 949static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
 950{
 951	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
 952	bool locked;
 953
 954	xe_assert(xe, !kref_read(&ttm_bo->kref));
 955
 956	/*
 957	 * We can typically only race with TTM trylocking under the
 958	 * lru_lock, which will immediately be unlocked again since
 959	 * the ttm_bo refcount is zero at this point. So trylocking *should*
 960	 * always succeed here, as long as we hold the lru lock.
 961	 */
 962	spin_lock(&ttm_bo->bdev->lru_lock);
 963	locked = dma_resv_trylock(ttm_bo->base.resv);
 964	spin_unlock(&ttm_bo->bdev->lru_lock);
 965	xe_assert(xe, locked);
 966
 967	return locked;
 968}
 969
 970static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
 971{
 972	struct dma_resv_iter cursor;
 973	struct dma_fence *fence;
 974	struct dma_fence *replacement = NULL;
 975	struct xe_bo *bo;
 976
 977	if (!xe_bo_is_xe_bo(ttm_bo))
 978		return;
 979
 980	bo = ttm_to_xe_bo(ttm_bo);
 981	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
 982
 983	/*
  984	 * Corner case where TTM fails to allocate memory and this BO's resv
  985	 * still points to the VM's resv
 986	 */
 987	if (ttm_bo->base.resv != &ttm_bo->base._resv)
 988		return;
 989
 990	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
 991		return;
 992
 993	/*
 994	 * Scrub the preempt fences if any. The unbind fence is already
 995	 * attached to the resv.
 996	 * TODO: Don't do this for external bos once we scrub them after
 997	 * unbind.
 998	 */
 999	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1000				DMA_RESV_USAGE_BOOKKEEP, fence) {
1001		if (xe_fence_is_xe_preempt(fence) &&
1002		    !dma_fence_is_signaled(fence)) {
1003			if (!replacement)
1004				replacement = dma_fence_get_stub();
1005
1006			dma_resv_replace_fences(ttm_bo->base.resv,
1007						fence->context,
1008						replacement,
1009						DMA_RESV_USAGE_BOOKKEEP);
1010		}
1011	}
1012	dma_fence_put(replacement);
1013
1014	dma_resv_unlock(ttm_bo->base.resv);
1015}
1016
1017static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1018{
1019	if (!xe_bo_is_xe_bo(ttm_bo))
1020		return;
1021
1022	/*
1023	 * Object is idle and about to be destroyed. Release the
1024	 * dma-buf attachment.
1025	 */
1026	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1027		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1028						       struct xe_ttm_tt, ttm);
1029
1030		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1031					 DMA_BIDIRECTIONAL);
1032		ttm_bo->sg = NULL;
1033		xe_tt->sg = NULL;
1034	}
1035}
1036
1037struct ttm_device_funcs xe_ttm_funcs = {
1038	.ttm_tt_create = xe_ttm_tt_create,
1039	.ttm_tt_populate = xe_ttm_tt_populate,
1040	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1041	.ttm_tt_destroy = xe_ttm_tt_destroy,
1042	.evict_flags = xe_evict_flags,
1043	.move = xe_bo_move,
1044	.io_mem_reserve = xe_ttm_io_mem_reserve,
1045	.io_mem_pfn = xe_ttm_io_mem_pfn,
1046	.release_notify = xe_ttm_bo_release_notify,
1047	.eviction_valuable = ttm_bo_eviction_valuable,
1048	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1049};
1050
1051static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1052{
1053	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1054	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1055
1056	if (bo->ttm.base.import_attach)
1057		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1058	drm_gem_object_release(&bo->ttm.base);
1059
1060	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1061
1062	if (bo->ggtt_node.size)
1063		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
1064
1065#ifdef CONFIG_PROC_FS
1066	if (bo->client)
1067		xe_drm_client_remove_bo(bo);
1068#endif
1069
1070	if (bo->vm && xe_bo_is_user(bo))
1071		xe_vm_put(bo->vm);
1072
1073	kfree(bo);
1074}
1075
1076static void xe_gem_object_free(struct drm_gem_object *obj)
1077{
1078	/* Our BO reference counting scheme works as follows:
1079	 *
1080	 * The gem object kref is typically used throughout the driver,
1081	 * and the gem object holds a ttm_buffer_object refcount, so
1082	 * that when the last gem object reference is put, which is when
 1083	 * we end up in this function, we also put that ttm_buffer_object
1084	 * refcount. Anything using gem interfaces is then no longer
1085	 * allowed to access the object in a way that requires a gem
1086	 * refcount, including locking the object.
1087	 *
 1088	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1089	 * refcount directly if needed.
1090	 */
1091	__xe_bo_vunmap(gem_to_xe_bo(obj));
1092	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1093}
1094
1095static void xe_gem_object_close(struct drm_gem_object *obj,
1096				struct drm_file *file_priv)
1097{
1098	struct xe_bo *bo = gem_to_xe_bo(obj);
1099
1100	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1101		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1102
1103		xe_bo_lock(bo, false);
1104		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1105		xe_bo_unlock(bo);
1106	}
1107}
1108
1109static bool should_migrate_to_system(struct xe_bo *bo)
1110{
1111	struct xe_device *xe = xe_bo_device(bo);
1112
1113	return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
1114}
1115
1116static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1117{
1118	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1119	struct drm_device *ddev = tbo->base.dev;
1120	vm_fault_t ret;
1121	int idx, r = 0;
1122
1123	ret = ttm_bo_vm_reserve(tbo, vmf);
1124	if (ret)
1125		return ret;
1126
1127	if (drm_dev_enter(ddev, &idx)) {
1128		struct xe_bo *bo = ttm_to_xe_bo(tbo);
1129
1130		trace_xe_bo_cpu_fault(bo);
1131
1132		if (should_migrate_to_system(bo)) {
1133			r = xe_bo_migrate(bo, XE_PL_TT);
1134			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
1135				ret = VM_FAULT_NOPAGE;
1136			else if (r)
1137				ret = VM_FAULT_SIGBUS;
1138		}
1139		if (!ret)
1140			ret = ttm_bo_vm_fault_reserved(vmf,
1141						       vmf->vma->vm_page_prot,
1142						       TTM_BO_VM_NUM_PREFAULT);
1143		drm_dev_exit(idx);
1144	} else {
1145		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1146	}
1147	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1148		return ret;
1149
1150	dma_resv_unlock(tbo->base.resv);
1151	return ret;
1152}
1153
1154static const struct vm_operations_struct xe_gem_vm_ops = {
1155	.fault = xe_gem_fault,
1156	.open = ttm_bo_vm_open,
1157	.close = ttm_bo_vm_close,
1158	.access = ttm_bo_vm_access
1159};
1160
1161static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1162	.free = xe_gem_object_free,
1163	.close = xe_gem_object_close,
1164	.mmap = drm_gem_ttm_mmap,
1165	.export = xe_gem_prime_export,
1166	.vm_ops = &xe_gem_vm_ops,
1167};
1168
1169/**
1170 * xe_bo_alloc - Allocate storage for a struct xe_bo
1171 *
 1172 * This function is intended to allocate storage to be used as input
 1173 * to __xe_bo_create_locked(), in case a pointer to the bo to be
 1174 * created is needed before the call to __xe_bo_create_locked().
 1175 * If __xe_bo_create_locked() ends up never being called, then the
1176 * storage allocated with this function needs to be freed using
1177 * xe_bo_free().
1178 *
1179 * Return: A pointer to an uninitialized struct xe_bo on success,
1180 * ERR_PTR(-ENOMEM) on error.
1181 */
1182struct xe_bo *xe_bo_alloc(void)
1183{
1184	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1185
1186	if (!bo)
1187		return ERR_PTR(-ENOMEM);
1188
1189	return bo;
1190}
1191
1192/**
1193 * xe_bo_free - Free storage allocated using xe_bo_alloc()
1194 * @bo: The buffer object storage.
1195 *
1196 * Refer to xe_bo_alloc() documentation for valid use-cases.
1197 */
1198void xe_bo_free(struct xe_bo *bo)
1199{
1200	kfree(bo);
1201}
1202
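/*
 * Illustrative sketch, not part of the original file: the pairing described in
 * the xe_bo_alloc() kernel-doc above. Storage is allocated up front so that a
 * pointer to the future BO exists before creation; if __xe_bo_create_locked()
 * ends up never being called, the storage is released with xe_bo_free(). The
 * example_* names and the early-out condition are hypothetical.
 */
#if 0
static struct xe_bo *example_prealloc_bo(void)
{
	struct xe_bo *bo = xe_bo_alloc();

	if (IS_ERR(bo))
		return bo;	/* ERR_PTR(-ENOMEM) */

	if (!example_creation_still_wanted()) {
		/* __xe_bo_create_locked() will never see this storage. */
		xe_bo_free(bo);
		return NULL;
	}

	/* Otherwise hand @bo to __xe_bo_create_locked() later on. */
	return bo;
}
#endif
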
1203struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1204				     struct xe_tile *tile, struct dma_resv *resv,
1205				     struct ttm_lru_bulk_move *bulk, size_t size,
1206				     u16 cpu_caching, enum ttm_bo_type type,
1207				     u32 flags)
1208{
1209	struct ttm_operation_ctx ctx = {
1210		.interruptible = true,
1211		.no_wait_gpu = false,
1212	};
1213	struct ttm_placement *placement;
1214	uint32_t alignment;
1215	size_t aligned_size;
1216	int err;
1217
1218	/* Only kernel objects should set GT */
1219	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1220
1221	if (XE_WARN_ON(!size)) {
1222		xe_bo_free(bo);
1223		return ERR_PTR(-EINVAL);
1224	}
1225
1226	if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
1227	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
1228	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
1229		aligned_size = ALIGN(size, SZ_64K);
1230		if (type != ttm_bo_type_device)
1231			size = ALIGN(size, SZ_64K);
1232		flags |= XE_BO_INTERNAL_64K;
1233		alignment = SZ_64K >> PAGE_SHIFT;
1234
1235	} else {
1236		aligned_size = ALIGN(size, SZ_4K);
1237		flags &= ~XE_BO_INTERNAL_64K;
1238		alignment = SZ_4K >> PAGE_SHIFT;
1239	}
1240
1241	if (type == ttm_bo_type_device && aligned_size != size)
1242		return ERR_PTR(-EINVAL);
1243
1244	if (!bo) {
1245		bo = xe_bo_alloc();
1246		if (IS_ERR(bo))
1247			return bo;
1248	}
1249
1250	bo->ccs_cleared = false;
1251	bo->tile = tile;
1252	bo->size = size;
1253	bo->flags = flags;
1254	bo->cpu_caching = cpu_caching;
1255	bo->ttm.base.funcs = &xe_gem_object_funcs;
1256	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
1257	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
1258	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
1259	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1260	INIT_LIST_HEAD(&bo->pinned_link);
1261#ifdef CONFIG_PROC_FS
1262	INIT_LIST_HEAD(&bo->client_link);
1263#endif
1264
1265	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1266
1267	if (resv) {
1268		ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
1269		ctx.resv = resv;
1270	}
1271
1272	if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
1273		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1274		if (WARN_ON(err)) {
1275			xe_ttm_bo_destroy(&bo->ttm);
1276			return ERR_PTR(err);
1277		}
1278	}
1279
1280	/* Defer populating type_sg bos */
1281	placement = (type == ttm_bo_type_sg ||
1282		     bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
1283		&bo->placement;
1284	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1285				   placement, alignment,
1286				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1287	if (err)
1288		return ERR_PTR(err);
1289
1290	/*
1291	 * The VRAM pages underneath are potentially still being accessed by the
1292	 * GPU, as per async GPU clearing and async evictions. However TTM makes
 1293 * sure to add any corresponding move/clear fences into the object's
1294	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1295	 *
1296	 * For KMD internal buffers we don't care about GPU clearing, however we
1297	 * still need to handle async evictions, where the VRAM is still being
1298	 * accessed by the GPU. Most internal callers are not expecting this,
1299	 * since they are missing the required synchronisation before accessing
1300	 * the memory. To keep things simple just sync wait any kernel fences
1301	 * here, if the buffer is designated KMD internal.
1302	 *
1303	 * For normal userspace objects we should already have the required
1304	 * pipelining or sync waiting elsewhere, since we already have to deal
1305	 * with things like async GPU clearing.
1306	 */
1307	if (type == ttm_bo_type_kernel) {
1308		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1309						     DMA_RESV_USAGE_KERNEL,
1310						     ctx.interruptible,
1311						     MAX_SCHEDULE_TIMEOUT);
1312
1313		if (timeout < 0) {
1314			if (!resv)
1315				dma_resv_unlock(bo->ttm.base.resv);
1316			xe_bo_put(bo);
1317			return ERR_PTR(timeout);
1318		}
1319	}
1320
1321	bo->created = true;
1322	if (bulk)
1323		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1324	else
1325		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1326
1327	return bo;
1328}
1329
1330static int __xe_bo_fixed_placement(struct xe_device *xe,
1331				   struct xe_bo *bo,
1332				   u32 flags,
1333				   u64 start, u64 end, u64 size)
1334{
1335	struct ttm_place *place = bo->placements;
1336
1337	if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
1338		return -EINVAL;
1339
1340	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1341	place->fpfn = start >> PAGE_SHIFT;
1342	place->lpfn = end >> PAGE_SHIFT;
1343
1344	switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
1345	case XE_BO_CREATE_VRAM0_BIT:
1346		place->mem_type = XE_PL_VRAM0;
1347		break;
1348	case XE_BO_CREATE_VRAM1_BIT:
1349		place->mem_type = XE_PL_VRAM1;
1350		break;
1351	case XE_BO_CREATE_STOLEN_BIT:
1352		place->mem_type = XE_PL_STOLEN;
1353		break;
1354
1355	default:
1356		/* 0 or multiple of the above set */
1357		return -EINVAL;
1358	}
1359
1360	bo->placement = (struct ttm_placement) {
1361		.num_placement = 1,
1362		.placement = place,
1363		.num_busy_placement = 1,
1364		.busy_placement = place,
1365	};
1366
1367	return 0;
1368}
1369
1370static struct xe_bo *
1371__xe_bo_create_locked(struct xe_device *xe,
1372		      struct xe_tile *tile, struct xe_vm *vm,
1373		      size_t size, u64 start, u64 end,
1374		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
1375{
1376	struct xe_bo *bo = NULL;
1377	int err;
1378
1379	if (vm)
1380		xe_vm_assert_held(vm);
1381
1382	if (start || end != ~0ULL) {
1383		bo = xe_bo_alloc();
1384		if (IS_ERR(bo))
1385			return bo;
1386
1387		flags |= XE_BO_FIXED_PLACEMENT_BIT;
1388		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1389		if (err) {
1390			xe_bo_free(bo);
1391			return ERR_PTR(err);
1392		}
1393	}
1394
1395	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1396				    vm && !xe_vm_in_fault_mode(vm) &&
1397				    flags & XE_BO_CREATE_USER_BIT ?
1398				    &vm->lru_bulk_move : NULL, size,
1399				    cpu_caching, type, flags);
1400	if (IS_ERR(bo))
1401		return bo;
1402
1403	/*
 1404	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
 1405	 * to ensure the shared resv doesn't disappear under the bo, the bo
 1406	 * will keep a reference to the vm, and circular references are
 1407	 * avoided by having all the vm's bo references released at vm close
 1408	 * time.
1409	 */
1410	if (vm && xe_bo_is_user(bo))
1411		xe_vm_get(vm);
1412	bo->vm = vm;
1413
1414	if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
1415		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
1416			tile = xe_device_get_root_tile(xe);
1417
1418		xe_assert(xe, tile);
1419
1420		if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
1421			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
1422						   start + bo->size, U64_MAX);
1423		} else {
1424			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
1425		}
1426		if (err)
1427			goto err_unlock_put_bo;
1428	}
1429
1430	return bo;
1431
1432err_unlock_put_bo:
1433	__xe_bo_unset_bulk_move(bo);
1434	xe_bo_unlock_vm_held(bo);
1435	xe_bo_put(bo);
1436	return ERR_PTR(err);
1437}
1438
1439struct xe_bo *
1440xe_bo_create_locked_range(struct xe_device *xe,
1441			  struct xe_tile *tile, struct xe_vm *vm,
1442			  size_t size, u64 start, u64 end,
1443			  enum ttm_bo_type type, u32 flags)
1444{
1445	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
1446}
1447
1448struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1449				  struct xe_vm *vm, size_t size,
1450				  enum ttm_bo_type type, u32 flags)
1451{
1452	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
1453}
1454
1455struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1456				struct xe_vm *vm, size_t size,
1457				u16 cpu_caching,
1458				enum ttm_bo_type type,
1459				u32 flags)
1460{
1461	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1462						 cpu_caching, type,
1463						 flags | XE_BO_CREATE_USER_BIT);
1464	if (!IS_ERR(bo))
1465		xe_bo_unlock_vm_held(bo);
1466
1467	return bo;
1468}
1469
1470struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1471			   struct xe_vm *vm, size_t size,
1472			   enum ttm_bo_type type, u32 flags)
1473{
1474	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1475
1476	if (!IS_ERR(bo))
1477		xe_bo_unlock_vm_held(bo);
1478
1479	return bo;
1480}
1481
1482struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1483				      struct xe_vm *vm,
1484				      size_t size, u64 offset,
1485				      enum ttm_bo_type type, u32 flags)
1486{
1487	struct xe_bo *bo;
1488	int err;
1489	u64 start = offset == ~0ull ? 0 : offset;
1490	u64 end = offset == ~0ull ? offset : start + size;
1491
1492	if (flags & XE_BO_CREATE_STOLEN_BIT &&
1493	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1494		flags |= XE_BO_CREATE_GGTT_BIT;
1495
1496	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1497				       flags | XE_BO_NEEDS_CPU_ACCESS);
1498	if (IS_ERR(bo))
1499		return bo;
1500
1501	err = xe_bo_pin(bo);
1502	if (err)
1503		goto err_put;
1504
1505	err = xe_bo_vmap(bo);
1506	if (err)
1507		goto err_unpin;
1508
1509	xe_bo_unlock_vm_held(bo);
1510
1511	return bo;
1512
1513err_unpin:
1514	xe_bo_unpin(bo);
1515err_put:
1516	xe_bo_unlock_vm_held(bo);
1517	xe_bo_put(bo);
1518	return ERR_PTR(err);
1519}
1520
1521struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1522				   struct xe_vm *vm, size_t size,
1523				   enum ttm_bo_type type, u32 flags)
1524{
1525	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1526}
1527
1528struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1529				     const void *data, size_t size,
1530				     enum ttm_bo_type type, u32 flags)
1531{
1532	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1533						ALIGN(size, PAGE_SIZE),
1534						type, flags);
1535	if (IS_ERR(bo))
1536		return bo;
1537
1538	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1539
1540	return bo;
1541}
1542
1543static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
1544{
1545	xe_bo_unpin_map_no_vm(arg);
1546}
1547
1548struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1549					   size_t size, u32 flags)
1550{
1551	struct xe_bo *bo;
1552	int ret;
1553
1554	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1555	if (IS_ERR(bo))
1556		return bo;
1557
1558	ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
1559	if (ret)
1560		return ERR_PTR(ret);
1561
1562	return bo;
1563}
1564
1565struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1566					     const void *data, size_t size, u32 flags)
1567{
1568	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1569
1570	if (IS_ERR(bo))
1571		return bo;
1572
1573	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1574
1575	return bo;
1576}
1577
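/*
 * Illustrative sketch, not part of the original file: uploading a small blob
 * into a pinned, CPU-mapped, drmm-managed kernel BO with the helper above.
 * The flag choice (system placement plus a GGTT mapping) and the example_*
 * name are assumptions for the example; teardown happens automatically via
 * the drmm action registered in xe_managed_bo_create_pin_map().
 */
#if 0
static struct xe_bo *example_upload_blob(struct xe_device *xe,
					 struct xe_tile *tile,
					 const void *blob, size_t size)
{
	struct xe_bo *bo;

	bo = xe_managed_bo_create_from_data(xe, tile, blob, size,
					    XE_BO_CREATE_SYSTEM_BIT |
					    XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(bo))
		return bo;

	/* bo->vmap is already set up; no explicit unpin / unmap is needed. */
	return bo;
}
#endif
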
1578/*
1579 * XXX: This is in the VM bind data path, likely should calculate this once and
1580 * store, with a recalculation if the BO is moved.
1581 */
1582uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1583{
1584	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1585
1586	if (res->mem_type == XE_PL_STOLEN)
1587		return xe_ttm_stolen_gpu_offset(xe);
1588
1589	return res_to_mem_region(res)->dpa_base;
1590}
1591
1592/**
1593 * xe_bo_pin_external - pin an external BO
1594 * @bo: buffer object to be pinned
1595 *
1596 * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
 1597 * BO. Unique call compared to xe_bo_pin as this function has its own set of
1598 * asserts and code to ensure evict / restore on suspend / resume.
1599 *
1600 * Returns 0 for success, negative error code otherwise.
1601 */
1602int xe_bo_pin_external(struct xe_bo *bo)
1603{
1604	struct xe_device *xe = xe_bo_device(bo);
1605	int err;
1606
1607	xe_assert(xe, !bo->vm);
1608	xe_assert(xe, xe_bo_is_user(bo));
1609
1610	if (!xe_bo_is_pinned(bo)) {
1611		err = xe_bo_validate(bo, NULL, false);
1612		if (err)
1613			return err;
1614
1615		if (xe_bo_is_vram(bo)) {
1616			spin_lock(&xe->pinned.lock);
1617			list_add_tail(&bo->pinned_link,
1618				      &xe->pinned.external_vram);
1619			spin_unlock(&xe->pinned.lock);
1620		}
1621	}
1622
1623	ttm_bo_pin(&bo->ttm);
1624
1625	/*
1626	 * FIXME: If we always use the reserve / unreserve functions for locking
1627	 * we do not need this.
1628	 */
1629	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1630
1631	return 0;
1632}
1633
1634int xe_bo_pin(struct xe_bo *bo)
1635{
1636	struct xe_device *xe = xe_bo_device(bo);
1637	int err;
1638
1639	/* We currently don't expect user BO to be pinned */
1640	xe_assert(xe, !xe_bo_is_user(bo));
1641
1642	/* Pinned object must be in GGTT or have pinned flag */
1643	xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
1644				   XE_BO_CREATE_GGTT_BIT));
1645
1646	/*
 1647	 * No reason we can't support pinning imported dma-bufs; we just don't
 1648	 * expect to pin an imported dma-buf.
1649	 */
1650	xe_assert(xe, !bo->ttm.base.import_attach);
1651
1652	/* We only expect at most 1 pin */
1653	xe_assert(xe, !xe_bo_is_pinned(bo));
1654
1655	err = xe_bo_validate(bo, NULL, false);
1656	if (err)
1657		return err;
1658
1659	/*
 1660	 * For pinned objects on DGFX, which are also in VRAM, we expect these
 1661	 * to be in contiguous VRAM memory, as required for eviction / restore
 1662	 * during suspend / resume (force restore to the same physical address).
1663	 */
1664	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1665	    bo->flags & XE_BO_INTERNAL_TEST)) {
1666		struct ttm_place *place = &(bo->placements[0]);
1667
1668		if (mem_type_is_vram(place->mem_type)) {
1669			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1670
1671			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1672				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1673			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1674
1675			spin_lock(&xe->pinned.lock);
1676			list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1677			spin_unlock(&xe->pinned.lock);
1678		}
1679	}
1680
1681	ttm_bo_pin(&bo->ttm);
1682
1683	/*
1684	 * FIXME: If we always use the reserve / unreserve functions for locking
1685	 * we do not need this.
1686	 */
1687	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1688
1689	return 0;
1690}
1691
1692/**
1693 * xe_bo_unpin_external - unpin an external BO
1694 * @bo: buffer object to be unpinned
1695 *
1696 * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1697 * BO. Unique call compared to xe_bo_unpin as this function has it own set of
1698 * asserts and code to ensure evict / restore on suspend / resume.
1699 *
1700 * Returns 0 for success, negative error code otherwise.
1701 */
1702void xe_bo_unpin_external(struct xe_bo *bo)
1703{
1704	struct xe_device *xe = xe_bo_device(bo);
1705
1706	xe_assert(xe, !bo->vm);
1707	xe_assert(xe, xe_bo_is_pinned(bo));
1708	xe_assert(xe, xe_bo_is_user(bo));
1709
1710	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
1711		spin_lock(&xe->pinned.lock);
1712		list_del_init(&bo->pinned_link);
1713		spin_unlock(&xe->pinned.lock);
1714	}
1715
1716	ttm_bo_unpin(&bo->ttm);
1717
1718	/*
1719	 * FIXME: If we always use the reserve / unreserve functions for locking
1720	 * we do not need this.
1721	 */
1722	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1723}
1724
1725void xe_bo_unpin(struct xe_bo *bo)
1726{
1727	struct xe_device *xe = xe_bo_device(bo);
1728
1729	xe_assert(xe, !bo->ttm.base.import_attach);
1730	xe_assert(xe, xe_bo_is_pinned(bo));
1731
1732	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1733	    bo->flags & XE_BO_INTERNAL_TEST)) {
1734		struct ttm_place *place = &(bo->placements[0]);
1735
1736		if (mem_type_is_vram(place->mem_type)) {
1737			xe_assert(xe, !list_empty(&bo->pinned_link));
1738
1739			spin_lock(&xe->pinned.lock);
1740			list_del_init(&bo->pinned_link);
1741			spin_unlock(&xe->pinned.lock);
1742		}
1743	}
1744
1745	ttm_bo_unpin(&bo->ttm);
1746}
1747
1748/**
1749 * xe_bo_validate() - Make sure the bo is in an allowed placement
 1750 * @bo: The bo.
 1751 * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
1752 *      NULL. Used together with @allow_res_evict.
1753 * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
1754 *                   reservation object.
1755 *
1756 * Make sure the bo is in allowed placement, migrating it if necessary. If
 1757 * needed, other bos will be evicted. If bos selected for eviction share
1758 * the @vm's reservation object, they can be evicted iff @allow_res_evict is
1759 * set to true, otherwise they will be bypassed.
1760 *
1761 * Return: 0 on success, negative error code on failure. May return
1762 * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
1763 */
1764int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
1765{
1766	struct ttm_operation_ctx ctx = {
1767		.interruptible = true,
1768		.no_wait_gpu = false,
1769	};
1770
1771	if (vm) {
1772		lockdep_assert_held(&vm->lock);
1773		xe_vm_assert_held(vm);
1774
1775		ctx.allow_res_evict = allow_res_evict;
1776		ctx.resv = xe_vm_resv(vm);
1777	}
1778
1779	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
1780}
1781
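/*
 * Illustrative sketch, not part of the original file: revalidating a BO that
 * does not share a VM's locked dma_resv, so @vm is NULL and @allow_res_evict
 * is irrelevant. The dma_resv lock is taken with xe_bo_lock() since
 * ttm_bo_validate() requires it. The example_* name is hypothetical.
 */
#if 0
static int example_revalidate(struct xe_bo *bo)
{
	int err;

	err = xe_bo_lock(bo, true);	/* interruptible, may return -EINTR */
	if (err)
		return err;

	err = xe_bo_validate(bo, NULL, false);

	xe_bo_unlock(bo);
	return err;
}
#endif
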
1782bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
1783{
1784	if (bo->destroy == &xe_ttm_bo_destroy)
1785		return true;
1786
1787	return false;
1788}
1789
1790/*
1791 * Resolve a BO address. There is no assert to check if the proper lock is held
1792 * so it should only be used in cases where it is not fatal to get the wrong
1793 * address, such as printing debug information, but not in cases where memory is
1794 * written based on this result.
1795 */
1796dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1797{
1798	struct xe_device *xe = xe_bo_device(bo);
1799	struct xe_res_cursor cur;
1800	u64 page;
1801
1802	xe_assert(xe, page_size <= PAGE_SIZE);
1803	page = offset >> PAGE_SHIFT;
1804	offset &= (PAGE_SIZE - 1);
1805
1806	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
1807		xe_assert(xe, bo->ttm.ttm);
1808
1809		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
1810				page_size, &cur);
1811		return xe_res_dma(&cur) + offset;
1812	} else {
1813		struct xe_res_cursor cur;
1814
1815		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
1816			     page_size, &cur);
1817		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
1818	}
1819}
1820
1821dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1822{
1823	if (!READ_ONCE(bo->ttm.pin_count))
1824		xe_bo_assert_held(bo);
1825	return __xe_bo_addr(bo, offset, page_size);
1826}
1827
1828int xe_bo_vmap(struct xe_bo *bo)
1829{
1830	void *virtual;
1831	bool is_iomem;
1832	int ret;
1833
1834	xe_bo_assert_held(bo);
1835
1836	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
1837		return -EINVAL;
1838
1839	if (!iosys_map_is_null(&bo->vmap))
1840		return 0;
1841
1842	/*
1843	 * We use this more or less deprecated interface for now since
1844	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
1845	 * single page bos, which is done here.
1846	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
1847	 * to use struct iosys_map.
1848	 */
1849	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
1850	if (ret)
1851		return ret;
1852
1853	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
1854	if (is_iomem)
1855		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
1856	else
1857		iosys_map_set_vaddr(&bo->vmap, virtual);
1858
1859	return 0;
1860}
1861
1862static void __xe_bo_vunmap(struct xe_bo *bo)
1863{
1864	if (!iosys_map_is_null(&bo->vmap)) {
1865		iosys_map_clear(&bo->vmap);
1866		ttm_bo_kunmap(&bo->kmap);
1867	}
1868}
1869
1870void xe_bo_vunmap(struct xe_bo *bo)
1871{
1872	xe_bo_assert_held(bo);
1873	__xe_bo_vunmap(bo);
1874}
1875
1876int xe_gem_create_ioctl(struct drm_device *dev, void *data,
1877			struct drm_file *file)
1878{
1879	struct xe_device *xe = to_xe_device(dev);
1880	struct xe_file *xef = to_xe_file(file);
1881	struct drm_xe_gem_create *args = data;
1882	struct xe_vm *vm = NULL;
1883	struct xe_bo *bo;
1884	unsigned int bo_flags;
1885	u32 handle;
1886	int err;
1887
1888	if (XE_IOCTL_DBG(xe, args->extensions) ||
1889	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
1890	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1891		return -EINVAL;
1892
1893	/* at least one valid memory placement must be specified */
1894	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
1895			 !args->placement))
1896		return -EINVAL;
1897
1898	if (XE_IOCTL_DBG(xe, args->flags &
1899			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
1900			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
1901			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
1902		return -EINVAL;
1903
1904	if (XE_IOCTL_DBG(xe, args->handle))
1905		return -EINVAL;
1906
1907	if (XE_IOCTL_DBG(xe, !args->size))
1908		return -EINVAL;
1909
1910	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
1911		return -EINVAL;
1912
1913	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
1914		return -EINVAL;
1915
1916	bo_flags = 0;
1917	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
1918		bo_flags |= XE_BO_DEFER_BACKING;
1919
1920	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
1921		bo_flags |= XE_BO_SCANOUT_BIT;
1922
1923	bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
1924
1925	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
1926		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
1927			return -EINVAL;
1928
1929		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
1930	}
1931
1932	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
1933			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
1934		return -EINVAL;
1935
1936	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
1937			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
1938		return -EINVAL;
1939
1940	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
1941			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
1942		return -EINVAL;
1943
1944	if (args->vm_id) {
1945		vm = xe_vm_lookup(xef, args->vm_id);
1946		if (XE_IOCTL_DBG(xe, !vm))
1947			return -ENOENT;
1948		err = xe_vm_lock(vm, true);
1949		if (err)
1950			goto out_vm;
1951	}
1952
1953	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
1954			       ttm_bo_type_device, bo_flags);
1955
1956	if (vm)
1957		xe_vm_unlock(vm);
1958
1959	if (IS_ERR(bo)) {
1960		err = PTR_ERR(bo);
1961		goto out_vm;
1962	}
1963
1964	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
1965	if (err)
1966		goto out_bulk;
1967
1968	args->handle = handle;
1969	goto out_put;
1970
1971out_bulk:
1972	if (vm && !xe_vm_in_fault_mode(vm)) {
1973		xe_vm_lock(vm, false);
1974		__xe_bo_unset_bulk_move(bo);
1975		xe_vm_unlock(vm);
1976	}
1977out_put:
1978	xe_bo_put(bo);
1979out_vm:
1980	if (vm)
1981		xe_vm_put(vm);
1982
1983	return err;
1984}
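
/*
 * Userspace sketch (illustrative only): creating a BO through this ioctl
 * with libdrm. The placement mask value is an assumption here; real
 * userspace builds it from the memory regions reported by the device
 * query uAPI. "fd" is an open render node.
 *
 *	struct drm_xe_gem_create create = {
 *		.size = 0x10000,
 *		.placement = 1 << 0,
 *		.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create))
 *		return -errno;
 *
 *	The new handle is returned in create.handle.
 */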
1985
1986int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
1987			     struct drm_file *file)
1988{
1989	struct xe_device *xe = to_xe_device(dev);
1990	struct drm_xe_gem_mmap_offset *args = data;
1991	struct drm_gem_object *gem_obj;
1992
1993	if (XE_IOCTL_DBG(xe, args->extensions) ||
1994	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1995		return -EINVAL;
1996
1997	if (XE_IOCTL_DBG(xe, args->flags))
1998		return -EINVAL;
1999
2000	gem_obj = drm_gem_object_lookup(file, args->handle);
2001	if (XE_IOCTL_DBG(xe, !gem_obj))
2002		return -ENOENT;
2003
2004	/* The mmap offset was set up at BO allocation time. */
2005	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2006
2007	xe_bo_put(gem_to_xe_bo(gem_obj));
2008	return 0;
2009}
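
/*
 * Userspace sketch (illustrative only): turning a GEM handle into a CPU
 * mapping. The fake offset returned by this ioctl is passed to mmap() on
 * the same DRM file descriptor; "fd", "handle" and "size" are assumed.
 *
 *	struct drm_xe_gem_mmap_offset mmo = { .handle = handle };
 *	void *ptr;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo))
 *		return -errno;
 *
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, mmo.offset);
 */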
2010
2011/**
2012 * xe_bo_lock() - Lock the buffer object's dma_resv object
2013 * @bo: The struct xe_bo whose lock is to be taken
2014 * @intr: Whether any wait for a contended lock should be interruptible
2015 *
2016 * Locks the buffer object's dma_resv object. If the buffer object is
2017 * pointing to a shared dma_resv object, that shared lock is locked.
2018 *
2019 * Return: 0 on success, -EINTR if @intr is true and the wait for a
2020 * contended lock was interrupted. If @intr is set to false, the
2021 * function always returns 0.
2022 */
2023int xe_bo_lock(struct xe_bo *bo, bool intr)
2024{
2025	if (intr)
2026		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2027
2028	dma_resv_lock(bo->ttm.base.resv, NULL);
2029
2030	return 0;
2031}
2032
2033/**
2034 * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2035 * @bo: The struct xe_bo whose lock is to be released.
2036 *
2037 * Unlock a buffer object lock that was locked by xe_bo_lock().
2038 */
2039void xe_bo_unlock(struct xe_bo *bo)
2040{
2041	dma_resv_unlock(bo->ttm.base.resv);
2042}
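
/*
 * Usage sketch (illustrative only): taking the object lock interruptibly
 * before an operation that asserts it, here xe_bo_vmap(); "bo" is assumed
 * to be a valid buffer object.
 *
 *	int err;
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *
 *	err = xe_bo_vmap(bo);
 *	xe_bo_unlock(bo);
 */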
2043
2044/**
2045 * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2046 * @bo: The buffer object to migrate
2047 * @mem_type: The TTM memory type intended to migrate to
2048 *
2049 * Check whether the buffer object supports migration to the
2050 * given memory type. Note that pinning may affect the ability to migrate as
2051 * returned by this function.
2052 *
2053 * This function is primarily intended as a helper for checking whether
2054 * a buffer object can be migrated, and it can be called without the
2055 * object lock held.
2056 *
2057 * Return: true if migration is possible, false otherwise.
2058 */
2059bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2060{
2061	unsigned int cur_place;
2062
2063	if (bo->ttm.type == ttm_bo_type_kernel)
2064		return true;
2065
2066	if (bo->ttm.type == ttm_bo_type_sg)
2067		return false;
2068
2069	for (cur_place = 0; cur_place < bo->placement.num_placement;
2070	     cur_place++) {
2071		if (bo->placements[cur_place].mem_type == mem_type)
2072			return true;
2073	}
2074
2075	return false;
2076}
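
/*
 * Usage sketch (illustrative only): using the unlocked check to skip
 * objects that cannot be placed in GTT before taking the object lock.
 * try_migrate_to_gtt() is a hypothetical helper that locks the BO and
 * calls xe_bo_migrate(), as sketched after that function below.
 *
 *	if (!xe_bo_can_migrate(bo, XE_PL_TT))
 *		return -EINVAL;
 *
 *	return try_migrate_to_gtt(bo);
 */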
2077
2078static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2079{
2080	memset(place, 0, sizeof(*place));
2081	place->mem_type = mem_type;
2082}
2083
2084/**
2085 * xe_bo_migrate - Migrate an object to the desired region id
2086 * @bo: The buffer object to migrate.
2087 * @mem_type: The TTM region type to migrate to.
2088 *
2089 * Attempt to migrate the buffer object to the desired memory region. The
2090 * buffer object must not be pinned, and must be locked.
2091 * On successful completion, the object memory type will be updated,
2092 * but an async migration task may not have completed yet. To wait for
2093 * the migration itself to complete, wait for the object's kernel fences
2094 * to signal with the object lock held.
2095 *
2096 * Return: 0 on success. Negative error code on failure. In particular may
2097 * return -EINTR or -ERESTARTSYS if signal pending.
2098 */
2099int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2100{
2101	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2102	struct ttm_operation_ctx ctx = {
2103		.interruptible = true,
2104		.no_wait_gpu = false,
2105	};
2106	struct ttm_placement placement;
2107	struct ttm_place requested;
2108
2109	xe_bo_assert_held(bo);
2110
2111	if (bo->ttm.resource->mem_type == mem_type)
2112		return 0;
2113
2114	if (xe_bo_is_pinned(bo))
2115		return -EBUSY;
2116
2117	if (!xe_bo_can_migrate(bo, mem_type))
2118		return -EINVAL;
2119
2120	xe_place_from_ttm_type(mem_type, &requested);
2121	placement.num_placement = 1;
2122	placement.num_busy_placement = 1;
2123	placement.placement = &requested;
2124	placement.busy_placement = &requested;
2125
2126	/*
2127	 * Stolen would need to be handled like the VRAM case below if we
2128	 * ever need to support migrating to it.
2129	 */
2130	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2131
2132	if (mem_type_is_vram(mem_type)) {
2133		u32 c = 0;
2134
2135		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2136	}
2137
2138	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2139}
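
/*
 * Usage sketch (illustrative only): migrating a BO to VRAM0 and then
 * waiting for the async move to finish, mirroring the kernel-fence wait
 * described in the kernel-doc above; "bo" is assumed to be unpinned.
 *
 *	int err;
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_migrate(bo, XE_PL_VRAM0);
 *	if (!err)
 *		dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
 *				      false, MAX_SCHEDULE_TIMEOUT);
 *	xe_bo_unlock(bo);
 */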
2140
2141/**
2142 * xe_bo_evict - Evict an object to evict placement
2143 * @bo: The buffer object to migrate.
2144 * @force_alloc: Set force_alloc in ttm_operation_ctx
2145 *
2146 * On successful completion, the object memory will be moved to evict
2147 * placement. This function blocks until the object has been fully moved.
2148 *
2149 * Return: 0 on success. Negative error code on failure.
2150 */
2151int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2152{
2153	struct ttm_operation_ctx ctx = {
2154		.interruptible = false,
2155		.no_wait_gpu = false,
2156		.force_alloc = force_alloc,
2157	};
2158	struct ttm_placement placement;
2159	int ret;
2160
2161	xe_evict_flags(&bo->ttm, &placement);
2162	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2163	if (ret)
2164		return ret;
2165
2166	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2167			      false, MAX_SCHEDULE_TIMEOUT);
2168
2169	return 0;
2170}
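
/*
 * Usage sketch (illustrative only): evicting a single BO with the object
 * lock held and force_alloc set; "bo" is assumed to be an unpinned,
 * validated buffer object.
 *
 *	int ret;
 *
 *	xe_bo_lock(bo, false);
 *	ret = xe_bo_evict(bo, true);
 *	xe_bo_unlock(bo);
 */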
2171
2172/**
2173 * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2174 * placed in system memory.
2175 * @bo: The xe_bo
2176 *
2177 * Return: true if extra pages need to be allocated, false otherwise.
2178 */
2179bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2180{
2181	struct xe_device *xe = xe_bo_device(bo);
2182
2183	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2184		return false;
2185
2186	/* On discrete GPUs, if the GPU can access this buffer from
2187	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2188	 * can't be used since there's no CCS storage associated with
2189	 * non-VRAM addresses.
2190	 */
2191	if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
2192		return false;
2193
2194	return true;
2195}
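
/*
 * Usage sketch (illustrative only): sizing the extra system pages needed
 * to back up compression metadata, assuming a xe_device_ccs_bytes() helper
 * that converts a main-surface size into the matching number of CCS bytes.
 *
 *	unsigned long extra_pages = 0;
 *
 *	if (xe_bo_needs_ccs_pages(bo))
 *		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
 *					   PAGE_SIZE);
 */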
2196
2197/**
2198 * __xe_bo_release_dummy() - Dummy kref release function
2199 * @kref: The embedded struct kref.
2200 *
2201 * Dummy release function for xe_bo_put_deferred(). Do not use directly.
2202 */
2203void __xe_bo_release_dummy(struct kref *kref)
2204{
2205}
2206
2207/**
2208 * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2209 * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2210 *
2211 * Puts all bos whose put was deferred by xe_bo_put_deferred().
2212 * The @deferred list can be either an onstack local list or a global
2213 * shared list used by a workqueue.
2214 */
2215void xe_bo_put_commit(struct llist_head *deferred)
2216{
2217	struct llist_node *freed;
2218	struct xe_bo *bo, *next;
2219
2220	if (!deferred)
2221		return;
2222
2223	freed = llist_del_all(deferred);
2224	if (!freed)
2225		return;
2226
2227	llist_for_each_entry_safe(bo, next, freed, freed)
2228		drm_gem_object_free(&bo->ttm.base.refcount);
2229}
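
/*
 * Usage sketch (illustrative only): the deferred-put pattern, assuming the
 * xe_bo_put_deferred() helper from xe_bo.h. Final puts are batched on a
 * lockless list and released later from a context where taking the
 * required locks is safe.
 *
 *	LLIST_HEAD(deferred);
 *
 *	xe_bo_put_deferred(bo, &deferred);
 *	xe_bo_put_commit(&deferred);
 */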
2230
2231/**
2232 * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2233 * @file_priv: The DRM file of the client creating the buffer.
2234 * @dev: The DRM device.
2235 * @args: The dumb-buffer creation arguments, also used to return the handle.
2236 *
2237 * See dumb_create() hook in include/drm/drm_drv.h
2238 *
2239 * Return: 0 on success, negative error code on failure.
2240 */
2241int xe_bo_dumb_create(struct drm_file *file_priv,
2242		      struct drm_device *dev,
2243		      struct drm_mode_create_dumb *args)
2244{
2245	struct xe_device *xe = to_xe_device(dev);
2246	struct xe_bo *bo;
2247	uint32_t handle;
2248	int cpp = DIV_ROUND_UP(args->bpp, 8);
2249	int err;
2250	u32 page_size = max_t(u32, PAGE_SIZE,
2251		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2252
2253	args->pitch = ALIGN(args->width * cpp, 64);
2254	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2255			   page_size);
2256
2257	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2258			       DRM_XE_GEM_CPU_CACHING_WC,
2259			       ttm_bo_type_device,
2260			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2261			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
2262			       XE_BO_NEEDS_CPU_ACCESS);
2263	if (IS_ERR(bo))
2264		return PTR_ERR(bo);
2265
2266	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2267	/* drop reference from allocate - handle holds it now */
2268	drm_gem_object_put(&bo->ttm.base);
2269	if (!err)
2270		args->handle = handle;
2271	return err;
2272}
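
/*
 * Usage sketch (illustrative only): wiring this helper into a DRM driver's
 * dumb-buffer hook so that DRM_IOCTL_MODE_CREATE_DUMB is serviced by it.
 *
 *	static const struct drm_driver example_driver = {
 *		.dumb_create = xe_bo_dumb_create,
 *	};
 */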
2273
2274#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2275#include "tests/xe_bo.c"
2276#endif