   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2020 Intel Corporation
   4 */
   5
   6#include <asm/set_memory.h>
   7#include <asm/smp.h>
   8#include <linux/types.h>
   9#include <linux/stop_machine.h>
  10
  11#include <drm/drm_managed.h>
  12#include <drm/i915_drm.h>
  13#include <drm/intel-gtt.h>
  14
  15#include "display/intel_display.h"
  16#include "gem/i915_gem_lmem.h"
  17
  18#include "intel_context.h"
  19#include "intel_ggtt_gmch.h"
  20#include "intel_gpu_commands.h"
  21#include "intel_gt.h"
  22#include "intel_gt_regs.h"
  23#include "intel_pci_config.h"
  24#include "intel_ring.h"
  25#include "i915_drv.h"
  26#include "i915_pci.h"
  27#include "i915_request.h"
  28#include "i915_scatterlist.h"
  29#include "i915_utils.h"
  30#include "i915_vgpu.h"
  31
  32#include "intel_gtt.h"
  33#include "gen8_ppgtt.h"
  34#include "intel_engine_pm.h"
  35
  36static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
  37				   unsigned long color,
  38				   u64 *start,
  39				   u64 *end)
  40{
  41	if (i915_node_color_differs(node, color))
  42		*start += I915_GTT_PAGE_SIZE;
  43
  44	/*
  45	 * Also leave a space between the unallocated reserved node after the
  46	 * GTT and any objects within the GTT, i.e. we use the color adjustment
  47	 * to insert a guard page to prevent prefetches crossing over the
  48	 * GTT boundary.
  49	 */
  50	node = list_next_entry(node, node_list);
  51	if (node->color != color)
  52		*end -= I915_GTT_PAGE_SIZE;
  53}
  54
  55static int ggtt_init_hw(struct i915_ggtt *ggtt)
  56{
  57	struct drm_i915_private *i915 = ggtt->vm.i915;
  58
  59	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
  60
  61	ggtt->vm.is_ggtt = true;
  62
  63	/* Only VLV supports read-only GGTT mappings */
  64	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
  65
  66	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
  67		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
  68
  69	if (ggtt->mappable_end) {
  70		if (!io_mapping_init_wc(&ggtt->iomap,
  71					ggtt->gmadr.start,
  72					ggtt->mappable_end)) {
  73			ggtt->vm.cleanup(&ggtt->vm);
  74			return -EIO;
  75		}
  76
  77		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
  78					      ggtt->mappable_end);
  79	}
  80
  81	intel_ggtt_init_fences(ggtt);
  82
  83	return 0;
  84}
  85
  86/**
  87 * i915_ggtt_init_hw - Initialize GGTT hardware
  88 * @i915: i915 device
  89 */
  90int i915_ggtt_init_hw(struct drm_i915_private *i915)
  91{
  92	int ret;
  93
  94	/*
  95	 * Note that we use page colouring to enforce a guard page at the
  96	 * end of the address space. This is required as the CS may prefetch
  97	 * beyond the end of the batch buffer, across the page boundary,
  98	 * and beyond the end of the GTT if we do not provide a guard.
  99	 */
 100	ret = ggtt_init_hw(to_gt(i915)->ggtt);
 101	if (ret)
 102		return ret;
 103
 104	return 0;
 105}
 106
 107/**
 108 * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
 109 * @vm: The VM to suspend the mappings for
 110 *
 111 * Suspend the memory mappings for all objects mapped to HW via the GGTT or a
 112 * DPT page table.
 113 */
 114void i915_ggtt_suspend_vm(struct i915_address_space *vm)
 115{
 116	struct i915_vma *vma, *vn;
 117	int save_skip_rewrite;
 118
 119	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
 120
 121retry:
 122	i915_gem_drain_freed_objects(vm->i915);
 123
 124	mutex_lock(&vm->mutex);
 125
 126	/*
 127	 * Skip rewriting PTE on VMA unbind.
 128	 * FIXME: Use an argument to i915_vma_unbind() instead?
 129	 */
 130	save_skip_rewrite = vm->skip_pte_rewrite;
 131	vm->skip_pte_rewrite = true;
 132
 133	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
 134		struct drm_i915_gem_object *obj = vma->obj;
 135
 136		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 137
 138		if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
 139			continue;
 140
 141		/* unlikely to race when GPU is idle, so no worry about slowpath.. */
 142		if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
 143			/*
 144			 * No dead objects should appear here, GPU should be
 145			 * completely idle, and userspace suspended
 146			 */
 147			i915_gem_object_get(obj);
 148
 149			mutex_unlock(&vm->mutex);
 150
 151			i915_gem_object_lock(obj, NULL);
 152			GEM_WARN_ON(i915_vma_unbind(vma));
 153			i915_gem_object_unlock(obj);
 154			i915_gem_object_put(obj);
 155
 156			vm->skip_pte_rewrite = save_skip_rewrite;
 157			goto retry;
 158		}
 159
  160		if (!i915_vma_is_pinned(vma)) {
 161			i915_vma_wait_for_bind(vma);
 162
 163			__i915_vma_evict(vma, false);
 164			drm_mm_remove_node(&vma->node);
 165		}
 166
 167		i915_gem_object_unlock(obj);
 168	}
 169
 170	vm->clear_range(vm, 0, vm->total);
 171
 172	vm->skip_pte_rewrite = save_skip_rewrite;
 173
 174	mutex_unlock(&vm->mutex);
 175}
 176
 177void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 178{
 179	struct intel_gt *gt;
 180
 181	i915_ggtt_suspend_vm(&ggtt->vm);
 182	ggtt->invalidate(ggtt);
 183
 184	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
 185		intel_gt_check_and_clear_faults(gt);
 186}
 187
 188void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
 189{
 190	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 191
 192	spin_lock_irq(&uncore->lock);
 193	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 194	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
 195	spin_unlock_irq(&uncore->lock);
 196}
 197
 198static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915)
 199{
 200	/*
 201	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
 202	 * will be dropped. For WC mappings in general we have 64 byte burst
 203	 * writes when the WC buffer is flushed, so we can't use it, but have to
 204	 * resort to an uncached mapping. The WC issue is easily caught by the
 205	 * readback check when writing GTT PTE entries.
 206	 */
 207	if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11)
 208		return true;
 209
 210	return false;
 211}
 212
 213static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
 214{
 215	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 216
 217	/*
 218	 * Note that as an uncached mmio write, this will flush the
 219	 * WCB of the writes into the GGTT before it triggers the invalidate.
 220	 *
 221	 * Only perform this when GGTT is mapped as WC, see ggtt_probe_common().
 222	 */
 223	if (needs_wc_ggtt_mapping(ggtt->vm.i915))
 224		intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6,
 225				      GFX_FLSH_CNTL_EN);
 226}
 227
 228static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
 229{
 230	struct intel_uncore *uncore = gt->uncore;
 231	intel_wakeref_t wakeref;
 232
 233	with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
 234		struct intel_guc *guc = &gt->uc.guc;
 235
 236		intel_guc_invalidate_tlb_guc(guc);
 237	}
 238}
 239
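/*
 * On top of the gen8 GGTT invalidation, ask the GuC to invalidate its own
 * TLB of GGTT translations: via a CT message when the firmware supports
 * it, otherwise by writing the per-platform GuC TLB invalidation register.
 */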
 240static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 241{
 242	struct drm_i915_private *i915 = ggtt->vm.i915;
 243	struct intel_gt *gt;
 244
 245	gen8_ggtt_invalidate(ggtt);
 246
 247	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
 248		if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc))
 249			guc_ggtt_ct_invalidate(gt);
 250		else if (GRAPHICS_VER(i915) >= 12)
 251			intel_uncore_write_fw(gt->uncore,
 252					      GEN12_GUC_TLB_INV_CR,
 253					      GEN12_GUC_TLB_INV_CR_INVALIDATE);
 254		else
 255			intel_uncore_write_fw(gt->uncore,
 256					      GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 257	}
 258}
 259
 260static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
 261			       unsigned int pat_index,
 262			       u32 flags)
 263{
 264	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
 265
 266	WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
 267
 268	if (flags & PTE_LM)
 269		pte |= GEN12_GGTT_PTE_LM;
 270
 271	if (pat_index & BIT(0))
 272		pte |= MTL_GGTT_PTE_PAT0;
 273
 274	if (pat_index & BIT(1))
 275		pte |= MTL_GGTT_PTE_PAT1;
 276
 277	return pte;
 278}
 279
 280u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 281			 unsigned int pat_index,
 282			 u32 flags)
 283{
 284	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
 285
 286	if (flags & PTE_LM)
 287		pte |= GEN12_GGTT_PTE_LM;
 288
 289	return pte;
 290}
 291
 292static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
 293{
 294	struct intel_gt *gt = ggtt->vm.gt;
 295
 296	return intel_gt_is_bind_context_ready(gt);
 297}
 298
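/*
 * The helpers below implement GGTT updates through the GPU itself: PTEs
 * are written with MI_UPDATE_GTT commands submitted on the dedicated bind
 * context of the BCS0 engine, rather than through CPU MMIO writes to the
 * GSM. gen8_ggtt_bind_get_ce() grabs that context (and the required power
 * references); callers fall back to the MMIO path when it is unavailable.
 */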
 299static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref)
 300{
 301	struct intel_context *ce;
 302	struct intel_gt *gt = ggtt->vm.gt;
 303
 304	if (intel_gt_is_wedged(gt))
 305		return NULL;
 306
 307	ce = gt->engine[BCS0]->bind_context;
 308	GEM_BUG_ON(!ce);
 309
 310	/*
  311	 * If the GT is not already awake at this stage, fall back to the
  312	 * PCI-based GGTT update; otherwise __intel_wakeref_get_first()
 313	 * would conflict with fs_reclaim trying to allocate memory while
 314	 * doing rpm_resume().
 315	 */
 316	*wakeref = intel_gt_pm_get_if_awake(gt);
 317	if (!*wakeref)
 318		return NULL;
 319
 320	intel_engine_pm_get(ce->engine);
 321
 322	return ce;
 323}
 324
 325static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref)
 326{
 327	intel_engine_pm_put(ce->engine);
 328	intel_gt_pm_put(ce->engine->gt, wakeref);
 329}
 330
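/*
 * Write @num_entries GGTT PTEs starting at @offset using MI_UPDATE_GTT.
 * If @pages is provided, PTEs are generated from its sg list (with any
 * shortfall padded with the scratch PTE); otherwise every entry is set to
 * @pte. Returns false if the bind context, ring space or request could
 * not be obtained, so the caller can fall back to the MMIO path.
 */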
 331static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
 332				struct sg_table *pages, u32 num_entries,
 333				const gen8_pte_t pte)
 334{
 335	struct i915_sched_attr attr = {};
 336	struct intel_gt *gt = ggtt->vm.gt;
 337	const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
 338	struct sgt_iter iter;
 339	struct i915_request *rq;
 340	struct intel_context *ce;
 341	intel_wakeref_t wakeref;
 342	u32 *cs;
 343
 344	if (!num_entries)
 345		return true;
 346
 347	ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref);
 348	if (!ce)
 349		return false;
 350
 351	if (pages)
 352		iter = __sgt_iter(pages->sgl, true);
 353
 354	while (num_entries) {
 355		int count = 0;
 356		dma_addr_t addr;
 357		/*
  358		 * MI_UPDATE_GTT can update 512 entries in a single command, but
  359		 * that ends up triggering an engine reset; 511 entries work.
 360		 */
 361		u32 n_ptes = min_t(u32, 511, num_entries);
 362
 363		if (mutex_lock_interruptible(&ce->timeline->mutex))
 364			goto put_ce;
 365
 366		intel_context_enter(ce);
 367		rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
 368		intel_context_exit(ce);
 369		if (IS_ERR(rq)) {
 370			GT_TRACE(gt, "Failed to get bind request\n");
 371			mutex_unlock(&ce->timeline->mutex);
 372			goto put_ce;
 373		}
 374
 375		cs = intel_ring_begin(rq, 2 * n_ptes + 2);
 376		if (IS_ERR(cs)) {
  377			GT_TRACE(gt, "Failed to get ring space for GGTT bind\n");
 378			i915_request_set_error_once(rq, PTR_ERR(cs));
 379			/* once a request is created, it must be queued */
 380			goto queue_err_rq;
 381		}
 382
 383		*cs++ = MI_UPDATE_GTT | (2 * n_ptes);
 384		*cs++ = offset << 12;
 385
 386		if (pages) {
 387			for_each_sgt_daddr_next(addr, iter) {
 388				if (count == n_ptes)
 389					break;
 390				*cs++ = lower_32_bits(pte | addr);
 391				*cs++ = upper_32_bits(pte | addr);
 392				count++;
 393			}
 394			/* fill remaining with scratch pte, if any */
 395			if (count < n_ptes) {
 396				memset64((u64 *)cs, scratch_pte,
 397					 n_ptes - count);
 398				cs += (n_ptes - count) * 2;
 399			}
 400		} else {
 401			memset64((u64 *)cs, pte, n_ptes);
 402			cs += n_ptes * 2;
 403		}
 404
 405		intel_ring_advance(rq, cs);
 406queue_err_rq:
 407		i915_request_get(rq);
 408		__i915_request_commit(rq);
 409		__i915_request_queue(rq, &attr);
 410
 411		mutex_unlock(&ce->timeline->mutex);
  412		/* The wait below returns once the request completes or after an engine reset */
 413		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
 414		if (rq->fence.error)
 415			goto err_rq;
 416
 417		i915_request_put(rq);
 418
 419		num_entries -= n_ptes;
 420		offset += n_ptes;
 421	}
 422
 423	gen8_ggtt_bind_put_ce(ce, wakeref);
 424	return true;
 425
 426err_rq:
 427	i915_request_put(rq);
 428put_ce:
 429	gen8_ggtt_bind_put_ce(ce, wakeref);
 430	return false;
 431}
 432
 433static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 434{
 435	writeq(pte, addr);
 436}
 437
 438static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 439				  dma_addr_t addr,
 440				  u64 offset,
 441				  unsigned int pat_index,
 442				  u32 flags)
 443{
 444	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 445	gen8_pte_t __iomem *pte =
 446		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
 447
 448	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags));
 449
 450	ggtt->invalidate(ggtt);
 451}
 452
 453static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
 454				       dma_addr_t addr, u64 offset,
 455				       unsigned int pat_index, u32 flags)
 456{
 457	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 458	gen8_pte_t pte;
 459
 460	pte = ggtt->vm.pte_encode(addr, pat_index, flags);
 461	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
 462	    gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
 463		return ggtt->invalidate(ggtt);
 464
 465	gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
 466}
 467
 468static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 469				     struct i915_vma_resource *vma_res,
 470				     unsigned int pat_index,
 471				     u32 flags)
 472{
 473	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 474	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
 475	gen8_pte_t __iomem *gte;
 476	gen8_pte_t __iomem *end;
 477	struct sgt_iter iter;
 478	dma_addr_t addr;
 479
 480	/*
 481	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
 482	 * not to allow the user to override access to a read only page.
 483	 */
 484
 485	gte = (gen8_pte_t __iomem *)ggtt->gsm;
 486	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
 487	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
 488	while (gte < end)
 489		gen8_set_pte(gte++, vm->scratch[0]->encode);
 490	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
 491
 492	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
 493		gen8_set_pte(gte++, pte_encode | addr);
 494	GEM_BUG_ON(gte > end);
 495
 496	/* Fill the allocated but "unused" space beyond the end of the buffer */
 497	while (gte < end)
 498		gen8_set_pte(gte++, vm->scratch[0]->encode);
 499
 500	/*
 501	 * We want to flush the TLBs only after we're certain all the PTE
 502	 * updates have finished.
 503	 */
 504	ggtt->invalidate(ggtt);
 505}
 506
 507static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
 508					    struct i915_vma_resource *vma_res,
 509					    unsigned int pat_index, u32 flags)
 510{
 511	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 512	gen8_pte_t scratch_pte = vm->scratch[0]->encode;
 513	gen8_pte_t pte_encode;
 514	u64 start, end;
 515
 516	pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
 517	start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
 518	end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
 519	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
 520		goto err;
 521
 522	start = end;
 523	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
 524	if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
 525	      vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
 526		goto err;
 527
 528	start += vma_res->node_size / I915_GTT_PAGE_SIZE;
 529	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
 530		goto err;
 531
 532	return true;
 533
 534err:
 535	return false;
 536}
 537
 538static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
 539					  struct i915_vma_resource *vma_res,
 540					  unsigned int pat_index, u32 flags)
 541{
 542	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 543
 544	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
 545	    __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
 546		return ggtt->invalidate(ggtt);
 547
 548	gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
 549}
 550
 551static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 552				  u64 start, u64 length)
 553{
 554	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 555	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
 556	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
 557	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
 558	gen8_pte_t __iomem *gtt_base =
 559		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
 560	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
 561	int i;
 562
 563	if (WARN(num_entries > max_entries,
 564		 "First entry = %d; Num entries = %d (max=%d)\n",
 565		 first_entry, num_entries, max_entries))
 566		num_entries = max_entries;
 567
 568	for (i = 0; i < num_entries; i++)
 569		gen8_set_pte(&gtt_base[i], scratch_pte);
 570}
 571
 572static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
 573					 u64 start, u64 length)
 574{
 575	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 576	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
 577	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
 578	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
 579	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
 580
 581	if (WARN(num_entries > max_entries,
 582		 "First entry = %d; Num entries = %d (max=%d)\n",
 583		 first_entry, num_entries, max_entries))
 584		num_entries = max_entries;
 585
 586	if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry,
 587	     NULL, num_entries, scratch_pte))
 588		return ggtt->invalidate(ggtt);
 589
 590	gen8_ggtt_clear_range(vm, start, length);
 591}
 592
 593static void gen6_ggtt_insert_page(struct i915_address_space *vm,
 594				  dma_addr_t addr,
 595				  u64 offset,
 596				  unsigned int pat_index,
 597				  u32 flags)
 598{
 599	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 600	gen6_pte_t __iomem *pte =
 601		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
 602
 603	iowrite32(vm->pte_encode(addr, pat_index, flags), pte);
 604
 605	ggtt->invalidate(ggtt);
 606}
 607
 608/*
  609 * Binds an object into the global GTT with the specified caching attributes.
 610 * The object will be accessible to the GPU via commands whose operands
 611 * reference offsets within the global GTT as well as accessible by the GPU
 612 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 613 */
 614static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 615				     struct i915_vma_resource *vma_res,
 616				     unsigned int pat_index,
 617				     u32 flags)
 618{
 619	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 620	gen6_pte_t __iomem *gte;
 621	gen6_pte_t __iomem *end;
 622	struct sgt_iter iter;
 623	dma_addr_t addr;
 624
 625	gte = (gen6_pte_t __iomem *)ggtt->gsm;
 626	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
 627
 628	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
 629	while (gte < end)
 630		iowrite32(vm->scratch[0]->encode, gte++);
 631	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
 632	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
 633		iowrite32(vm->pte_encode(addr, pat_index, flags), gte++);
 634	GEM_BUG_ON(gte > end);
 635
 636	/* Fill the allocated but "unused" space beyond the end of the buffer */
 637	while (gte < end)
 638		iowrite32(vm->scratch[0]->encode, gte++);
 639
 640	/*
 641	 * We want to flush the TLBs only after we're certain all the PTE
 642	 * updates have finished.
 643	 */
 644	ggtt->invalidate(ggtt);
 645}
 646
 647static void nop_clear_range(struct i915_address_space *vm,
 648			    u64 start, u64 length)
 649{
 650}
 651
 652static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
 653{
 654	/*
 655	 * Make sure the internal GAM fifo has been cleared of all GTT
 656	 * writes before exiting stop_machine(). This guarantees that
 657	 * any aperture accesses waiting to start in another process
 658	 * cannot back up behind the GTT writes causing a hang.
 659	 * The register can be any arbitrary GAM register.
 660	 */
 661	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
 662}
 663
 664struct insert_page {
 665	struct i915_address_space *vm;
 666	dma_addr_t addr;
 667	u64 offset;
 668	unsigned int pat_index;
 669};
 670
 671static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
 672{
 673	struct insert_page *arg = _arg;
 674
 675	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset,
 676			      arg->pat_index, 0);
 677	bxt_vtd_ggtt_wa(arg->vm);
 678
 679	return 0;
 680}
 681
 682static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
 683					  dma_addr_t addr,
 684					  u64 offset,
 685					  unsigned int pat_index,
 686					  u32 unused)
 687{
 688	struct insert_page arg = { vm, addr, offset, pat_index };
 689
 690	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
 691}
 692
 693struct insert_entries {
 694	struct i915_address_space *vm;
 695	struct i915_vma_resource *vma_res;
 696	unsigned int pat_index;
 697	u32 flags;
 698};
 699
 700static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
 701{
 702	struct insert_entries *arg = _arg;
 703
 704	gen8_ggtt_insert_entries(arg->vm, arg->vma_res,
 705				 arg->pat_index, arg->flags);
 706	bxt_vtd_ggtt_wa(arg->vm);
 707
 708	return 0;
 709}
 710
 711static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
 712					     struct i915_vma_resource *vma_res,
 713					     unsigned int pat_index,
 714					     u32 flags)
 715{
 716	struct insert_entries arg = { vm, vma_res, pat_index, flags };
 717
 718	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
 719}
 720
 721static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 722				  u64 start, u64 length)
 723{
 724	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 725	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
 726	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
 727	gen6_pte_t scratch_pte, __iomem *gtt_base =
 728		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
 729	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
 730	int i;
 731
 732	if (WARN(num_entries > max_entries,
 733		 "First entry = %d; Num entries = %d (max=%d)\n",
 734		 first_entry, num_entries, max_entries))
 735		num_entries = max_entries;
 736
 737	scratch_pte = vm->scratch[0]->encode;
 738	for (i = 0; i < num_entries; i++)
 739		iowrite32(scratch_pte, &gtt_base[i]);
 740}
 741
 742void intel_ggtt_bind_vma(struct i915_address_space *vm,
 743			 struct i915_vm_pt_stash *stash,
 744			 struct i915_vma_resource *vma_res,
 745			 unsigned int pat_index,
 746			 u32 flags)
 747{
 748	u32 pte_flags;
 749
 750	if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
 751		return;
 752
 753	vma_res->bound_flags |= flags;
 754
 755	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
 756	pte_flags = 0;
 757	if (vma_res->bi.readonly)
 758		pte_flags |= PTE_READ_ONLY;
 759	if (vma_res->bi.lmem)
 760		pte_flags |= PTE_LM;
 761
 762	vm->insert_entries(vm, vma_res, pat_index, pte_flags);
 763	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
 764}
 765
 766void intel_ggtt_unbind_vma(struct i915_address_space *vm,
 767			   struct i915_vma_resource *vma_res)
 768{
 769	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 770}
 771
 772/*
 773 * Reserve the top of the GuC address space for firmware images. Addresses
 774 * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
 775 * which makes for a suitable range to hold GuC/HuC firmware images if the
 776 * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
 777 * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
 778 * of the same size anyway, which is far more than needed, to keep the logic
 779 * in uc_fw_ggtt_offset() simple.
 780 */
 781#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
 782
 783static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
 784{
 785	u64 offset;
 786	int ret;
 787
 788	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
 789		return 0;
 790
 791	GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
 792	offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;
 793
 794	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
 795				   GUC_TOP_RESERVE_SIZE, offset,
 796				   I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
 797	if (ret)
 798		drm_dbg(&ggtt->vm.i915->drm,
 799			"Failed to reserve top of GGTT for GuC\n");
 800
 801	return ret;
 802}
 803
 804static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
 805{
 806	if (drm_mm_node_allocated(&ggtt->uc_fw))
 807		drm_mm_remove_node(&ggtt->uc_fw);
 808}
 809
 810static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
 811{
 812	ggtt_release_guc_top(ggtt);
 813	if (drm_mm_node_allocated(&ggtt->error_capture))
 814		drm_mm_remove_node(&ggtt->error_capture);
 815	mutex_destroy(&ggtt->error_mutex);
 816}
 817
 818static int init_ggtt(struct i915_ggtt *ggtt)
 819{
 820	/*
 821	 * Let GEM Manage all of the aperture.
 822	 *
 823	 * However, leave one page at the end still bound to the scratch page.
 824	 * There are a number of places where the hardware apparently prefetches
 825	 * past the end of the object, and we've seen multiple hangs with the
 826	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
 827	 * aperture.  One page should be enough to keep any prefetching inside
 828	 * of the aperture.
 829	 */
 830	unsigned long hole_start, hole_end;
 831	struct drm_mm_node *entry;
 832	int ret;
 833
 834	/*
 835	 * GuC requires all resources that we're sharing with it to be placed in
 836	 * non-WOPCM memory. If GuC is not present or not in use we still need a
 837	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
 838	 * why.
 839	 */
 840	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
 841			       intel_wopcm_guc_size(&ggtt->vm.gt->wopcm));
 842
 843	ret = intel_vgt_balloon(ggtt);
 844	if (ret)
 845		return ret;
 846
 847	mutex_init(&ggtt->error_mutex);
 848	if (ggtt->mappable_end) {
 849		/*
 850		 * Reserve a mappable slot for our lockless error capture.
 851		 *
 852		 * We strongly prefer taking address 0x0 in order to protect
 853		 * other critical buffers against accidental overwrites,
 854		 * as writing to address 0 is a very common mistake.
 855		 *
 856		 * Since 0 may already be in use by the system (e.g. the BIOS
 857		 * framebuffer), we let the reservation fail quietly and hope
 858		 * 0 remains reserved always.
 859		 *
 860		 * If we fail to reserve 0, and then fail to find any space
 861		 * for an error-capture, remain silent. We can afford not
 862		 * to reserve an error_capture node as we have fallback
 863		 * paths, and we trust that 0 will remain reserved. However,
 864		 * the only likely reason for failure to insert is a driver
 865		 * bug, which we expect to cause other failures...
 866		 *
 867		 * Since CPU can perform speculative reads on error capture
 868		 * (write-combining allows it) add scratch page after error
 869		 * capture to avoid DMAR errors.
 870		 */
 871		ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE;
 872		ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
 873		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
 874			drm_mm_insert_node_in_range(&ggtt->vm.mm,
 875						    &ggtt->error_capture,
 876						    ggtt->error_capture.size, 0,
 877						    ggtt->error_capture.color,
 878						    0, ggtt->mappable_end,
 879						    DRM_MM_INSERT_LOW);
 880	}
 881	if (drm_mm_node_allocated(&ggtt->error_capture)) {
 882		u64 start = ggtt->error_capture.start;
 883		u64 size = ggtt->error_capture.size;
 884
 885		ggtt->vm.scratch_range(&ggtt->vm, start, size);
 886		drm_dbg(&ggtt->vm.i915->drm,
 887			"Reserved GGTT:[%llx, %llx] for use by error capture\n",
 888			start, start + size);
 889	}
 890
 891	/*
 892	 * The upper portion of the GuC address space has a sizeable hole
 893	 * (several MB) that is inaccessible by GuC. Reserve this range within
 894	 * GGTT as it can comfortably hold GuC/HuC firmware images.
 895	 */
 896	ret = ggtt_reserve_guc_top(ggtt);
 897	if (ret)
 898		goto err;
 899
 900	/* Clear any non-preallocated blocks */
 901	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
 902		drm_dbg(&ggtt->vm.i915->drm,
 903			"clearing unused GTT space: [%lx, %lx]\n",
 904			hole_start, hole_end);
 905		ggtt->vm.clear_range(&ggtt->vm, hole_start,
 906				     hole_end - hole_start);
 907	}
 908
 909	/* And finally clear the reserved guard page */
 910	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
 911
 912	return 0;
 913
 914err:
 915	cleanup_init_ggtt(ggtt);
 916	return ret;
 917}
 918
 919static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
 920				  struct i915_vm_pt_stash *stash,
 921				  struct i915_vma_resource *vma_res,
 922				  unsigned int pat_index,
 923				  u32 flags)
 924{
 925	u32 pte_flags;
 926
 927	/* Currently applicable only to VLV */
 928	pte_flags = 0;
 929	if (vma_res->bi.readonly)
 930		pte_flags |= PTE_READ_ONLY;
 931
 932	if (flags & I915_VMA_LOCAL_BIND)
 933		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
 934			       stash, vma_res, pat_index, flags);
 935
 936	if (flags & I915_VMA_GLOBAL_BIND)
 937		vm->insert_entries(vm, vma_res, pat_index, pte_flags);
 938
 939	vma_res->bound_flags |= flags;
 940}
 941
 942static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
 943				    struct i915_vma_resource *vma_res)
 944{
 945	if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
 946		vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 947
 948	if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
 949		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
 950}
 951
 952static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 953{
 954	struct i915_vm_pt_stash stash = {};
 955	struct i915_ppgtt *ppgtt;
 956	int err;
 957
 958	ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
 959	if (IS_ERR(ppgtt))
 960		return PTR_ERR(ppgtt);
 961
 962	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
 963		err = -ENODEV;
 964		goto err_ppgtt;
 965	}
 966
 967	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
 968	if (err)
 969		goto err_ppgtt;
 970
 971	i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
 972	err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
 973	i915_gem_object_unlock(ppgtt->vm.scratch[0]);
 974	if (err)
 975		goto err_stash;
 976
 977	/*
 978	 * Note we only pre-allocate as far as the end of the global
 979	 * GTT. On 48b / 4-level page-tables, the difference is very,
 980	 * very significant! We have to preallocate as GVT/vgpu does
 981	 * not like the page directory disappearing.
 982	 */
 983	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
 984
 985	ggtt->alias = ppgtt;
 986	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
 987
 988	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
 989	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
 990
 991	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
 992	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
 993
 994	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
 995	return 0;
 996
 997err_stash:
 998	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
 999err_ppgtt:
1000	i915_vm_put(&ppgtt->vm);
1001	return err;
1002}
1003
1004static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
1005{
1006	struct i915_ppgtt *ppgtt;
1007
1008	ppgtt = fetch_and_zero(&ggtt->alias);
1009	if (!ppgtt)
1010		return;
1011
1012	i915_vm_put(&ppgtt->vm);
1013
1014	ggtt->vm.vma_ops.bind_vma   = intel_ggtt_bind_vma;
1015	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
1016}
1017
1018int i915_init_ggtt(struct drm_i915_private *i915)
1019{
1020	int ret;
1021
1022	ret = init_ggtt(to_gt(i915)->ggtt);
1023	if (ret)
1024		return ret;
1025
1026	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
1027		ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
1028		if (ret)
1029			cleanup_init_ggtt(to_gt(i915)->ggtt);
1030	}
1031
1032	return 0;
1033}
1034
1035static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
1036{
1037	struct i915_vma *vma, *vn;
1038
1039	flush_workqueue(ggtt->vm.i915->wq);
1040	i915_gem_drain_freed_objects(ggtt->vm.i915);
1041
1042	mutex_lock(&ggtt->vm.mutex);
1043
1044	ggtt->vm.skip_pte_rewrite = true;
1045
1046	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
1047		struct drm_i915_gem_object *obj = vma->obj;
1048		bool trylock;
1049
1050		trylock = i915_gem_object_trylock(obj, NULL);
1051		WARN_ON(!trylock);
1052
1053		WARN_ON(__i915_vma_unbind(vma));
1054		if (trylock)
1055			i915_gem_object_unlock(obj);
1056	}
1057
1058	if (drm_mm_node_allocated(&ggtt->error_capture))
1059		drm_mm_remove_node(&ggtt->error_capture);
1060	mutex_destroy(&ggtt->error_mutex);
1061
1062	ggtt_release_guc_top(ggtt);
1063	intel_vgt_deballoon(ggtt);
1064
1065	ggtt->vm.cleanup(&ggtt->vm);
1066
1067	mutex_unlock(&ggtt->vm.mutex);
1068	i915_address_space_fini(&ggtt->vm);
1069
1070	arch_phys_wc_del(ggtt->mtrr);
1071
1072	if (ggtt->iomap.size)
1073		io_mapping_fini(&ggtt->iomap);
1074}
1075
1076/**
1077 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
1078 * @i915: i915 device
1079 */
1080void i915_ggtt_driver_release(struct drm_i915_private *i915)
1081{
1082	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
1083
1084	fini_aliasing_ppgtt(ggtt);
1085
1086	intel_ggtt_fini_fences(ggtt);
1087	ggtt_cleanup_hw(ggtt);
1088}
1089
1090/**
1091 * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
1092 * all free objects have been drained.
1093 * @i915: i915 device
1094 */
1095void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
1096{
1097	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
1098
1099	GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
1100	dma_resv_fini(&ggtt->vm._resv);
1101}
1102
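/*
 * The helpers below decode the size of the GTT page table (mapped at
 * ggtt->gsm) from the GGMS field of the GMCH control word; the probe
 * functions then derive the GGTT address space as
 * (GTT size / PTE size) * I915_GTT_PAGE_SIZE.
 */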
1103static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
1104{
1105	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
1106	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
1107	return snb_gmch_ctl << 20;
1108}
1109
1110static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
1111{
1112	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
1113	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
1114	if (bdw_gmch_ctl)
1115		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
1116
1117#ifdef CONFIG_X86_32
1118	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
1119	if (bdw_gmch_ctl > 4)
1120		bdw_gmch_ctl = 4;
1121#endif
1122
1123	return bdw_gmch_ctl << 20;
1124}
1125
1126static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
1127{
1128	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
1129	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
1130
1131	if (gmch_ctrl)
1132		return 1 << (20 + gmch_ctrl);
1133
1134	return 0;
1135}
1136
1137static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
1138{
1139	/*
1140	 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
1141	 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
1142	 */
1143	GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
1144	return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
1145}
1146
1147static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
1148{
1149	return gen6_gttmmadr_size(i915) / 2;
1150}
1151
1152static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
1153{
1154	struct drm_i915_private *i915 = ggtt->vm.i915;
1155	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1156	phys_addr_t phys_addr;
1157	u32 pte_flags;
1158	int ret;
1159
1160	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
1161	phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
1162
1163	if (needs_wc_ggtt_mapping(i915))
1164		ggtt->gsm = ioremap_wc(phys_addr, size);
1165	else
1166		ggtt->gsm = ioremap(phys_addr, size);
1167
1168	if (!ggtt->gsm) {
1169		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
1170		return -ENOMEM;
1171	}
1172
1173	kref_init(&ggtt->vm.resv_ref);
1174	ret = setup_scratch_page(&ggtt->vm);
1175	if (ret) {
1176		drm_err(&i915->drm, "Scratch setup failed\n");
1177		/* iounmap will also get called at remove, but meh */
1178		iounmap(ggtt->gsm);
1179		return ret;
1180	}
1181
1182	pte_flags = 0;
1183	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
1184		pte_flags |= PTE_LM;
1185
1186	ggtt->vm.scratch[0]->encode =
1187		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
1188				    i915_gem_get_pat_index(i915,
1189							   I915_CACHE_NONE),
1190				    pte_flags);
1191
1192	return 0;
1193}
1194
1195static void gen6_gmch_remove(struct i915_address_space *vm)
1196{
1197	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
1198
1199	iounmap(ggtt->gsm);
1200	free_scratch(vm);
1201}
1202
1203static struct resource pci_resource(struct pci_dev *pdev, int bar)
1204{
1205	return DEFINE_RES_MEM(pci_resource_start(pdev, bar),
1206			      pci_resource_len(pdev, bar));
1207}
1208
1209static int gen8_gmch_probe(struct i915_ggtt *ggtt)
1210{
1211	struct drm_i915_private *i915 = ggtt->vm.i915;
1212	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1213	unsigned int size;
1214	u16 snb_gmch_ctl;
1215
1216	if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
1217		if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
1218			return -ENXIO;
1219
1220		ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
1221		ggtt->mappable_end = resource_size(&ggtt->gmadr);
1222	}
1223
1224	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1225	if (IS_CHERRYVIEW(i915))
1226		size = chv_get_total_gtt_size(snb_gmch_ctl);
1227	else
1228		size = gen8_get_total_gtt_size(snb_gmch_ctl);
1229
1230	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
1231	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
1232	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
1233
1234	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
1235	ggtt->vm.cleanup = gen6_gmch_remove;
1236	ggtt->vm.insert_page = gen8_ggtt_insert_page;
1237	ggtt->vm.clear_range = nop_clear_range;
1238	ggtt->vm.scratch_range = gen8_ggtt_clear_range;
1239
1240	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
1241
1242	/*
1243	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
1244	 * and always on CHV.
1245	 */
1246	if (intel_vm_no_concurrent_access_wa(i915)) {
1247		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
1248		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
1249
1250		/*
 1251		 * Calling the stop_machine() version of the GGTT update
 1252		 * functions from the error capture/reset path would raise a
 1253		 * lockdep warning. Allow calling gen8_ggtt_insert_* directly
 1254		 * on the reset path, which is safe from parallel GGTT updates.
1255		 */
1256		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
1257		ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries;
1258
1259		ggtt->vm.bind_async_flags =
1260			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
1261	}
1262
1263	if (i915_ggtt_require_binder(i915)) {
1264		ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
1265		ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
1266		ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
1267		/*
 1268		 * When the GPU is hung, we might bind VMAs for error capture.
 1269		 * Fall back to CPU GGTT updates in that case.
1270		 */
1271		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
1272	}
1273
1274	if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc))
1275		ggtt->invalidate = guc_ggtt_invalidate;
1276	else
1277		ggtt->invalidate = gen8_ggtt_invalidate;
1278
1279	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
1280	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
1281
1282	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
1283		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
1284	else
1285		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
1286
1287	return ggtt_probe_common(ggtt, size);
1288}
1289
1290/*
1291 * For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
1292 * so the switch-case statements in these PTE encode functions are still valid.
1293 * See translation table LEGACY_CACHELEVEL.
1294 */
1295static u64 snb_pte_encode(dma_addr_t addr,
1296			  unsigned int pat_index,
1297			  u32 flags)
1298{
1299	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1300
1301	switch (pat_index) {
1302	case I915_CACHE_L3_LLC:
1303	case I915_CACHE_LLC:
1304		pte |= GEN6_PTE_CACHE_LLC;
1305		break;
1306	case I915_CACHE_NONE:
1307		pte |= GEN6_PTE_UNCACHED;
1308		break;
1309	default:
1310		MISSING_CASE(pat_index);
1311	}
1312
1313	return pte;
1314}
1315
1316static u64 ivb_pte_encode(dma_addr_t addr,
1317			  unsigned int pat_index,
1318			  u32 flags)
1319{
1320	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1321
1322	switch (pat_index) {
1323	case I915_CACHE_L3_LLC:
1324		pte |= GEN7_PTE_CACHE_L3_LLC;
1325		break;
1326	case I915_CACHE_LLC:
1327		pte |= GEN6_PTE_CACHE_LLC;
1328		break;
1329	case I915_CACHE_NONE:
1330		pte |= GEN6_PTE_UNCACHED;
1331		break;
1332	default:
1333		MISSING_CASE(pat_index);
1334	}
1335
1336	return pte;
1337}
1338
1339static u64 byt_pte_encode(dma_addr_t addr,
1340			  unsigned int pat_index,
1341			  u32 flags)
1342{
1343	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1344
1345	if (!(flags & PTE_READ_ONLY))
1346		pte |= BYT_PTE_WRITEABLE;
1347
1348	if (pat_index != I915_CACHE_NONE)
1349		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
1350
1351	return pte;
1352}
1353
1354static u64 hsw_pte_encode(dma_addr_t addr,
1355			  unsigned int pat_index,
1356			  u32 flags)
1357{
1358	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1359
1360	if (pat_index != I915_CACHE_NONE)
1361		pte |= HSW_WB_LLC_AGE3;
1362
1363	return pte;
1364}
1365
1366static u64 iris_pte_encode(dma_addr_t addr,
1367			   unsigned int pat_index,
1368			   u32 flags)
1369{
1370	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
1371
1372	switch (pat_index) {
1373	case I915_CACHE_NONE:
1374		break;
1375	case I915_CACHE_WT:
1376		pte |= HSW_WT_ELLC_LLC_AGE3;
1377		break;
1378	default:
1379		pte |= HSW_WB_ELLC_LLC_AGE3;
1380		break;
1381	}
1382
1383	return pte;
1384}
1385
1386static int gen6_gmch_probe(struct i915_ggtt *ggtt)
1387{
1388	struct drm_i915_private *i915 = ggtt->vm.i915;
1389	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1390	unsigned int size;
1391	u16 snb_gmch_ctl;
1392
1393	if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
1394		return -ENXIO;
1395
1396	ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
1397	ggtt->mappable_end = resource_size(&ggtt->gmadr);
1398
1399	/*
1400	 * 64/512MB is the current min/max we actually know of, but this is
1401	 * just a coarse sanity check.
1402	 */
1403	if (ggtt->mappable_end < (64 << 20) ||
1404	    ggtt->mappable_end > (512 << 20)) {
1405		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
1406			&ggtt->mappable_end);
1407		return -ENXIO;
1408	}
1409
1410	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1411
1412	size = gen6_get_total_gtt_size(snb_gmch_ctl);
1413	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
1414
1415	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
1416	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
1417
1418	ggtt->vm.clear_range = nop_clear_range;
1419	if (!HAS_FULL_PPGTT(i915))
1420		ggtt->vm.clear_range = gen6_ggtt_clear_range;
1421	ggtt->vm.scratch_range = gen6_ggtt_clear_range;
1422	ggtt->vm.insert_page = gen6_ggtt_insert_page;
1423	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
1424	ggtt->vm.cleanup = gen6_gmch_remove;
1425
1426	ggtt->invalidate = gen6_ggtt_invalidate;
1427
1428	if (HAS_EDRAM(i915))
1429		ggtt->vm.pte_encode = iris_pte_encode;
1430	else if (IS_HASWELL(i915))
1431		ggtt->vm.pte_encode = hsw_pte_encode;
1432	else if (IS_VALLEYVIEW(i915))
1433		ggtt->vm.pte_encode = byt_pte_encode;
1434	else if (GRAPHICS_VER(i915) >= 7)
1435		ggtt->vm.pte_encode = ivb_pte_encode;
1436	else
1437		ggtt->vm.pte_encode = snb_pte_encode;
1438
1439	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
1440	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
1441
1442	return ggtt_probe_common(ggtt, size);
1443}
1444
1445static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
1446{
1447	struct drm_i915_private *i915 = gt->i915;
1448	int ret;
1449
1450	ggtt->vm.gt = gt;
1451	ggtt->vm.i915 = i915;
1452	ggtt->vm.dma = i915->drm.dev;
1453	dma_resv_init(&ggtt->vm._resv);
1454
1455	if (GRAPHICS_VER(i915) >= 8)
1456		ret = gen8_gmch_probe(ggtt);
1457	else if (GRAPHICS_VER(i915) >= 6)
1458		ret = gen6_gmch_probe(ggtt);
1459	else
1460		ret = intel_ggtt_gmch_probe(ggtt);
1461
1462	if (ret) {
1463		dma_resv_fini(&ggtt->vm._resv);
1464		return ret;
1465	}
1466
1467	if ((ggtt->vm.total - 1) >> 32) {
1468		drm_err(&i915->drm,
1469			"We never expected a Global GTT with more than 32bits"
1470			" of address space! Found %lldM!\n",
1471			ggtt->vm.total >> 20);
1472		ggtt->vm.total = 1ULL << 32;
1473		ggtt->mappable_end =
1474			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
1475	}
1476
1477	if (ggtt->mappable_end > ggtt->vm.total) {
1478		drm_err(&i915->drm,
1479			"mappable aperture extends past end of GGTT,"
1480			" aperture=%pa, total=%llx\n",
1481			&ggtt->mappable_end, ggtt->vm.total);
1482		ggtt->mappable_end = ggtt->vm.total;
1483	}
1484
1485	/* GMADR is the PCI mmio aperture into the global GTT. */
1486	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
1487	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
1488		(u64)ggtt->mappable_end >> 20);
1489	drm_dbg(&i915->drm, "DSM size = %lluM\n",
1490		(u64)resource_size(&intel_graphics_stolen_res) >> 20);
1491
1492	return 0;
1493}
1494
1495/**
1496 * i915_ggtt_probe_hw - Probe GGTT hardware location
1497 * @i915: i915 device
1498 */
1499int i915_ggtt_probe_hw(struct drm_i915_private *i915)
1500{
1501	struct intel_gt *gt;
1502	int ret, i;
1503
1504	for_each_gt(gt, i915, i) {
1505		ret = intel_gt_assign_ggtt(gt);
1506		if (ret)
1507			return ret;
1508	}
1509
1510	ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
1511	if (ret)
1512		return ret;
1513
1514	if (i915_vtd_active(i915))
1515		drm_info(&i915->drm, "VT-d active for gfx access\n");
1516
1517	return 0;
1518}
1519
1520struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
1521{
1522	struct i915_ggtt *ggtt;
1523
1524	ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL);
1525	if (!ggtt)
1526		return ERR_PTR(-ENOMEM);
1527
1528	INIT_LIST_HEAD(&ggtt->gt_list);
1529
1530	return ggtt;
1531}
1532
1533int i915_ggtt_enable_hw(struct drm_i915_private *i915)
1534{
1535	if (GRAPHICS_VER(i915) < 6)
1536		return intel_ggtt_gmch_enable_hw(i915);
1537
1538	return 0;
1539}
1540
1541/**
1542 * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
1543 * @vm: The VM to restore the mappings for
1544 *
1545 * Restore the memory mappings for all objects mapped to HW via the GGTT or a
1546 * DPT page table.
1547 *
1548 * Returns %true if restoring the mapping for any object that was in a write
1549 * domain before suspend.
1550 */
1551bool i915_ggtt_resume_vm(struct i915_address_space *vm)
1552{
1553	struct i915_vma *vma;
1554	bool write_domain_objs = false;
1555
1556	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
1557
1558	/* First fill our portion of the GTT with scratch pages */
1559	vm->clear_range(vm, 0, vm->total);
1560
1561	/* clflush objects bound into the GGTT and rebind them. */
1562	list_for_each_entry(vma, &vm->bound_list, vm_link) {
1563		struct drm_i915_gem_object *obj = vma->obj;
1564		unsigned int was_bound =
1565			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
1566
1567		GEM_BUG_ON(!was_bound);
1568
1569		/*
1570		 * Clear the bound flags of the vma resource to allow
1571		 * ptes to be repopulated.
1572		 */
1573		vma->resource->bound_flags = 0;
1574		vma->ops->bind_vma(vm, NULL, vma->resource,
1575				   obj ? obj->pat_index :
1576					 i915_gem_get_pat_index(vm->i915,
1577								I915_CACHE_NONE),
1578				   was_bound);
1579
1580		if (obj) { /* only used during resume => exclusive access */
1581			write_domain_objs |= fetch_and_zero(&obj->write_domain);
1582			obj->read_domains |= I915_GEM_DOMAIN_GTT;
1583		}
1584	}
1585
1586	return write_domain_objs;
1587}
1588
1589void i915_ggtt_resume(struct i915_ggtt *ggtt)
1590{
1591	struct intel_gt *gt;
1592	bool flush;
1593
1594	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
1595		intel_gt_check_and_clear_faults(gt);
1596
1597	flush = i915_ggtt_resume_vm(&ggtt->vm);
1598
1599	if (drm_mm_node_allocated(&ggtt->error_capture))
1600		ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
1601				       ggtt->error_capture.size);
1602
1603	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
1604		intel_uc_resume_mappings(&gt->uc);
1605
1606	ggtt->invalidate(ggtt);
1607
1608	if (flush)
1609		wbinvd_on_all_cpus();
1610
1611	intel_ggtt_restore_fences(ggtt);
1612}