   1/*
   2 * GTT virtualization
   3 *
   4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice (including the next
  14 * paragraph) shall be included in all copies or substantial portions of the
  15 * Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 * SOFTWARE.
  24 *
  25 * Authors:
  26 *    Zhi Wang <zhi.a.wang@intel.com>
  27 *    Zhenyu Wang <zhenyuw@linux.intel.com>
  28 *    Xiao Zheng <xiao.zheng@intel.com>
  29 *
  30 * Contributors:
  31 *    Min He <min.he@intel.com>
  32 *    Bing Niu <bing.niu@intel.com>
  33 *
  34 */
  35
  36#include "i915_drv.h"
  37#include "gvt.h"
  38#include "i915_pvinfo.h"
  39#include "trace.h"
  40
  41static bool enable_out_of_sync = false;
  42static int preallocated_oos_pages = 8192;
  43
  44/*
  45 * validate a gm address and related range size,
  46 * translate it to host gm address
  47 */
  48bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
  49{
  50	if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
  51			&& !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
  52		gvt_err("vgpu%d: invalid range gmadr 0x%llx size 0x%x\n",
  53				vgpu->id, addr, size);
  54		return false;
  55	}
  56	return true;
  57}
  58
  59/* translate a guest gmadr to host gmadr */
  60int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
  61{
  62	if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
  63		 "invalid guest gmadr %llx\n", g_addr))
  64		return -EACCES;
  65
  66	if (vgpu_gmadr_is_aperture(vgpu, g_addr))
  67		*h_addr = vgpu_aperture_gmadr_base(vgpu)
  68			  + (g_addr - vgpu_aperture_offset(vgpu));
  69	else
  70		*h_addr = vgpu_hidden_gmadr_base(vgpu)
  71			  + (g_addr - vgpu_hidden_offset(vgpu));
  72	return 0;
  73}
  74
  75/* translate a host gmadr to guest gmadr */
  76int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
  77{
  78	if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
  79		 "invalid host gmadr %llx\n", h_addr))
  80		return -EACCES;
  81
  82	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
  83		*g_addr = vgpu_aperture_gmadr_base(vgpu)
  84			+ (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
  85	else
  86		*g_addr = vgpu_hidden_gmadr_base(vgpu)
  87			+ (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
  88	return 0;
  89}
  90
  91int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
  92			     unsigned long *h_index)
  93{
  94	u64 h_addr;
  95	int ret;
  96
  97	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << GTT_PAGE_SHIFT,
  98				       &h_addr);
  99	if (ret)
 100		return ret;
 101
 102	*h_index = h_addr >> GTT_PAGE_SHIFT;
 103	return 0;
 104}
 105
 106int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
 107			     unsigned long *g_index)
 108{
 109	u64 g_addr;
 110	int ret;
 111
 112	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << GTT_PAGE_SHIFT,
 113				       &g_addr);
 114	if (ret)
 115		return ret;
 116
 117	*g_index = g_addr >> GTT_PAGE_SHIFT;
 118	return 0;
 119}
 120
 121#define gtt_type_is_entry(type) \
 122	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
 123	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
 124	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)
 125
 126#define gtt_type_is_pt(type) \
 127	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)
 128
 129#define gtt_type_is_pte_pt(type) \
 130	(type == GTT_TYPE_PPGTT_PTE_PT)
 131
 132#define gtt_type_is_root_pointer(type) \
 133	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)
 134
 135#define gtt_init_entry(e, t, p, v) do { \
 136	(e)->type = t; \
 137	(e)->pdev = p; \
 138	memcpy(&(e)->val64, &v, sizeof(v)); \
 139} while (0)
 140
  141/*
  142 * Mappings between GTT_TYPE* enumerations.
  143 * For a given type, the following information can be looked up:
  144 * - the type of the next-level page table
  145 * - the type of an entry inside this level of page table
  146 * - the type of the entry when its PSE bit is set
  147 *
  148 * If the given type does not carry that piece of information,
  149 * e.g. asking an L4 root entry type for its PSE type, or asking a
  150 * PTE page table type for its next-level page table type,
  151 * GTT_TYPE_INVALID is returned: an L4 root entry has no PSE bit
  152 * and a PTE page table has no next level of page table.
  153 * This makes the table convenient to use when traversing a
  154 * page table.
  155 */
 156
 157struct gtt_type_table_entry {
 158	int entry_type;
 159	int next_pt_type;
 160	int pse_entry_type;
 161};
 162
 163#define GTT_TYPE_TABLE_ENTRY(type, e_type, npt_type, pse_type) \
 164	[type] = { \
 165		.entry_type = e_type, \
 166		.next_pt_type = npt_type, \
 167		.pse_entry_type = pse_type, \
 168	}
 169
 170static struct gtt_type_table_entry gtt_type_table[] = {
 171	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
 172			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
 173			GTT_TYPE_PPGTT_PML4_PT,
 174			GTT_TYPE_INVALID),
 175	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
 176			GTT_TYPE_PPGTT_PML4_ENTRY,
 177			GTT_TYPE_PPGTT_PDP_PT,
 178			GTT_TYPE_INVALID),
 179	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
 180			GTT_TYPE_PPGTT_PML4_ENTRY,
 181			GTT_TYPE_PPGTT_PDP_PT,
 182			GTT_TYPE_INVALID),
 183	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
 184			GTT_TYPE_PPGTT_PDP_ENTRY,
 185			GTT_TYPE_PPGTT_PDE_PT,
 186			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 187	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
 188			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
 189			GTT_TYPE_PPGTT_PDE_PT,
 190			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 191	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
 192			GTT_TYPE_PPGTT_PDP_ENTRY,
 193			GTT_TYPE_PPGTT_PDE_PT,
 194			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 195	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
 196			GTT_TYPE_PPGTT_PDE_ENTRY,
 197			GTT_TYPE_PPGTT_PTE_PT,
 198			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
 199	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
 200			GTT_TYPE_PPGTT_PDE_ENTRY,
 201			GTT_TYPE_PPGTT_PTE_PT,
 202			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
 203	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
 204			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 205			GTT_TYPE_INVALID,
 206			GTT_TYPE_INVALID),
 207	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 208			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
 209			GTT_TYPE_INVALID,
 210			GTT_TYPE_INVALID),
 211	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
 212			GTT_TYPE_PPGTT_PDE_ENTRY,
 213			GTT_TYPE_INVALID,
 214			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
 215	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
 216			GTT_TYPE_PPGTT_PDP_ENTRY,
 217			GTT_TYPE_INVALID,
 218			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
 219	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
 220			GTT_TYPE_GGTT_PTE,
 221			GTT_TYPE_INVALID,
 222			GTT_TYPE_INVALID),
 223};
 224
 225static inline int get_next_pt_type(int type)
 226{
 227	return gtt_type_table[type].next_pt_type;
 228}
 229
 230static inline int get_entry_type(int type)
 231{
 232	return gtt_type_table[type].entry_type;
 233}
 234
 235static inline int get_pse_type(int type)
 236{
 237	return gtt_type_table[type].pse_entry_type;
 238}
 239
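/*
 * For illustration, with the table above:
 *   get_next_pt_type(GTT_TYPE_PPGTT_PDE_PT) == GTT_TYPE_PPGTT_PTE_PT
 *   get_pse_type(GTT_TYPE_PPGTT_PDE_PT)     == GTT_TYPE_PPGTT_PTE_2M_ENTRY
 *   get_next_pt_type(GTT_TYPE_PPGTT_PTE_PT) == GTT_TYPE_INVALID
 * A page table walker can stop (or reject a PSE mapping) as soon as a
 * lookup returns GTT_TYPE_INVALID.
 */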
 240static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
 241{
 242	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
 243
 244	return readq(addr);
 245}
 246
 247static void write_pte64(struct drm_i915_private *dev_priv,
 248		unsigned long index, u64 pte)
 249{
 250	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
 251
 252	writeq(pte, addr);
 253
 254	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 255	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 256}
 257
 258static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt,
 259		struct intel_gvt_gtt_entry *e,
 260		unsigned long index, bool hypervisor_access, unsigned long gpa,
 261		struct intel_vgpu *vgpu)
 262{
 263	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
 264	int ret;
 265
 266	if (WARN_ON(info->gtt_entry_size != 8))
 267		return e;
 268
 269	if (hypervisor_access) {
 270		ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
 271				(index << info->gtt_entry_size_shift),
 272				&e->val64, 8);
 273		WARN_ON(ret);
 274	} else if (!pt) {
 275		e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
 276	} else {
 277		e->val64 = *((u64 *)pt + index);
 278	}
 279	return e;
 280}
 281
 282static inline struct intel_gvt_gtt_entry *gtt_set_entry64(void *pt,
 283		struct intel_gvt_gtt_entry *e,
 284		unsigned long index, bool hypervisor_access, unsigned long gpa,
 285		struct intel_vgpu *vgpu)
 286{
 287	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
 288	int ret;
 289
 290	if (WARN_ON(info->gtt_entry_size != 8))
 291		return e;
 292
 293	if (hypervisor_access) {
 294		ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
 295				(index << info->gtt_entry_size_shift),
 296				&e->val64, 8);
 297		WARN_ON(ret);
 298	} else if (!pt) {
 299		write_pte64(vgpu->gvt->dev_priv, index, e->val64);
 300	} else {
 301		*((u64 *)pt + index) = e->val64;
 302	}
 303	return e;
 304}
 305
 306#define GTT_HAW 46
 307
 308#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30 + 1)) - 1) << 30)
 309#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21 + 1)) - 1) << 21)
 310#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12 + 1)) - 1) << 12)
 311
 312static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
 313{
 314	unsigned long pfn;
 315
 316	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
 317		pfn = (e->val64 & ADDR_1G_MASK) >> 12;
 318	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
 319		pfn = (e->val64 & ADDR_2M_MASK) >> 12;
 320	else
 321		pfn = (e->val64 & ADDR_4K_MASK) >> 12;
 322	return pfn;
 323}
 324
 325static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
 326{
 327	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
 328		e->val64 &= ~ADDR_1G_MASK;
 329		pfn &= (ADDR_1G_MASK >> 12);
 330	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
 331		e->val64 &= ~ADDR_2M_MASK;
 332		pfn &= (ADDR_2M_MASK >> 12);
 333	} else {
 334		e->val64 &= ~ADDR_4K_MASK;
 335		pfn &= (ADDR_4K_MASK >> 12);
 336	}
 337
 338	e->val64 |= (pfn << 12);
 339}
 340
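/*
 * Note that get_pfn()/set_pfn() above always shift by 12, so the pfn they
 * exchange is a 4KB page frame number even for 2MB and 1GB entries; only
 * the mask used to isolate the address bits depends on the entry type.
 */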
 341static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
 342{
 343	/* Entry doesn't have PSE bit. */
 344	if (get_pse_type(e->type) == GTT_TYPE_INVALID)
 345		return false;
 346
 347	e->type = get_entry_type(e->type);
 348	if (!(e->val64 & (1 << 7)))
 349		return false;
 350
 351	e->type = get_pse_type(e->type);
 352	return true;
 353}
 354
 355static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
 356{
  357	/*
  358	 * i915 writes the PDP root pointer registers without the present
  359	 * bit set, and that still works, so root pointer entries need to
  360	 * be treated specially.
  361	 */
 362	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
 363			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
 364		return (e->val64 != 0);
 365	else
 366		return (e->val64 & (1 << 0));
 367}
 368
 369static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
 370{
 371	e->val64 &= ~(1 << 0);
 372}
 373
 374/*
 375 * Per-platform GMA routines.
 376 */
 377static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
 378{
 379	unsigned long x = (gma >> GTT_PAGE_SHIFT);
 380
 381	trace_gma_index(__func__, gma, x);
 382	return x;
 383}
 384
 385#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
 386static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
 387{ \
 388	unsigned long x = (exp); \
 389	trace_gma_index(__func__, gma, x); \
 390	return x; \
 391}
 392
 393DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
 394DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
 395DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
 396DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
 397DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
 398
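/*
 * The shifts and masks above encode the gen8 GMA layout: bits [47:39]
 * select the PML4 entry, [38:30] the PDP entry (only [31:30] for the
 * legacy 3-level layout), [29:21] the PDE and [20:12] the PTE, with the
 * low 12 bits being the offset inside a 4KB page.
 */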
 399static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
 400	.get_entry = gtt_get_entry64,
 401	.set_entry = gtt_set_entry64,
 402	.clear_present = gtt_entry_clear_present,
 403	.test_present = gen8_gtt_test_present,
 404	.test_pse = gen8_gtt_test_pse,
 405	.get_pfn = gen8_gtt_get_pfn,
 406	.set_pfn = gen8_gtt_set_pfn,
 407};
 408
 409static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
 410	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
 411	.gma_to_pte_index = gen8_gma_to_pte_index,
 412	.gma_to_pde_index = gen8_gma_to_pde_index,
 413	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
 414	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
 415	.gma_to_pml4_index = gen8_gma_to_pml4_index,
 416};
 417
 418static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p,
 419		struct intel_gvt_gtt_entry *m)
 420{
 421	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 422	unsigned long gfn, mfn;
 423
 424	*m = *p;
 425
 426	if (!ops->test_present(p))
 427		return 0;
 428
 429	gfn = ops->get_pfn(p);
 430
 431	mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
 432	if (mfn == INTEL_GVT_INVALID_ADDR) {
 433		gvt_err("fail to translate gfn: 0x%lx\n", gfn);
 434		return -ENXIO;
 435	}
 436
 437	ops->set_pfn(m, mfn);
 438	return 0;
 439}
 440
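/*
 * gtt_entry_p2m() is the single point where a guest entry ("p") becomes a
 * machine entry ("m"): the entry is copied verbatim and only the page
 * frame number is rewritten through the hypervisor's gfn->mfn mapping.
 * Non-present entries are passed through untouched.
 */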
 441/*
 442 * MM helpers.
 443 */
 444struct intel_gvt_gtt_entry *intel_vgpu_mm_get_entry(struct intel_vgpu_mm *mm,
 445		void *page_table, struct intel_gvt_gtt_entry *e,
 446		unsigned long index)
 447{
 448	struct intel_gvt *gvt = mm->vgpu->gvt;
 449	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
 450
 451	e->type = mm->page_table_entry_type;
 452
 453	ops->get_entry(page_table, e, index, false, 0, mm->vgpu);
 454	ops->test_pse(e);
 455	return e;
 456}
 457
 458struct intel_gvt_gtt_entry *intel_vgpu_mm_set_entry(struct intel_vgpu_mm *mm,
 459		void *page_table, struct intel_gvt_gtt_entry *e,
 460		unsigned long index)
 461{
 462	struct intel_gvt *gvt = mm->vgpu->gvt;
 463	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
 464
 465	return ops->set_entry(page_table, e, index, false, 0, mm->vgpu);
 466}
 467
 468/*
 469 * PPGTT shadow page table helpers.
 470 */
 471static inline struct intel_gvt_gtt_entry *ppgtt_spt_get_entry(
 472		struct intel_vgpu_ppgtt_spt *spt,
 473		void *page_table, int type,
 474		struct intel_gvt_gtt_entry *e, unsigned long index,
 475		bool guest)
 476{
 477	struct intel_gvt *gvt = spt->vgpu->gvt;
 478	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
 479
 480	e->type = get_entry_type(type);
 481
 482	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
 483		return e;
 484
 485	ops->get_entry(page_table, e, index, guest,
 486			spt->guest_page.gfn << GTT_PAGE_SHIFT,
 487			spt->vgpu);
 488	ops->test_pse(e);
 489	return e;
 490}
 491
 492static inline struct intel_gvt_gtt_entry *ppgtt_spt_set_entry(
 493		struct intel_vgpu_ppgtt_spt *spt,
 494		void *page_table, int type,
 495		struct intel_gvt_gtt_entry *e, unsigned long index,
 496		bool guest)
 497{
 498	struct intel_gvt *gvt = spt->vgpu->gvt;
 499	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
 500
 501	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
 502		return e;
 503
 504	return ops->set_entry(page_table, e, index, guest,
 505			spt->guest_page.gfn << GTT_PAGE_SHIFT,
 506			spt->vgpu);
 507}
 508
 509#define ppgtt_get_guest_entry(spt, e, index) \
 510	ppgtt_spt_get_entry(spt, NULL, \
 511		spt->guest_page_type, e, index, true)
 512
 513#define ppgtt_set_guest_entry(spt, e, index) \
 514	ppgtt_spt_set_entry(spt, NULL, \
 515		spt->guest_page_type, e, index, true)
 516
 517#define ppgtt_get_shadow_entry(spt, e, index) \
 518	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
 519		spt->shadow_page.type, e, index, false)
 520
 521#define ppgtt_set_shadow_entry(spt, e, index) \
 522	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
 523		spt->shadow_page.type, e, index, false)
 524
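/*
 * The guest/shadow accessor pairs above differ only in where the entry
 * lives: the "guest" variants pass page_table == NULL and guest == true,
 * so ppgtt_spt_get_entry()/ppgtt_spt_set_entry() go through the
 * hypervisor using the guest page's gfn, while the "shadow" variants
 * operate directly on the shadow page's kernel mapping (vaddr).
 */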
 525/**
 526 * intel_vgpu_init_guest_page - init a guest page data structure
 527 * @vgpu: a vGPU
 528 * @p: a guest page data structure
 529 * @gfn: guest memory page frame number
  530 * @handler: the function that will be called when the target guest
  531 * memory page has been modified.
  532 *
  533 * This function is called when a user wants to track a guest memory page.
 534 *
 535 * Returns:
 536 * Zero on success, negative error code if failed.
 537 */
 538int intel_vgpu_init_guest_page(struct intel_vgpu *vgpu,
 539		struct intel_vgpu_guest_page *p,
 540		unsigned long gfn,
 541		int (*handler)(void *, u64, void *, int),
 542		void *data)
 543{
 544	INIT_HLIST_NODE(&p->node);
 545
 546	p->writeprotection = false;
 547	p->gfn = gfn;
 548	p->handler = handler;
 549	p->data = data;
 550	p->oos_page = NULL;
 551	p->write_cnt = 0;
 552
 553	hash_add(vgpu->gtt.guest_page_hash_table, &p->node, p->gfn);
 554	return 0;
 555}
 556
 557static int detach_oos_page(struct intel_vgpu *vgpu,
 558		struct intel_vgpu_oos_page *oos_page);
 559
 560/**
 561 * intel_vgpu_clean_guest_page - release the resource owned by guest page data
 562 * structure
 563 * @vgpu: a vGPU
 564 * @p: a tracked guest page
 565 *
  566 * This function is called when a user wants to stop tracking a guest
  567 * memory page.
 568 */
 569void intel_vgpu_clean_guest_page(struct intel_vgpu *vgpu,
 570		struct intel_vgpu_guest_page *p)
 571{
 572	if (!hlist_unhashed(&p->node))
 573		hash_del(&p->node);
 574
 575	if (p->oos_page)
 576		detach_oos_page(vgpu, p->oos_page);
 577
 578	if (p->writeprotection)
 579		intel_gvt_hypervisor_unset_wp_page(vgpu, p);
 580}
 581
 582/**
 583 * intel_vgpu_find_guest_page - find a guest page data structure by GFN.
 584 * @vgpu: a vGPU
 585 * @gfn: guest memory page frame number
 586 *
 587 * This function is called when emulation logic wants to know if a trapped GFN
 588 * is a tracked guest page.
 589 *
 590 * Returns:
 591 * Pointer to guest page data structure, NULL if failed.
 592 */
 593struct intel_vgpu_guest_page *intel_vgpu_find_guest_page(
 594		struct intel_vgpu *vgpu, unsigned long gfn)
 595{
 596	struct intel_vgpu_guest_page *p;
 597
 598	hash_for_each_possible(vgpu->gtt.guest_page_hash_table,
 599		p, node, gfn) {
 600		if (p->gfn == gfn)
 601			return p;
 602	}
 603	return NULL;
 604}
 605
 606static inline int init_shadow_page(struct intel_vgpu *vgpu,
 607		struct intel_vgpu_shadow_page *p, int type)
 608{
 609	p->vaddr = page_address(p->page);
 610	p->type = type;
 611
 612	INIT_HLIST_NODE(&p->node);
 613
 614	p->mfn = intel_gvt_hypervisor_virt_to_mfn(p->vaddr);
 615	if (p->mfn == INTEL_GVT_INVALID_ADDR)
 616		return -EFAULT;
 617
 618	hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn);
 619	return 0;
 620}
 621
 622static inline void clean_shadow_page(struct intel_vgpu_shadow_page *p)
 623{
 624	if (!hlist_unhashed(&p->node))
 625		hash_del(&p->node);
 626}
 627
 628static inline struct intel_vgpu_shadow_page *find_shadow_page(
 629		struct intel_vgpu *vgpu, unsigned long mfn)
 630{
 631	struct intel_vgpu_shadow_page *p;
 632
 633	hash_for_each_possible(vgpu->gtt.shadow_page_hash_table,
 634		p, node, mfn) {
 635		if (p->mfn == mfn)
 636			return p;
 637	}
 638	return NULL;
 639}
 640
 641#define guest_page_to_ppgtt_spt(ptr) \
 642	container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page)
 643
 644#define shadow_page_to_ppgtt_spt(ptr) \
 645	container_of(ptr, struct intel_vgpu_ppgtt_spt, shadow_page)
 646
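/*
 * A ppgtt_spt embeds both its guest_page and its shadow_page, so either
 * tracking structure can be converted back to the owning spt with the
 * container_of() wrappers above.
 */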
 647static void *alloc_spt(gfp_t gfp_mask)
 648{
 649	struct intel_vgpu_ppgtt_spt *spt;
 650
 651	spt = kzalloc(sizeof(*spt), gfp_mask);
 652	if (!spt)
 653		return NULL;
 654
 655	spt->shadow_page.page = alloc_page(gfp_mask);
 656	if (!spt->shadow_page.page) {
 657		kfree(spt);
 658		return NULL;
 659	}
 660	return spt;
 661}
 662
 663static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
 664{
 665	__free_page(spt->shadow_page.page);
 666	kfree(spt);
 667}
 668
 669static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 670{
 671	trace_spt_free(spt->vgpu->id, spt, spt->shadow_page.type);
 672
 673	clean_shadow_page(&spt->shadow_page);
 674	intel_vgpu_clean_guest_page(spt->vgpu, &spt->guest_page);
 675	list_del_init(&spt->post_shadow_list);
 676
 677	free_spt(spt);
 678}
 679
 680static void ppgtt_free_all_shadow_page(struct intel_vgpu *vgpu)
 681{
 682	struct hlist_node *n;
 683	struct intel_vgpu_shadow_page *sp;
 684	int i;
 685
 686	hash_for_each_safe(vgpu->gtt.shadow_page_hash_table, i, n, sp, node)
 687		ppgtt_free_shadow_page(shadow_page_to_ppgtt_spt(sp));
 688}
 689
 690static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
 691		u64 pa, void *p_data, int bytes);
 692
 693static int ppgtt_write_protection_handler(void *gp, u64 pa,
 694		void *p_data, int bytes)
 695{
 696	struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp;
 697	int ret;
 698
 699	if (bytes != 4 && bytes != 8)
 700		return -EINVAL;
 701
 702	if (!gpt->writeprotection)
 703		return -EINVAL;
 704
 705	ret = ppgtt_handle_guest_write_page_table_bytes(gp,
 706		pa, p_data, bytes);
 707	if (ret)
 708		return ret;
 709	return ret;
 710}
 711
 712static int reclaim_one_mm(struct intel_gvt *gvt);
 713
 714static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page(
 715		struct intel_vgpu *vgpu, int type, unsigned long gfn)
 716{
 717	struct intel_vgpu_ppgtt_spt *spt = NULL;
 718	int ret;
 719
 720retry:
 721	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
 722	if (!spt) {
 723		if (reclaim_one_mm(vgpu->gvt))
 724			goto retry;
 725
 726		gvt_err("fail to allocate ppgtt shadow page\n");
 727		return ERR_PTR(-ENOMEM);
 728	}
 729
 730	spt->vgpu = vgpu;
 731	spt->guest_page_type = type;
 732	atomic_set(&spt->refcount, 1);
 733	INIT_LIST_HEAD(&spt->post_shadow_list);
 734
 735	/*
  736	 * TODO: the guest page type may be different from the shadow page
  737	 *	 type when we support PSE pages in the future.
 738	 */
 739	ret = init_shadow_page(vgpu, &spt->shadow_page, type);
 740	if (ret) {
 741		gvt_err("fail to initialize shadow page for spt\n");
 742		goto err;
 743	}
 744
 745	ret = intel_vgpu_init_guest_page(vgpu, &spt->guest_page,
 746			gfn, ppgtt_write_protection_handler, NULL);
 747	if (ret) {
 748		gvt_err("fail to initialize guest page for spt\n");
 749		goto err;
 750	}
 751
 752	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
 753	return spt;
 754err:
 755	ppgtt_free_shadow_page(spt);
 756	return ERR_PTR(ret);
 757}
 758
 759static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page(
 760		struct intel_vgpu *vgpu, unsigned long mfn)
 761{
 762	struct intel_vgpu_shadow_page *p = find_shadow_page(vgpu, mfn);
 763
 764	if (p)
 765		return shadow_page_to_ppgtt_spt(p);
 766
 767	gvt_err("vgpu%d: fail to find ppgtt shadow page: 0x%lx\n",
 768			vgpu->id, mfn);
 769	return NULL;
 770}
 771
 772#define pt_entry_size_shift(spt) \
 773	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
 774
 775#define pt_entries(spt) \
 776	(GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
 777
 778#define for_each_present_guest_entry(spt, e, i) \
 779	for (i = 0; i < pt_entries(spt); i++) \
 780	if (spt->vgpu->gvt->gtt.pte_ops->test_present( \
 781		ppgtt_get_guest_entry(spt, e, i)))
 782
 783#define for_each_present_shadow_entry(spt, e, i) \
 784	for (i = 0; i < pt_entries(spt); i++) \
 785	if (spt->vgpu->gvt->gtt.pte_ops->test_present( \
 786		ppgtt_get_shadow_entry(spt, e, i)))
 787
 788static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 789{
 790	int v = atomic_read(&spt->refcount);
 791
 792	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
 793
 794	atomic_inc(&spt->refcount);
 795}
 796
 797static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt);
 798
 799static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
 800		struct intel_gvt_gtt_entry *e)
 801{
 802	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 803	struct intel_vgpu_ppgtt_spt *s;
 804	intel_gvt_gtt_type_t cur_pt_type;
 805
 806	if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type))))
 807		return -EINVAL;
 808
 809	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
 810		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
 811		cur_pt_type = get_next_pt_type(e->type) + 1;
 812		if (ops->get_pfn(e) ==
 813			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
 814			return 0;
 815	}
 816	s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
 817	if (!s) {
 818		gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n",
 819				vgpu->id, ops->get_pfn(e));
 820		return -ENXIO;
 821	}
 822	return ppgtt_invalidate_shadow_page(s);
 823}
 824
 825static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 826{
 827	struct intel_gvt_gtt_entry e;
 828	unsigned long index;
 829	int ret;
 830	int v = atomic_read(&spt->refcount);
 831
 832	trace_spt_change(spt->vgpu->id, "die", spt,
 833			spt->guest_page.gfn, spt->shadow_page.type);
 834
 835	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
 836
 837	if (atomic_dec_return(&spt->refcount) > 0)
 838		return 0;
 839
 840	if (gtt_type_is_pte_pt(spt->shadow_page.type))
 841		goto release;
 842
 843	for_each_present_shadow_entry(spt, &e, index) {
 844		if (!gtt_type_is_pt(get_next_pt_type(e.type))) {
 845			gvt_err("GVT doesn't support pse bit for now\n");
 846			return -EINVAL;
 847		}
 848		ret = ppgtt_invalidate_shadow_page_by_shadow_entry(
 849				spt->vgpu, &e);
 850		if (ret)
 851			goto fail;
 852	}
 853release:
 854	trace_spt_change(spt->vgpu->id, "release", spt,
 855			spt->guest_page.gfn, spt->shadow_page.type);
 856	ppgtt_free_shadow_page(spt);
 857	return 0;
 858fail:
 859	gvt_err("vgpu%d: fail: shadow page %p shadow entry 0x%llx type %d\n",
 860			spt->vgpu->id, spt, e.val64, e.type);
 861	return ret;
 862}
 863
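/*
 * Invalidation mirrors population: the refcount is dropped first and the
 * page is only torn down once it reaches zero; non-leaf tables then
 * recurse into their children via
 * ppgtt_invalidate_shadow_page_by_shadow_entry(), which skips entries
 * that already point at the per-type scratch page.
 */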
 864static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt);
 865
 866static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry(
 867		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
 868{
 869	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 870	struct intel_vgpu_ppgtt_spt *s = NULL;
 871	struct intel_vgpu_guest_page *g;
 872	int ret;
 873
 874	if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(we->type)))) {
 875		ret = -EINVAL;
 876		goto fail;
 877	}
 878
 879	g = intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we));
 880	if (g) {
 881		s = guest_page_to_ppgtt_spt(g);
 882		ppgtt_get_shadow_page(s);
 883	} else {
 884		int type = get_next_pt_type(we->type);
 885
 886		s = ppgtt_alloc_shadow_page(vgpu, type, ops->get_pfn(we));
 887		if (IS_ERR(s)) {
 888			ret = PTR_ERR(s);
 889			goto fail;
 890		}
 891
 892		ret = intel_gvt_hypervisor_set_wp_page(vgpu, &s->guest_page);
 893		if (ret)
 894			goto fail;
 895
 896		ret = ppgtt_populate_shadow_page(s);
 897		if (ret)
 898			goto fail;
 899
 900		trace_spt_change(vgpu->id, "new", s, s->guest_page.gfn,
 901			s->shadow_page.type);
 902	}
 903	return s;
 904fail:
 905	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
 906			vgpu->id, s, we->val64, we->type);
 907	return ERR_PTR(ret);
 908}
 909
 910static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
 911		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
 912{
 913	struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
 914
 915	se->type = ge->type;
 916	se->val64 = ge->val64;
 917
 918	ops->set_pfn(se, s->shadow_page.mfn);
 919}
 920
 921static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 922{
 923	struct intel_vgpu *vgpu = spt->vgpu;
 924	struct intel_vgpu_ppgtt_spt *s;
 925	struct intel_gvt_gtt_entry se, ge;
 926	unsigned long i;
 927	int ret;
 928
 929	trace_spt_change(spt->vgpu->id, "born", spt,
 930			spt->guest_page.gfn, spt->shadow_page.type);
 931
 932	if (gtt_type_is_pte_pt(spt->shadow_page.type)) {
 933		for_each_present_guest_entry(spt, &ge, i) {
 934			ret = gtt_entry_p2m(vgpu, &ge, &se);
 935			if (ret)
 936				goto fail;
 937			ppgtt_set_shadow_entry(spt, &se, i);
 938		}
 939		return 0;
 940	}
 941
 942	for_each_present_guest_entry(spt, &ge, i) {
 943		if (!gtt_type_is_pt(get_next_pt_type(ge.type))) {
 944			gvt_err("GVT doesn't support pse bit now\n");
 945			ret = -EINVAL;
 946			goto fail;
 947		}
 948
 949		s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
 950		if (IS_ERR(s)) {
 951			ret = PTR_ERR(s);
 952			goto fail;
 953		}
 954		ppgtt_get_shadow_entry(spt, &se, i);
 955		ppgtt_generate_shadow_entry(&se, s, &ge);
 956		ppgtt_set_shadow_entry(spt, &se, i);
 957	}
 958	return 0;
 959fail:
 960	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
 961			vgpu->id, spt, ge.val64, ge.type);
 962	return ret;
 963}
 964
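/*
 * Population is recursive: for a non-leaf table every present guest entry
 * pulls in (or reuses, via the refcount) a shadow page for the next level
 * through ppgtt_populate_shadow_page_by_guest_entry(), while a leaf PTE
 * table only needs its gfns translated to mfns by gtt_entry_p2m().
 */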
 965static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
 966		unsigned long index)
 967{
 968	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
 969	struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
 970	struct intel_vgpu *vgpu = spt->vgpu;
 971	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 972	struct intel_gvt_gtt_entry e;
 973	int ret;
 974
 975	ppgtt_get_shadow_entry(spt, &e, index);
 976
 977	trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, e.val64,
 978			 index);
 979
 980	if (!ops->test_present(&e))
 981		return 0;
 982
 983	if (ops->get_pfn(&e) == vgpu->gtt.scratch_pt[sp->type].page_mfn)
 984		return 0;
 985
 986	if (gtt_type_is_pt(get_next_pt_type(e.type))) {
 987		struct intel_vgpu_ppgtt_spt *s =
 988			ppgtt_find_shadow_page(vgpu, ops->get_pfn(&e));
 989		if (!s) {
 990			gvt_err("fail to find guest page\n");
 991			ret = -ENXIO;
 992			goto fail;
 993		}
 994		ret = ppgtt_invalidate_shadow_page(s);
 995		if (ret)
 996			goto fail;
 997	}
 998	ops->set_pfn(&e, vgpu->gtt.scratch_pt[sp->type].page_mfn);
 999	ppgtt_set_shadow_entry(spt, &e, index);
1000	return 0;
1001fail:
1002	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
1003			vgpu->id, spt, e.val64, e.type);
1004	return ret;
1005}
1006
1007static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt,
1008		struct intel_gvt_gtt_entry *we, unsigned long index)
1009{
1010	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
1011	struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
1012	struct intel_vgpu *vgpu = spt->vgpu;
1013	struct intel_gvt_gtt_entry m;
1014	struct intel_vgpu_ppgtt_spt *s;
1015	int ret;
1016
1017	trace_gpt_change(spt->vgpu->id, "add", spt, sp->type,
1018		we->val64, index);
1019
1020	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
1021		s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, we);
1022		if (IS_ERR(s)) {
1023			ret = PTR_ERR(s);
1024			goto fail;
1025		}
1026		ppgtt_get_shadow_entry(spt, &m, index);
1027		ppgtt_generate_shadow_entry(&m, s, we);
1028		ppgtt_set_shadow_entry(spt, &m, index);
1029	} else {
1030		ret = gtt_entry_p2m(vgpu, we, &m);
1031		if (ret)
1032			goto fail;
1033		ppgtt_set_shadow_entry(spt, &m, index);
1034	}
1035	return 0;
1036fail:
1037	gvt_err("vgpu%d: fail: spt %p guest entry 0x%llx type %d\n", vgpu->id,
1038			spt, we->val64, we->type);
1039	return ret;
1040}
1041
1042static int sync_oos_page(struct intel_vgpu *vgpu,
1043		struct intel_vgpu_oos_page *oos_page)
1044{
1045	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1046	struct intel_gvt *gvt = vgpu->gvt;
1047	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1048	struct intel_vgpu_ppgtt_spt *spt =
1049		guest_page_to_ppgtt_spt(oos_page->guest_page);
1050	struct intel_gvt_gtt_entry old, new, m;
1051	int index;
1052	int ret;
1053
1054	trace_oos_change(vgpu->id, "sync", oos_page->id,
1055			oos_page->guest_page, spt->guest_page_type);
1056
1057	old.type = new.type = get_entry_type(spt->guest_page_type);
1058	old.val64 = new.val64 = 0;
1059
1060	for (index = 0; index < (GTT_PAGE_SIZE >> info->gtt_entry_size_shift);
1061		index++) {
1062		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
1063		ops->get_entry(NULL, &new, index, true,
1064			oos_page->guest_page->gfn << PAGE_SHIFT, vgpu);
1065
1066		if (old.val64 == new.val64
1067			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
1068			continue;
1069
1070		trace_oos_sync(vgpu->id, oos_page->id,
1071				oos_page->guest_page, spt->guest_page_type,
1072				new.val64, index);
1073
1074		ret = gtt_entry_p2m(vgpu, &new, &m);
1075		if (ret)
1076			return ret;
1077
1078		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
1079		ppgtt_set_shadow_entry(spt, &m, index);
1080	}
1081
1082	oos_page->guest_page->write_cnt = 0;
1083	list_del_init(&spt->post_shadow_list);
1084	return 0;
1085}
1086
1087static int detach_oos_page(struct intel_vgpu *vgpu,
1088		struct intel_vgpu_oos_page *oos_page)
1089{
1090	struct intel_gvt *gvt = vgpu->gvt;
1091	struct intel_vgpu_ppgtt_spt *spt =
1092		guest_page_to_ppgtt_spt(oos_page->guest_page);
1093
1094	trace_oos_change(vgpu->id, "detach", oos_page->id,
1095			oos_page->guest_page, spt->guest_page_type);
1096
1097	oos_page->guest_page->write_cnt = 0;
1098	oos_page->guest_page->oos_page = NULL;
1099	oos_page->guest_page = NULL;
1100
1101	list_del_init(&oos_page->vm_list);
1102	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
1103
1104	return 0;
1105}
1106
1107static int attach_oos_page(struct intel_vgpu *vgpu,
1108		struct intel_vgpu_oos_page *oos_page,
1109		struct intel_vgpu_guest_page *gpt)
1110{
1111	struct intel_gvt *gvt = vgpu->gvt;
1112	int ret;
1113
1114	ret = intel_gvt_hypervisor_read_gpa(vgpu, gpt->gfn << GTT_PAGE_SHIFT,
1115		oos_page->mem, GTT_PAGE_SIZE);
1116	if (ret)
1117		return ret;
1118
1119	oos_page->guest_page = gpt;
1120	gpt->oos_page = oos_page;
1121
1122	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
1123
1124	trace_oos_change(vgpu->id, "attach", gpt->oos_page->id,
1125			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);
1126	return 0;
1127}
1128
1129static int ppgtt_set_guest_page_sync(struct intel_vgpu *vgpu,
1130		struct intel_vgpu_guest_page *gpt)
1131{
1132	int ret;
1133
1134	ret = intel_gvt_hypervisor_set_wp_page(vgpu, gpt);
1135	if (ret)
1136		return ret;
1137
1138	trace_oos_change(vgpu->id, "set page sync", gpt->oos_page->id,
1139			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);
1140
1141	list_del_init(&gpt->oos_page->vm_list);
1142	return sync_oos_page(vgpu, gpt->oos_page);
1143}
1144
1145static int ppgtt_allocate_oos_page(struct intel_vgpu *vgpu,
1146		struct intel_vgpu_guest_page *gpt)
1147{
1148	struct intel_gvt *gvt = vgpu->gvt;
1149	struct intel_gvt_gtt *gtt = &gvt->gtt;
1150	struct intel_vgpu_oos_page *oos_page = gpt->oos_page;
1151	int ret;
1152
 1153	WARN(oos_page, "shadow PPGTT page already has an oos page\n");
1154
1155	if (list_empty(&gtt->oos_page_free_list_head)) {
1156		oos_page = container_of(gtt->oos_page_use_list_head.next,
1157			struct intel_vgpu_oos_page, list);
1158		ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page);
1159		if (ret)
1160			return ret;
1161		ret = detach_oos_page(vgpu, oos_page);
1162		if (ret)
1163			return ret;
1164	} else
1165		oos_page = container_of(gtt->oos_page_free_list_head.next,
1166			struct intel_vgpu_oos_page, list);
1167	return attach_oos_page(vgpu, oos_page, gpt);
1168}
1169
1170static int ppgtt_set_guest_page_oos(struct intel_vgpu *vgpu,
1171		struct intel_vgpu_guest_page *gpt)
1172{
1173	struct intel_vgpu_oos_page *oos_page = gpt->oos_page;
1174
 1175	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
1176		return -EINVAL;
1177
1178	trace_oos_change(vgpu->id, "set page out of sync", gpt->oos_page->id,
1179			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);
1180
1181	list_add_tail(&oos_page->vm_list, &vgpu->gtt.oos_page_list_head);
1182	return intel_gvt_hypervisor_unset_wp_page(vgpu, gpt);
1183}
1184
1185/**
 1186 * intel_vgpu_sync_oos_pages - sync all out-of-sync shadow pages of a vGPU
 1187 * @vgpu: a vGPU
 1188 *
 1189 * This function is called before submitting a guest workload to the host,
 1190 * to sync all the out-of-sync shadow pages of the vGPU.
1191 *
1192 * Returns:
1193 * Zero on success, negative error code if failed.
1194 */
1195int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
1196{
1197	struct list_head *pos, *n;
1198	struct intel_vgpu_oos_page *oos_page;
1199	int ret;
1200
1201	if (!enable_out_of_sync)
1202		return 0;
1203
1204	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
1205		oos_page = container_of(pos,
1206				struct intel_vgpu_oos_page, vm_list);
1207		ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page);
1208		if (ret)
1209			return ret;
1210	}
1211	return 0;
1212}
1213
1214/*
1215 * The heart of PPGTT shadow page table.
1216 */
1217static int ppgtt_handle_guest_write_page_table(
1218		struct intel_vgpu_guest_page *gpt,
1219		struct intel_gvt_gtt_entry *we, unsigned long index)
1220{
1221	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
1222	struct intel_vgpu *vgpu = spt->vgpu;
1223	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1224
1225	int ret;
1226	int new_present;
1227
1228	new_present = ops->test_present(we);
1229
1230	ret = ppgtt_handle_guest_entry_removal(gpt, index);
1231	if (ret)
1232		goto fail;
1233
1234	if (new_present) {
1235		ret = ppgtt_handle_guest_entry_add(gpt, we, index);
1236		if (ret)
1237			goto fail;
1238	}
1239	return 0;
1240fail:
1241	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d.\n",
1242			vgpu->id, spt, we->val64, we->type);
1243	return ret;
1244}
1245
1246static inline bool can_do_out_of_sync(struct intel_vgpu_guest_page *gpt)
1247{
1248	return enable_out_of_sync
1249		&& gtt_type_is_pte_pt(
1250			guest_page_to_ppgtt_spt(gpt)->guest_page_type)
1251		&& gpt->write_cnt >= 2;
1252}
1253
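/*
 * Heuristic: a PTE-level page that has been written at least twice is
 * considered hot and is allowed to go out of sync, i.e. its write
 * protection is dropped and its contents are reconciled later from the
 * oos_page snapshot instead of on every single guest write.
 */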
1254static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
1255		unsigned long index)
1256{
1257	set_bit(index, spt->post_shadow_bitmap);
1258	if (!list_empty(&spt->post_shadow_list))
1259		return;
1260
1261	list_add_tail(&spt->post_shadow_list,
1262			&spt->vgpu->gtt.post_shadow_list_head);
1263}
1264
1265/**
1266 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
1267 * @vgpu: a vGPU
1268 *
 1269 * This function is called before submitting a guest workload to the host,
1270 * to flush all the post shadows for a vGPU.
1271 *
1272 * Returns:
1273 * Zero on success, negative error code if failed.
1274 */
1275int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
1276{
1277	struct list_head *pos, *n;
1278	struct intel_vgpu_ppgtt_spt *spt;
1279	struct intel_gvt_gtt_entry ge;
1280	unsigned long index;
1281	int ret;
1282
1283	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
1284		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
1285				post_shadow_list);
1286
1287		for_each_set_bit(index, spt->post_shadow_bitmap,
1288				GTT_ENTRY_NUM_IN_ONE_PAGE) {
1289			ppgtt_get_guest_entry(spt, &ge, index);
1290
1291			ret = ppgtt_handle_guest_write_page_table(
1292					&spt->guest_page, &ge, index);
1293			if (ret)
1294				return ret;
1295			clear_bit(index, spt->post_shadow_bitmap);
1296		}
1297		list_del_init(&spt->post_shadow_list);
1298	}
1299	return 0;
1300}
1301
1302static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
1303		u64 pa, void *p_data, int bytes)
1304{
1305	struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp;
1306	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
1307	struct intel_vgpu *vgpu = spt->vgpu;
1308	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1309	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1310	struct intel_gvt_gtt_entry we;
1311	unsigned long index;
1312	int ret;
1313
1314	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
1315
1316	ppgtt_get_guest_entry(spt, &we, index);
1317
1318	ops->test_pse(&we);
1319
1320	if (bytes == info->gtt_entry_size) {
1321		ret = ppgtt_handle_guest_write_page_table(gpt, &we, index);
1322		if (ret)
1323			return ret;
1324	} else {
1325		if (!test_bit(index, spt->post_shadow_bitmap)) {
1326			ret = ppgtt_handle_guest_entry_removal(gpt, index);
1327			if (ret)
1328				return ret;
1329		}
1330
1331		ppgtt_set_post_shadow(spt, index);
1332	}
1333
1334	if (!enable_out_of_sync)
1335		return 0;
1336
1337	gpt->write_cnt++;
1338
1339	if (gpt->oos_page)
1340		ops->set_entry(gpt->oos_page->mem, &we, index,
1341				false, 0, vgpu);
1342
1343	if (can_do_out_of_sync(gpt)) {
1344		if (!gpt->oos_page)
1345			ppgtt_allocate_oos_page(vgpu, gpt);
1346
1347		ret = ppgtt_set_guest_page_oos(vgpu, gpt);
1348		if (ret < 0)
1349			return ret;
1350	}
1351	return 0;
1352}
1353
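/*
 * Write handling summary: a write of exactly one entry is shadowed
 * immediately via ppgtt_handle_guest_write_page_table(); a partial write
 * only tears down the old shadow entry and marks the index in
 * post_shadow_bitmap, deferring the re-shadow until
 * intel_vgpu_flush_post_shadow() runs before the next workload
 * submission.
 */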
1354/*
1355 * mm page table allocation policy for bdw+
1356 *  - for ggtt, only virtual page table will be allocated.
1357 *  - for ppgtt, dedicated virtual/shadow page table will be allocated.
1358 */
1359static int gen8_mm_alloc_page_table(struct intel_vgpu_mm *mm)
1360{
1361	struct intel_vgpu *vgpu = mm->vgpu;
1362	struct intel_gvt *gvt = vgpu->gvt;
1363	const struct intel_gvt_device_info *info = &gvt->device_info;
1364	void *mem;
1365
1366	if (mm->type == INTEL_GVT_MM_PPGTT) {
1367		mm->page_table_entry_cnt = 4;
1368		mm->page_table_entry_size = mm->page_table_entry_cnt *
1369			info->gtt_entry_size;
1370		mem = kzalloc(mm->has_shadow_page_table ?
1371			mm->page_table_entry_size * 2
1372				: mm->page_table_entry_size, GFP_KERNEL);
1373		if (!mem)
1374			return -ENOMEM;
1375		mm->virtual_page_table = mem;
1376		if (!mm->has_shadow_page_table)
1377			return 0;
1378		mm->shadow_page_table = mem + mm->page_table_entry_size;
1379	} else if (mm->type == INTEL_GVT_MM_GGTT) {
1380		mm->page_table_entry_cnt =
1381			(gvt_ggtt_gm_sz(gvt) >> GTT_PAGE_SHIFT);
1382		mm->page_table_entry_size = mm->page_table_entry_cnt *
1383			info->gtt_entry_size;
1384		mem = vzalloc(mm->page_table_entry_size);
1385		if (!mem)
1386			return -ENOMEM;
1387		mm->virtual_page_table = mem;
1388	}
1389	return 0;
1390}
1391
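/*
 * Concretely, with the 8-byte entries this file assumes elsewhere (see
 * the WARN_ON in gtt_get_entry64()), a PPGTT mm stores only the 4 root
 * pointers (32 bytes, doubled when a shadow copy is kept), whereas a
 * GGTT mm allocates one entry per 4KB page of the whole GM space and
 * therefore uses vzalloc().
 */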
1392static void gen8_mm_free_page_table(struct intel_vgpu_mm *mm)
1393{
1394	if (mm->type == INTEL_GVT_MM_PPGTT) {
1395		kfree(mm->virtual_page_table);
1396	} else if (mm->type == INTEL_GVT_MM_GGTT) {
1397		if (mm->virtual_page_table)
1398			vfree(mm->virtual_page_table);
1399	}
1400	mm->virtual_page_table = mm->shadow_page_table = NULL;
1401}
1402
1403static void invalidate_mm(struct intel_vgpu_mm *mm)
1404{
1405	struct intel_vgpu *vgpu = mm->vgpu;
1406	struct intel_gvt *gvt = vgpu->gvt;
1407	struct intel_gvt_gtt *gtt = &gvt->gtt;
1408	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1409	struct intel_gvt_gtt_entry se;
1410	int i;
1411
1412	if (WARN_ON(!mm->has_shadow_page_table || !mm->shadowed))
1413		return;
1414
1415	for (i = 0; i < mm->page_table_entry_cnt; i++) {
1416		ppgtt_get_shadow_root_entry(mm, &se, i);
1417		if (!ops->test_present(&se))
1418			continue;
1419		ppgtt_invalidate_shadow_page_by_shadow_entry(
1420				vgpu, &se);
1421		se.val64 = 0;
1422		ppgtt_set_shadow_root_entry(mm, &se, i);
1423
1424		trace_gpt_change(vgpu->id, "destroy root pointer",
1425				NULL, se.type, se.val64, i);
1426	}
1427	mm->shadowed = false;
1428}
1429
1430/**
1431 * intel_vgpu_destroy_mm - destroy a mm object
1432 * @mm: a kref object
1433 *
 1434 * This function is used to destroy a mm object for a vGPU.
1435 *
1436 */
1437void intel_vgpu_destroy_mm(struct kref *mm_ref)
1438{
1439	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1440	struct intel_vgpu *vgpu = mm->vgpu;
1441	struct intel_gvt *gvt = vgpu->gvt;
1442	struct intel_gvt_gtt *gtt = &gvt->gtt;
1443
1444	if (!mm->initialized)
1445		goto out;
1446
1447	list_del(&mm->list);
1448	list_del(&mm->lru_list);
1449
1450	if (mm->has_shadow_page_table)
1451		invalidate_mm(mm);
1452
1453	gtt->mm_free_page_table(mm);
1454out:
1455	kfree(mm);
1456}
1457
1458static int shadow_mm(struct intel_vgpu_mm *mm)
1459{
1460	struct intel_vgpu *vgpu = mm->vgpu;
1461	struct intel_gvt *gvt = vgpu->gvt;
1462	struct intel_gvt_gtt *gtt = &gvt->gtt;
1463	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1464	struct intel_vgpu_ppgtt_spt *spt;
1465	struct intel_gvt_gtt_entry ge, se;
1466	int i;
1467	int ret;
1468
1469	if (WARN_ON(!mm->has_shadow_page_table || mm->shadowed))
1470		return 0;
1471
1472	mm->shadowed = true;
1473
1474	for (i = 0; i < mm->page_table_entry_cnt; i++) {
1475		ppgtt_get_guest_root_entry(mm, &ge, i);
1476		if (!ops->test_present(&ge))
1477			continue;
1478
1479		trace_gpt_change(vgpu->id, __func__, NULL,
1480				ge.type, ge.val64, i);
1481
1482		spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
1483		if (IS_ERR(spt)) {
1484			gvt_err("fail to populate guest root pointer\n");
1485			ret = PTR_ERR(spt);
1486			goto fail;
1487		}
1488		ppgtt_generate_shadow_entry(&se, spt, &ge);
1489		ppgtt_set_shadow_root_entry(mm, &se, i);
1490
1491		trace_gpt_change(vgpu->id, "populate root pointer",
1492				NULL, se.type, se.val64, i);
1493	}
1494	return 0;
1495fail:
1496	invalidate_mm(mm);
1497	return ret;
1498}
1499
1500/**
1501 * intel_vgpu_create_mm - create a mm object for a vGPU
1502 * @vgpu: a vGPU
1503 * @mm_type: mm object type, should be PPGTT or GGTT
 1504 * @virtual_page_table: page table root pointers. Could be NULL if the user
 1505 *	wants to populate the shadow later.
1506 * @page_table_level: describe the page table level of the mm object
1507 * @pde_base_index: pde root pointer base in GGTT MMIO.
1508 *
1509 * This function is used to create a mm object for a vGPU.
1510 *
1511 * Returns:
 1512 * The created mm object on success, an error pointer (ERR_PTR) if failed.
1513 */
1514struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu,
1515		int mm_type, void *virtual_page_table, int page_table_level,
1516		u32 pde_base_index)
1517{
1518	struct intel_gvt *gvt = vgpu->gvt;
1519	struct intel_gvt_gtt *gtt = &gvt->gtt;
1520	struct intel_vgpu_mm *mm;
1521	int ret;
1522
1523	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1524	if (!mm) {
1525		ret = -ENOMEM;
1526		goto fail;
1527	}
1528
1529	mm->type = mm_type;
1530
1531	if (page_table_level == 1)
1532		mm->page_table_entry_type = GTT_TYPE_GGTT_PTE;
1533	else if (page_table_level == 3)
1534		mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
1535	else if (page_table_level == 4)
1536		mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
1537	else {
1538		WARN_ON(1);
1539		ret = -EINVAL;
1540		goto fail;
1541	}
1542
1543	mm->page_table_level = page_table_level;
1544	mm->pde_base_index = pde_base_index;
1545
1546	mm->vgpu = vgpu;
1547	mm->has_shadow_page_table = !!(mm_type == INTEL_GVT_MM_PPGTT);
1548
1549	kref_init(&mm->ref);
1550	atomic_set(&mm->pincount, 0);
1551	INIT_LIST_HEAD(&mm->list);
1552	INIT_LIST_HEAD(&mm->lru_list);
1553	list_add_tail(&mm->list, &vgpu->gtt.mm_list_head);
1554
1555	ret = gtt->mm_alloc_page_table(mm);
1556	if (ret) {
1557		gvt_err("fail to allocate page table for mm\n");
1558		goto fail;
1559	}
1560
1561	mm->initialized = true;
1562
1563	if (virtual_page_table)
1564		memcpy(mm->virtual_page_table, virtual_page_table,
1565				mm->page_table_entry_size);
1566
1567	if (mm->has_shadow_page_table) {
1568		ret = shadow_mm(mm);
1569		if (ret)
1570			goto fail;
1571		list_add_tail(&mm->lru_list, &gvt->gtt.mm_lru_list_head);
1572	}
1573	return mm;
1574fail:
1575	gvt_err("fail to create mm\n");
1576	if (mm)
1577		intel_gvt_mm_unreference(mm);
1578	return ERR_PTR(ret);
1579}
1580
1581/**
1582 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
1583 * @mm: a vGPU mm object
1584 *
 1585 * This function is called when a user no longer needs a vGPU mm object.
1586 */
1587void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
1588{
1589	if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
1590		return;
1591
1592	atomic_dec(&mm->pincount);
1593}
1594
1595/**
1596 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
 1597 * @mm: a vGPU mm object
 1598 *
 1599 * This function is called when a user wants to use a vGPU mm object. If this
1600 * mm object hasn't been shadowed yet, the shadow will be populated at this
1601 * time.
1602 *
1603 * Returns:
1604 * Zero on success, negative error code if failed.
1605 */
1606int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
1607{
1608	int ret;
1609
1610	if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
1611		return 0;
1612
1613	atomic_inc(&mm->pincount);
1614
1615	if (!mm->shadowed) {
1616		ret = shadow_mm(mm);
1617		if (ret)
1618			return ret;
1619	}
1620
1621	list_del_init(&mm->lru_list);
1622	list_add_tail(&mm->lru_list, &mm->vgpu->gvt->gtt.mm_lru_list_head);
1623	return 0;
1624}
1625
1626static int reclaim_one_mm(struct intel_gvt *gvt)
1627{
1628	struct intel_vgpu_mm *mm;
1629	struct list_head *pos, *n;
1630
1631	list_for_each_safe(pos, n, &gvt->gtt.mm_lru_list_head) {
1632		mm = container_of(pos, struct intel_vgpu_mm, lru_list);
1633
1634		if (mm->type != INTEL_GVT_MM_PPGTT)
1635			continue;
1636		if (atomic_read(&mm->pincount))
1637			continue;
1638
1639		list_del_init(&mm->lru_list);
1640		invalidate_mm(mm);
1641		return 1;
1642	}
1643	return 0;
1644}
1645
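/*
 * reclaim_one_mm() walks the LRU list and invalidates the first unpinned
 * PPGTT mm it finds; ppgtt_alloc_shadow_page() uses it to retry after a
 * failed allocation, and pinning an mm moves it back to the LRU tail.
 */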
1646/*
1647 * GMA translation APIs.
1648 */
1649static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
1650		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
1651{
1652	struct intel_vgpu *vgpu = mm->vgpu;
1653	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1654	struct intel_vgpu_ppgtt_spt *s;
1655
1656	if (WARN_ON(!mm->has_shadow_page_table))
1657		return -EINVAL;
1658
1659	s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
1660	if (!s)
1661		return -ENXIO;
1662
1663	if (!guest)
1664		ppgtt_get_shadow_entry(s, e, index);
1665	else
1666		ppgtt_get_guest_entry(s, e, index);
1667	return 0;
1668}
1669
1670/**
1671 * intel_vgpu_gma_to_gpa - translate a gma to GPA
1672 * @mm: mm object. could be a PPGTT or GGTT mm object
1673 * @gma: graphics memory address in this mm object
1674 *
1675 * This function is used to translate a graphics memory address in specific
1676 * graphics memory space to guest physical address.
1677 *
1678 * Returns:
1679 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
1680 */
1681unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
1682{
1683	struct intel_vgpu *vgpu = mm->vgpu;
1684	struct intel_gvt *gvt = vgpu->gvt;
1685	struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
1686	struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
1687	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
1688	unsigned long gma_index[4];
1689	struct intel_gvt_gtt_entry e;
1690	int i, index;
1691	int ret;
1692
1693	if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT)
1694		return INTEL_GVT_INVALID_ADDR;
1695
1696	if (mm->type == INTEL_GVT_MM_GGTT) {
1697		if (!vgpu_gmadr_is_valid(vgpu, gma))
1698			goto err;
1699
1700		ggtt_get_guest_entry(mm, &e,
1701			gma_ops->gma_to_ggtt_pte_index(gma));
1702		gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT)
1703			+ (gma & ~GTT_PAGE_MASK);
1704
1705		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
1706		return gpa;
1707	}
1708
1709	switch (mm->page_table_level) {
1710	case 4:
1711		ppgtt_get_shadow_root_entry(mm, &e, 0);
1712		gma_index[0] = gma_ops->gma_to_pml4_index(gma);
1713		gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
1714		gma_index[2] = gma_ops->gma_to_pde_index(gma);
1715		gma_index[3] = gma_ops->gma_to_pte_index(gma);
1716		index = 4;
1717		break;
1718	case 3:
1719		ppgtt_get_shadow_root_entry(mm, &e,
1720				gma_ops->gma_to_l3_pdp_index(gma));
1721		gma_index[0] = gma_ops->gma_to_pde_index(gma);
1722		gma_index[1] = gma_ops->gma_to_pte_index(gma);
1723		index = 2;
1724		break;
1725	case 2:
1726		ppgtt_get_shadow_root_entry(mm, &e,
1727				gma_ops->gma_to_pde_index(gma));
1728		gma_index[0] = gma_ops->gma_to_pte_index(gma);
1729		index = 1;
1730		break;
1731	default:
1732		WARN_ON(1);
1733		goto err;
1734	}
1735
1736	/* walk into the shadow page table and get gpa from guest entry */
1737	for (i = 0; i < index; i++) {
1738		ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
1739			(i == index - 1));
1740		if (ret)
1741			goto err;
1742	}
1743
1744	gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT)
1745		+ (gma & ~GTT_PAGE_MASK);
1746
1747	trace_gma_translate(vgpu->id, "ppgtt", 0,
1748			mm->page_table_level, gma, gpa);
1749	return gpa;
1750err:
1751	gvt_err("invalid mm type: %d gma %lx\n", mm->type, gma);
1752	return INTEL_GVT_INVALID_ADDR;
1753}
1754
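/*
 * For example, translating a GMA through a 4-level PPGTT mm walks
 * pml4 -> pdp -> pde -> pte: the first three lookups use shadow entries
 * to find the next shadow table, and only the final lookup reads the
 * guest PTE, so the returned pfn is a guest pfn and the result is a
 * guest physical address.
 */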
1755static int emulate_gtt_mmio_read(struct intel_vgpu *vgpu,
1756	unsigned int off, void *p_data, unsigned int bytes)
1757{
1758	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
1759	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1760	unsigned long index = off >> info->gtt_entry_size_shift;
1761	struct intel_gvt_gtt_entry e;
1762
1763	if (bytes != 4 && bytes != 8)
1764		return -EINVAL;
1765
1766	ggtt_get_guest_entry(ggtt_mm, &e, index);
1767	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
1768			bytes);
1769	return 0;
1770}
1771
1772/**
1773 * intel_vgpu_emulate_gtt_mmio_read - emulate GTT MMIO register read
1774 * @vgpu: a vGPU
1775 * @off: register offset
1776 * @p_data: data will be returned to guest
1777 * @bytes: data length
1778 *
1779 * This function is used to emulate the GTT MMIO register read
1780 *
1781 * Returns:
1782 * Zero on success, error code if failed.
1783 */
1784int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
1785	void *p_data, unsigned int bytes)
1786{
1787	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1788	int ret;
1789
1790	if (bytes != 4 && bytes != 8)
1791		return -EINVAL;
1792
1793	off -= info->gtt_start_offset;
1794	ret = emulate_gtt_mmio_read(vgpu, off, p_data, bytes);
1795	return ret;
1796}
1797
1798static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
1799	void *p_data, unsigned int bytes)
1800{
1801	struct intel_gvt *gvt = vgpu->gvt;
1802	const struct intel_gvt_device_info *info = &gvt->device_info;
1803	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
1804	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1805	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
1806	unsigned long gma;
1807	struct intel_gvt_gtt_entry e, m;
1808	int ret;
1809
1810	if (bytes != 4 && bytes != 8)
1811		return -EINVAL;
1812
1813	gma = g_gtt_index << GTT_PAGE_SHIFT;
1814
1815	/* the VM may configure the whole GM space when ballooning is used */
1816	if (WARN_ONCE(!vgpu_gmadr_is_valid(vgpu, gma),
1817				"vgpu%d: found oob ggtt write, offset %x\n",
1818				vgpu->id, off)) {
1819		return 0;
1820	}
1821
1822	ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);
1823
1824	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
1825			bytes);
1826
1827	if (ops->test_present(&e)) {
1828		ret = gtt_entry_p2m(vgpu, &e, &m);
1829		if (ret) {
1830			gvt_err("vgpu%d: fail to translate guest gtt entry\n",
1831					vgpu->id);
1832			return ret;
1833		}
1834	} else {
1835		m = e;
1836		m.val64 = 0;
1837	}
1838
1839	ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index);
1840	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
1841	return 0;
1842}
1843
 1844/**
1845 * intel_vgpu_emulate_gtt_mmio_write - emulate GTT MMIO register write
1846 * @vgpu: a vGPU
1847 * @off: register offset
1848 * @p_data: data from guest write
1849 * @bytes: data length
1850 *
1851 * This function is used to emulate the GTT MMIO register write
1852 *
1853 * Returns:
1854 * Zero on success, error code if failed.
1855 */
1856int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
1857	void *p_data, unsigned int bytes)
1858{
1859	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1860	int ret;
1861
1862	if (bytes != 4 && bytes != 8)
1863		return -EINVAL;
1864
1865	off -= info->gtt_start_offset;
1866	ret = emulate_gtt_mmio_write(vgpu, off, p_data, bytes);
1867	return ret;
1868}
1869
1870static int alloc_scratch_pages(struct intel_vgpu *vgpu,
1871		intel_gvt_gtt_type_t type)
1872{
1873	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
1874	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1875	int page_entry_num = GTT_PAGE_SIZE >>
1876				vgpu->gvt->device_info.gtt_entry_size_shift;
1877	void *scratch_pt;
1878	unsigned long mfn;
1879	int i;
1880
1881	if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
1882		return -EINVAL;
1883
1884	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
1885	if (!scratch_pt) {
1886		gvt_err("fail to allocate scratch page\n");
1887		return -ENOMEM;
1888	}
1889
1890	mfn = intel_gvt_hypervisor_virt_to_mfn(scratch_pt);
1891	if (mfn == INTEL_GVT_INVALID_ADDR) {
1892		gvt_err("fail to translate vaddr:0x%lx\n", (unsigned long)scratch_pt);
1893		free_page((unsigned long)scratch_pt);
1894		return -EFAULT;
1895	}
1896	gtt->scratch_pt[type].page_mfn = mfn;
1897	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
1898	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
1899			vgpu->id, type, mfn);
1900
1901	/* Build the tree by fully filling the scratch pt with entries which
1902	 * point to the next-level scratch pt or scratch page. scratch_pt[type]
1903	 * indicates the scratch pt/scratch page used by page tables of type
1904	 * 'type'.
1905	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
1906	 * GTT_TYPE_PPGTT_PDE_PT level pt; that scratch_pt itself is of type
1907	 * GTT_TYPE_PPGTT_PTE_PT and is fully filled with the scratch page mfn.
1908	 */
1909	if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) {
1910		struct intel_gvt_gtt_entry se;
1911
1912		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
1913		se.type = get_entry_type(type - 1);
1914		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
1915
1916		/* The entry parameters like present/writeable/cache type
1917		 * are set to the same values as i915's scratch page tree.
1918		 */
1919		se.val64 |= _PAGE_PRESENT | _PAGE_RW;
1920		if (type == GTT_TYPE_PPGTT_PDE_PT)
1921			se.val64 |= PPAT_CACHED_INDEX;
1922
1923		for (i = 0; i < page_entry_num; i++)
1924			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
1925	}
1926
1927	return 0;
1928}
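
/*
 * Illustrative sketch (not part of the driver): dumping the per-level scratch
 * table MFNs set up by alloc_scratch_pages() above. Every level above the PTE
 * level is filled with entries that point at the next lower level's scratch
 * table, so a translation that hits scratch at any level eventually lands on
 * the scratch data page. sample_dump_scratch_page_tree() is a hypothetical
 * debug helper.
 */
static void __maybe_unused sample_dump_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++)
		gvt_dbg_mm("vgpu%d scratch level %d: mfn 0x%lx\n",
			   vgpu->id, i, vgpu->gtt.scratch_pt[i].page_mfn);
}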
1929
1930static int release_scratch_page_tree(struct intel_vgpu *vgpu)
1931{
1932	int i;
1933
1934	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
1935		if (vgpu->gtt.scratch_pt[i].page != NULL) {
1936			__free_page(vgpu->gtt.scratch_pt[i].page);
1937			vgpu->gtt.scratch_pt[i].page = NULL;
1938			vgpu->gtt.scratch_pt[i].page_mfn = 0;
1939		}
1940	}
1941
1942	return 0;
1943}
1944
1945static int create_scratch_page_tree(struct intel_vgpu *vgpu)
1946{
1947	int i, ret;
1948
1949	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
1950		ret = alloc_scratch_pages(vgpu, i);
1951		if (ret)
1952			goto err;
1953	}
1954
1955	return 0;
1956
1957err:
1958	release_scratch_page_tree(vgpu);
1959	return ret;
1960}
1961
1962/**
1963 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
1964 * @vgpu: a vGPU
1965 *
1966 * This function is used to initialize per-vGPU graphics memory virtualization
1967 * components.
1968 *
1969 * Returns:
1970 * Zero on success, error code if failed.
1971 */
1972int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
1973{
1974	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
1975	struct intel_vgpu_mm *ggtt_mm;
1976
1977	hash_init(gtt->guest_page_hash_table);
1978	hash_init(gtt->shadow_page_hash_table);
1979
1980	INIT_LIST_HEAD(&gtt->mm_list_head);
1981	INIT_LIST_HEAD(&gtt->oos_page_list_head);
1982	INIT_LIST_HEAD(&gtt->post_shadow_list_head);
1983
1984	intel_vgpu_reset_ggtt(vgpu);
1985
1986	ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT,
1987			NULL, 1, 0);
1988	if (IS_ERR(ggtt_mm)) {
1989		gvt_err("fail to create mm for ggtt.\n");
1990		return PTR_ERR(ggtt_mm);
1991	}
1992
1993	gtt->ggtt_mm = ggtt_mm;
1994
1995	return create_scratch_page_tree(vgpu);
1996}
1997
1998/**
1999 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
2000 * @vgpu: a vGPU
2001 *
2002 * This function is used to clean up per-vGPU graphics memory virtualization
2003 * components.
2007 */
2008void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
2009{
2010	struct list_head *pos, *n;
2011	struct intel_vgpu_mm *mm;
2012
2013	ppgtt_free_all_shadow_page(vgpu);
2014	release_scratch_page_tree(vgpu);
2015
2016	list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) {
2017		mm = container_of(pos, struct intel_vgpu_mm, list);
2018		vgpu->gvt->gtt.mm_free_page_table(mm);
2019		list_del(&mm->list);
2020		list_del(&mm->lru_list);
2021		kfree(mm);
2022	}
2023}
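
/*
 * Illustrative sketch (not part of the driver): the per-vGPU pairing of the
 * GTT init and cleanup entry points above. sample_vgpu_gtt_lifecycle() is a
 * hypothetical caller; in the real driver these are invoked from the vGPU
 * create and destroy paths respectively.
 */
static int __maybe_unused sample_vgpu_gtt_lifecycle(struct intel_vgpu *vgpu)
{
	int ret;

	ret = intel_vgpu_init_gtt(vgpu);
	if (ret)
		return ret;

	/* ... the vGPU runs; its GGTT/PPGTT accesses are shadowed ... */

	intel_vgpu_clean_gtt(vgpu);
	return 0;
}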
2024
2025static void clean_spt_oos(struct intel_gvt *gvt)
2026{
2027	struct intel_gvt_gtt *gtt = &gvt->gtt;
2028	struct list_head *pos, *n;
2029	struct intel_vgpu_oos_page *oos_page;
2030
2031	WARN(!list_empty(&gtt->oos_page_use_list_head),
2032		"someone is still using oos page\n");
2033
2034	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2035		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2036		list_del(&oos_page->list);
2037		kfree(oos_page);
2038	}
2039}
2040
2041static int setup_spt_oos(struct intel_gvt *gvt)
2042{
2043	struct intel_gvt_gtt *gtt = &gvt->gtt;
2044	struct intel_vgpu_oos_page *oos_page;
2045	int i;
2046	int ret;
2047
2048	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2049	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2050
2051	for (i = 0; i < preallocated_oos_pages; i++) {
2052		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
2053		if (!oos_page) {
2054			gvt_err("fail to pre-allocate oos page\n");
2055			ret = -ENOMEM;
2056			goto fail;
2057		}
2058
2059		INIT_LIST_HEAD(&oos_page->list);
2060		INIT_LIST_HEAD(&oos_page->vm_list);
2061		oos_page->id = i;
2062		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2063	}
2064
2065	gvt_dbg_mm("%d oos pages preallocated\n", i);
2066
2067	return 0;
2068fail:
2069	clean_spt_oos(gvt);
2070	return ret;
2071}
2072
2073/**
2074 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2075 * @vgpu: a vGPU
2076 * @page_table_level: PPGTT page table level
2077 * @root_entry: PPGTT page table root pointers
2078 *
2079 * This function is used to find a PPGTT mm object from mm object pool
2080 *
2081 * Returns:
2082 * pointer to mm object on success, NULL if failed.
2083 */
2084struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
2085		int page_table_level, void *root_entry)
2086{
2087	struct list_head *pos;
2088	struct intel_vgpu_mm *mm;
2089	u64 *src, *dst;
2090
2091	list_for_each(pos, &vgpu->gtt.mm_list_head) {
2092		mm = container_of(pos, struct intel_vgpu_mm, list);
2093		if (mm->type != INTEL_GVT_MM_PPGTT)
2094			continue;
2095
2096		if (mm->page_table_level != page_table_level)
2097			continue;
2098
2099		src = root_entry;
2100		dst = mm->virtual_page_table;
2101
2102		if (page_table_level == 3) {
2103			if (src[0] == dst[0]
2104					&& src[1] == dst[1]
2105					&& src[2] == dst[2]
2106					&& src[3] == dst[3])
2107				return mm;
2108		} else {
2109			if (src[0] == dst[0])
2110				return mm;
2111		}
2112	}
2113	return NULL;
2114}
2115
2116/**
2117 * intel_vgpu_g2v_create_ppgtt_mm - create a PPGTT mm object from
2118 * g2v notification
2119 * @vgpu: a vGPU
2120 * @page_table_level: PPGTT page table level
2121 *
2122 * This function is used to create a PPGTT mm object from a guest to GVT-g
2123 * notification.
2124 *
2125 * Returns:
2126 * Zero on success, negative error code if failed.
2127 */
2128int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
2129		int page_table_level)
2130{
2131	u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
2132	struct intel_vgpu_mm *mm;
2133
2134	if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
2135		return -EINVAL;
2136
2137	mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
2138	if (mm) {
2139		intel_gvt_mm_reference(mm);
2140	} else {
2141		mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT,
2142				pdp, page_table_level, 0);
2143		if (IS_ERR(mm)) {
2144			gvt_err("fail to create mm\n");
2145			return PTR_ERR(mm);
2146		}
2147	}
2148	return 0;
2149}
2150
2151/**
2152 * intel_vgpu_g2v_destroy_ppgtt_mm - destroy a PPGTT mm object from
2153 * g2v notification
2154 * @vgpu: a vGPU
2155 * @page_table_level: PPGTT page table level
2156 *
2157 * This function is used to destroy a PPGTT mm object upon a guest to GVT-g
2158 * notification.
2159 *
2160 * Returns:
2161 * Zero on success, negative error code if failed.
2162 */
2163int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu,
2164		int page_table_level)
2165{
2166	u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
2167	struct intel_vgpu_mm *mm;
2168
2169	if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
2170		return -EINVAL;
2171
2172	mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
2173	if (!mm) {
2174		gvt_err("fail to find ppgtt instance.\n");
2175		return -EINVAL;
2176	}
2177	intel_gvt_mm_unreference(mm);
2178	return 0;
2179}
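
/*
 * Illustrative sketch (not part of the driver): how a PVINFO g2v notification
 * value could be routed to the create/destroy helpers above. The VGT_G2V_*
 * names are assumed to be the ones defined in i915_pvinfo.h; the real
 * dispatch lives in the MMIO handler code, not in this file.
 */
static int __maybe_unused sample_handle_g2v_notification(struct intel_vgpu *vgpu,
		int notification)
{
	switch (notification) {
	case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE:
		return intel_vgpu_g2v_create_ppgtt_mm(vgpu, 3);
	case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE:
		return intel_vgpu_g2v_create_ppgtt_mm(vgpu, 4);
	case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY:
		return intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 3);
	case VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY:
		return intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 4);
	default:
		return 0;
	}
}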
2180
2181/**
2182 * intel_gvt_init_gtt - initialize mm components of a GVT device
2183 * @gvt: GVT device
2184 *
2185 * This function is called at the initialization stage, to initialize
2186 * the mm components of a GVT device.
2187 *
2188 * Returns:
2189 * zero on success, negative error code if failed.
2190 */
2191int intel_gvt_init_gtt(struct intel_gvt *gvt)
2192{
2193	int ret;
2194	void *page;
2195
2196	gvt_dbg_core("init gtt\n");
2197
2198	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
2199		gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2200		gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
2201		gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table;
2202		gvt->gtt.mm_free_page_table = gen8_mm_free_page_table;
2203	} else {
2204		return -ENODEV;
2205	}
2206
2207	page = (void *)get_zeroed_page(GFP_KERNEL);
2208	if (!page) {
2209		gvt_err("fail to allocate scratch ggtt page\n");
2210		return -ENOMEM;
2211	}
2212	gvt->gtt.scratch_ggtt_page = virt_to_page(page);
2213
2214	gvt->gtt.scratch_ggtt_mfn = intel_gvt_hypervisor_virt_to_mfn(page);
2215	if (gvt->gtt.scratch_ggtt_mfn == INTEL_GVT_INVALID_ADDR) {
2216		gvt_err("fail to translate scratch ggtt page\n");
2217		__free_page(gvt->gtt.scratch_ggtt_page);
2218		return -EFAULT;
2219	}
2220
2221	if (enable_out_of_sync) {
2222		ret = setup_spt_oos(gvt);
2223		if (ret) {
2224			gvt_err("fail to initialize SPT oos\n");
2225			return ret;
2226		}
2227	}
2228	INIT_LIST_HEAD(&gvt->gtt.mm_lru_list_head);
2229	return 0;
2230}
2231
2232/**
2233 * intel_gvt_clean_gtt - clean up mm components of a GVT device
2234 * @gvt: GVT device
2235 *
2236 * This function is called at the driver unloading stage, to clean up the
2237 * mm components of a GVT device.
2238 *
2239 */
2240void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2241{
2242	__free_page(gvt->gtt.scratch_ggtt_page);
2243
2244	if (enable_out_of_sync)
2245		clean_spt_oos(gvt);
2246}
2247
2248/**
2249 * intel_vgpu_reset_ggtt - reset the GGTT entry
2250 * @vgpu: a vGPU
2251 *
2252 * This function is called at the vGPU create stage
2253 * to reset all the GGTT entries.
2254 *
2255 */
2256void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
2257{
2258	struct intel_gvt *gvt = vgpu->gvt;
2259	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2260	u32 index;
2261	u32 offset;
2262	u32 num_entries;
2263	struct intel_gvt_gtt_entry e;
2264
2265	memset(&e, 0, sizeof(struct intel_gvt_gtt_entry));
2266	e.type = GTT_TYPE_GGTT_PTE;
2267	ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn);
2268	e.val64 |= _PAGE_PRESENT;
2269
2270	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2271	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2272	for (offset = 0; offset < num_entries; offset++)
2273		ops->set_entry(NULL, &e, index + offset, false, 0, vgpu);
2274
2275	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2276	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2277	for (offset = 0; offset < num_entries; offset++)
2278		ops->set_entry(NULL, &e, index + offset, false, 0, vgpu);
2279}
2280
2281/**
2282 * intel_vgpu_reset_gtt - reset all GTT related status
2283 * @vgpu: a vGPU
2284 * @dmlr: true for vGPU Device Model Level Reset, false for GT Reset
2285 *
2286 * This function is called from vfio core to reset all
2287 * GTT related status, including GGTT, PPGTT, scratch page.
2288 *
2289 */
2290void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr)
2291{
2292	int i;
2293
2294	ppgtt_free_all_shadow_page(vgpu);
2295	if (!dmlr)
2296		return;
2297
2298	intel_vgpu_reset_ggtt(vgpu);
2299
2300	/* clear scratch page for security */
2301	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2302		if (vgpu->gtt.scratch_pt[i].page != NULL)
2303			memset(page_address(vgpu->gtt.scratch_pt[i].page),
2304				0, PAGE_SIZE);
2305	}
2306}