   1/*
   2 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
   3 *
   4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice (including the next
  14 * paragraph) shall be included in all copies or substantial portions of the
  15 * Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 * SOFTWARE.
  24 *
  25 * Authors:
  26 *    Kevin Tian <kevin.tian@intel.com>
  27 *    Jike Song <jike.song@intel.com>
  28 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
  29 *    Eddie Dong <eddie.dong@intel.com>
  30 *
  31 * Contributors:
  32 *    Niu Bing <bing.niu@intel.com>
  33 *    Zhi Wang <zhi.a.wang@intel.com>
  34 */
  35
  36#include <linux/init.h>
  37#include <linux/mm.h>
  38#include <linux/kthread.h>
  39#include <linux/sched/mm.h>
  40#include <linux/types.h>
  41#include <linux/list.h>
  42#include <linux/rbtree.h>
  43#include <linux/spinlock.h>
  44#include <linux/eventfd.h>
  45#include <linux/mdev.h>
  46#include <linux/debugfs.h>
  47
  48#include <linux/nospec.h>
  49
  50#include <drm/drm_edid.h>
  51
  52#include "i915_drv.h"
  53#include "intel_gvt.h"
  54#include "gvt.h"
  55
  56MODULE_IMPORT_NS(DMA_BUF);
  57MODULE_IMPORT_NS(I915_GVT);
  58
  59/* helper macros copied from vfio-pci */
  60#define VFIO_PCI_OFFSET_SHIFT   40
  61#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
  62#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
  63#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
  64
  65#define EDID_BLOB_OFFSET (PAGE_SIZE/2)
  66
  67#define OPREGION_SIGNATURE "IntelGraphicsMem"
  68
  69struct vfio_region;
  70struct intel_vgpu_regops {
  71	size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
  72			size_t count, loff_t *ppos, bool iswrite);
  73	void (*release)(struct intel_vgpu *vgpu,
  74			struct vfio_region *region);
  75};
  76
  77struct vfio_region {
  78	u32				type;
  79	u32				subtype;
  80	size_t				size;
  81	u32				flags;
  82	const struct intel_vgpu_regops	*ops;
  83	void				*data;
  84};
  85
  86struct vfio_edid_region {
  87	struct vfio_region_gfx_edid vfio_edid_regs;
  88	void *edid_blob;
  89};
  90
  91struct kvmgt_pgfn {
  92	gfn_t gfn;
  93	struct hlist_node hnode;
  94};
  95
  96struct gvt_dma {
  97	struct intel_vgpu *vgpu;
  98	struct rb_node gfn_node;
  99	struct rb_node dma_addr_node;
 100	gfn_t gfn;
 101	dma_addr_t dma_addr;
 102	unsigned long size;
 103	struct kref ref;
 104};
 105
 106#define vfio_dev_to_vgpu(vfio_dev) \
 107	container_of((vfio_dev), struct intel_vgpu, vfio_device)
 108
 109static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 110		const u8 *val, int len,
 111		struct kvm_page_track_notifier_node *node);
 112static void kvmgt_page_track_flush_slot(struct kvm *kvm,
 113		struct kvm_memory_slot *slot,
 114		struct kvm_page_track_notifier_node *node);
 115
 116static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf)
 117{
 118	struct intel_vgpu_type *type =
 119		container_of(mtype, struct intel_vgpu_type, type);
 120
 121	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
 122		       "fence: %d\nresolution: %s\n"
 123		       "weight: %d\n",
 124		       BYTES_TO_MB(type->conf->low_mm),
 125		       BYTES_TO_MB(type->conf->high_mm),
 126		       type->conf->fence, vgpu_edid_str(type->conf->edid),
 127		       type->conf->weight);
 128}
 129
 130static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 131		unsigned long size)
 132{
 133	vfio_unpin_pages(&vgpu->vfio_device, gfn << PAGE_SHIFT,
 134			 DIV_ROUND_UP(size, PAGE_SIZE));
 135}
 136
 137/* Pin a normal or compound guest page for dma. */
 138static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 139		unsigned long size, struct page **page)
 140{
 141	int total_pages = DIV_ROUND_UP(size, PAGE_SIZE);
 142	struct page *base_page = NULL;
 143	int npage;
 144	int ret;
 145
 146	/*
  147	 * We pin the pages one-by-one to avoid allocating a big array
 148	 * on stack to hold pfns.
 149	 */
 150	for (npage = 0; npage < total_pages; npage++) {
 151		dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT;
 152		struct page *cur_page;
 153
 154		ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1,
 155				     IOMMU_READ | IOMMU_WRITE, &cur_page);
 156		if (ret != 1) {
 157			gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n",
 158				     &cur_iova, ret);
 159			goto err;
 160		}
 161
 162		if (npage == 0)
 163			base_page = cur_page;
 164		else if (base_page + npage != cur_page) {
 165			gvt_vgpu_err("The pages are not continuous\n");
 166			ret = -EINVAL;
 167			npage++;
 168			goto err;
 169		}
 170	}
 171
 172	*page = base_page;
 173	return 0;
 174err:
 175	gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
 176	return ret;
 177}
 178
 179static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
 180		dma_addr_t *dma_addr, unsigned long size)
 181{
 182	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
 183	struct page *page = NULL;
 184	int ret;
 185
 186	ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
 187	if (ret)
 188		return ret;
 189
 190	/* Setup DMA mapping. */
 191	*dma_addr = dma_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL);
 192	if (dma_mapping_error(dev, *dma_addr)) {
 193		gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n",
 194			     page_to_pfn(page), ret);
 195		gvt_unpin_guest_page(vgpu, gfn, size);
 196		return -ENOMEM;
 197	}
 198
 199	return 0;
 200}
 201
 202static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
 203		dma_addr_t dma_addr, unsigned long size)
 204{
 205	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
 206
 207	dma_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL);
 208	gvt_unpin_guest_page(vgpu, gfn, size);
 209}
 210
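/* Look up a cached guest page mapping by its host DMA address in the dma_addr rbtree. */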
 211static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
 212		dma_addr_t dma_addr)
 213{
 214	struct rb_node *node = vgpu->dma_addr_cache.rb_node;
 215	struct gvt_dma *itr;
 216
 217	while (node) {
 218		itr = rb_entry(node, struct gvt_dma, dma_addr_node);
 219
 220		if (dma_addr < itr->dma_addr)
 221			node = node->rb_left;
 222		else if (dma_addr > itr->dma_addr)
 223			node = node->rb_right;
 224		else
 225			return itr;
 226	}
 227	return NULL;
 228}
 229
 230static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
 231{
 232	struct rb_node *node = vgpu->gfn_cache.rb_node;
 233	struct gvt_dma *itr;
 234
 235	while (node) {
 236		itr = rb_entry(node, struct gvt_dma, gfn_node);
 237
 238		if (gfn < itr->gfn)
 239			node = node->rb_left;
 240		else if (gfn > itr->gfn)
 241			node = node->rb_right;
 242		else
 243			return itr;
 244	}
 245	return NULL;
 246}
 247
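/* Allocate a gvt_dma entry for a mapped guest page and insert it into both the gfn and dma_addr rbtrees. */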
 248static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 249		dma_addr_t dma_addr, unsigned long size)
 250{
 251	struct gvt_dma *new, *itr;
 252	struct rb_node **link, *parent = NULL;
 253
 254	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
 255	if (!new)
 256		return -ENOMEM;
 257
 258	new->vgpu = vgpu;
 259	new->gfn = gfn;
 260	new->dma_addr = dma_addr;
 261	new->size = size;
 262	kref_init(&new->ref);
 263
 264	/* gfn_cache maps gfn to struct gvt_dma. */
 265	link = &vgpu->gfn_cache.rb_node;
 266	while (*link) {
 267		parent = *link;
 268		itr = rb_entry(parent, struct gvt_dma, gfn_node);
 269
 270		if (gfn < itr->gfn)
 271			link = &parent->rb_left;
 272		else
 273			link = &parent->rb_right;
 274	}
 275	rb_link_node(&new->gfn_node, parent, link);
 276	rb_insert_color(&new->gfn_node, &vgpu->gfn_cache);
 277
 278	/* dma_addr_cache maps dma addr to struct gvt_dma. */
 279	parent = NULL;
 280	link = &vgpu->dma_addr_cache.rb_node;
 281	while (*link) {
 282		parent = *link;
 283		itr = rb_entry(parent, struct gvt_dma, dma_addr_node);
 284
 285		if (dma_addr < itr->dma_addr)
 286			link = &parent->rb_left;
 287		else
 288			link = &parent->rb_right;
 289	}
 290	rb_link_node(&new->dma_addr_node, parent, link);
 291	rb_insert_color(&new->dma_addr_node, &vgpu->dma_addr_cache);
 292
 293	vgpu->nr_cache_entries++;
 294	return 0;
 295}
 296
 297static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
 298				struct gvt_dma *entry)
 299{
 300	rb_erase(&entry->gfn_node, &vgpu->gfn_cache);
 301	rb_erase(&entry->dma_addr_node, &vgpu->dma_addr_cache);
 302	kfree(entry);
 303	vgpu->nr_cache_entries--;
 304}
 305
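/* Unmap and free every remaining entry in the DMA mapping cache. */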
 306static void gvt_cache_destroy(struct intel_vgpu *vgpu)
 307{
 308	struct gvt_dma *dma;
 309	struct rb_node *node = NULL;
 310
 311	for (;;) {
 312		mutex_lock(&vgpu->cache_lock);
 313		node = rb_first(&vgpu->gfn_cache);
 314		if (!node) {
 315			mutex_unlock(&vgpu->cache_lock);
 316			break;
 317		}
 318		dma = rb_entry(node, struct gvt_dma, gfn_node);
 319		gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
 320		__gvt_cache_remove_entry(vgpu, dma);
 321		mutex_unlock(&vgpu->cache_lock);
 322	}
 323}
 324
 325static void gvt_cache_init(struct intel_vgpu *vgpu)
 326{
 327	vgpu->gfn_cache = RB_ROOT;
 328	vgpu->dma_addr_cache = RB_ROOT;
 329	vgpu->nr_cache_entries = 0;
 330	mutex_init(&vgpu->cache_lock);
 331}
 332
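/* The protect table records which guest page frames are currently write-protected via KVM page tracking. */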
 333static void kvmgt_protect_table_init(struct intel_vgpu *info)
 334{
 335	hash_init(info->ptable);
 336}
 337
 338static void kvmgt_protect_table_destroy(struct intel_vgpu *info)
 339{
 340	struct kvmgt_pgfn *p;
 341	struct hlist_node *tmp;
 342	int i;
 343
 344	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
 345		hash_del(&p->hnode);
 346		kfree(p);
 347	}
 348}
 349
 350static struct kvmgt_pgfn *
 351__kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn)
 352{
 353	struct kvmgt_pgfn *p, *res = NULL;
 354
 355	hash_for_each_possible(info->ptable, p, hnode, gfn) {
 356		if (gfn == p->gfn) {
 357			res = p;
 358			break;
 359		}
 360	}
 361
 362	return res;
 363}
 364
 365static bool kvmgt_gfn_is_write_protected(struct intel_vgpu *info, gfn_t gfn)
 366{
 367	struct kvmgt_pgfn *p;
 368
 369	p = __kvmgt_protect_table_find(info, gfn);
 370	return !!p;
 371}
 372
 373static void kvmgt_protect_table_add(struct intel_vgpu *info, gfn_t gfn)
 374{
 375	struct kvmgt_pgfn *p;
 376
 377	if (kvmgt_gfn_is_write_protected(info, gfn))
 378		return;
 379
 380	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
 381	if (WARN(!p, "gfn: 0x%llx\n", gfn))
 382		return;
 383
 384	p->gfn = gfn;
 385	hash_add(info->ptable, &p->hnode, gfn);
 386}
 387
 388static void kvmgt_protect_table_del(struct intel_vgpu *info, gfn_t gfn)
 389{
 390	struct kvmgt_pgfn *p;
 391
 392	p = __kvmgt_protect_table_find(info, gfn);
 393	if (p) {
 394		hash_del(&p->hnode);
 395		kfree(p);
 396	}
 397}
 398
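/* Handler for the IGD OpRegion device-specific region; the region is exposed read-only, so writes are rejected. */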
 399static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
 400		size_t count, loff_t *ppos, bool iswrite)
 401{
 402	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
 403			VFIO_PCI_NUM_REGIONS;
 404	void *base = vgpu->region[i].data;
 405	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
 406
 407
 408	if (pos >= vgpu->region[i].size || iswrite) {
 409		gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
 410		return -EINVAL;
 411	}
 412	count = min(count, (size_t)(vgpu->region[i].size - pos));
 413	memcpy(buf, base + pos, count);
 414
 415	return count;
 416}
 417
 418static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
 419		struct vfio_region *region)
 420{
 421}
 422
 423static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
 424	.rw = intel_vgpu_reg_rw_opregion,
 425	.release = intel_vgpu_reg_release_opregion,
 426};
 427
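/*
 * Emulate the vfio_region_gfx_edid control registers; writing link_state
 * validates the EDID blob and triggers hotplug emulation.
 */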
 428static int handle_edid_regs(struct intel_vgpu *vgpu,
 429			struct vfio_edid_region *region, char *buf,
 430			size_t count, u16 offset, bool is_write)
 431{
 432	struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
 433	unsigned int data;
 434
 435	if (offset + count > sizeof(*regs))
 436		return -EINVAL;
 437
 438	if (count != 4)
 439		return -EINVAL;
 440
 441	if (is_write) {
 442		data = *((unsigned int *)buf);
 443		switch (offset) {
 444		case offsetof(struct vfio_region_gfx_edid, link_state):
 445			if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
 446				if (!drm_edid_block_valid(
 447					(u8 *)region->edid_blob,
 448					0,
 449					true,
 450					NULL)) {
 451					gvt_vgpu_err("invalid EDID blob\n");
 452					return -EINVAL;
 453				}
 454				intel_vgpu_emulate_hotplug(vgpu, true);
 455			} else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
 456				intel_vgpu_emulate_hotplug(vgpu, false);
 457			else {
 458				gvt_vgpu_err("invalid EDID link state %d\n",
 459					regs->link_state);
 460				return -EINVAL;
 461			}
 462			regs->link_state = data;
 463			break;
 464		case offsetof(struct vfio_region_gfx_edid, edid_size):
 465			if (data > regs->edid_max_size) {
 466				gvt_vgpu_err("EDID size is bigger than %d!\n",
 467					regs->edid_max_size);
 468				return -EINVAL;
 469			}
 470			regs->edid_size = data;
 471			break;
 472		default:
 473			/* read-only regs */
 474			gvt_vgpu_err("write read-only EDID region at offset %d\n",
 475				offset);
 476			return -EPERM;
 477		}
 478	} else {
 479		memcpy(buf, (char *)regs + offset, count);
 480	}
 481
 482	return count;
 483}
 484
 485static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
 486			size_t count, u16 offset, bool is_write)
 487{
 488	if (offset + count > region->vfio_edid_regs.edid_size)
 489		return -EINVAL;
 490
 491	if (is_write)
 492		memcpy(region->edid_blob + offset, buf, count);
 493	else
 494		memcpy(buf, region->edid_blob + offset, count);
 495
 496	return count;
 497}
 498
 499static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
 500		size_t count, loff_t *ppos, bool iswrite)
 501{
 502	int ret;
 503	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
 504			VFIO_PCI_NUM_REGIONS;
 505	struct vfio_edid_region *region = vgpu->region[i].data;
 506	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
 507
 508	if (pos < region->vfio_edid_regs.edid_offset) {
 509		ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
 510	} else {
 511		pos -= EDID_BLOB_OFFSET;
 512		ret = handle_edid_blob(region, buf, count, pos, iswrite);
 513	}
 514
 515	if (ret < 0)
 516		gvt_vgpu_err("failed to access EDID region\n");
 517
 518	return ret;
 519}
 520
 521static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
 522					struct vfio_region *region)
 523{
 524	kfree(region->data);
 525}
 526
 527static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
 528	.rw = intel_vgpu_reg_rw_edid,
 529	.release = intel_vgpu_reg_release_edid,
 530};
 531
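/* Grow the vgpu's region array and append a new device-specific VFIO region. */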
 532static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
 533		unsigned int type, unsigned int subtype,
 534		const struct intel_vgpu_regops *ops,
 535		size_t size, u32 flags, void *data)
 536{
 537	struct vfio_region *region;
 538
 539	region = krealloc(vgpu->region,
 540			(vgpu->num_regions + 1) * sizeof(*region),
 541			GFP_KERNEL);
 542	if (!region)
 543		return -ENOMEM;
 544
 545	vgpu->region = region;
 546	vgpu->region[vgpu->num_regions].type = type;
 547	vgpu->region[vgpu->num_regions].subtype = subtype;
 548	vgpu->region[vgpu->num_regions].ops = ops;
 549	vgpu->region[vgpu->num_regions].size = size;
 550	vgpu->region[vgpu->num_regions].flags = flags;
 551	vgpu->region[vgpu->num_regions].data = data;
 552	vgpu->num_regions++;
 553	return 0;
 554}
 555
 556int intel_gvt_set_opregion(struct intel_vgpu *vgpu)
 557{
 558	void *base;
 559	int ret;
 560
  561	/* Each vgpu has its own opregion, although VFIO would create another
  562	 * one later. This one is used to expose the opregion to VFIO; the
  563	 * one created by VFIO later is what the guest actually uses.
  564	 */
 565	base = vgpu_opregion(vgpu)->va;
 566	if (!base)
 567		return -ENOMEM;
 568
 569	if (memcmp(base, OPREGION_SIGNATURE, 16)) {
 570		memunmap(base);
 571		return -EINVAL;
 572	}
 573
 574	ret = intel_vgpu_register_reg(vgpu,
 575			PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
 576			VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
 577			&intel_vgpu_regops_opregion, OPREGION_SIZE,
 578			VFIO_REGION_INFO_FLAG_READ, base);
 579
 580	return ret;
 581}
 582
 583int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
 584{
 585	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
 586	struct vfio_edid_region *base;
 587	int ret;
 588
 589	base = kzalloc(sizeof(*base), GFP_KERNEL);
 590	if (!base)
 591		return -ENOMEM;
 592
 593	/* TODO: Add multi-port and EDID extension block support */
 594	base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
 595	base->vfio_edid_regs.edid_max_size = EDID_SIZE;
 596	base->vfio_edid_regs.edid_size = EDID_SIZE;
 597	base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
 598	base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
 599	base->edid_blob = port->edid->edid_block;
 600
 601	ret = intel_vgpu_register_reg(vgpu,
 602			VFIO_REGION_TYPE_GFX,
 603			VFIO_REGION_SUBTYPE_GFX_EDID,
 604			&intel_vgpu_regops_edid, EDID_SIZE,
 605			VFIO_REGION_INFO_FLAG_READ |
 606			VFIO_REGION_INFO_FLAG_WRITE |
 607			VFIO_REGION_INFO_FLAG_CAPS, base);
 608
 609	return ret;
 610}
 611
 612static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
 613				 u64 length)
 614{
 615	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 616	struct gvt_dma *entry;
 617	u64 iov_pfn = iova >> PAGE_SHIFT;
 618	u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
 619
 620	mutex_lock(&vgpu->cache_lock);
 621	for (; iov_pfn < end_iov_pfn; iov_pfn++) {
 622		entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
 623		if (!entry)
 624			continue;
 625
 626		gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
 627				   entry->size);
 628		__gvt_cache_remove_entry(vgpu, entry);
 629	}
 630	mutex_unlock(&vgpu->cache_lock);
 631}
 632
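/* Return true if another attached vGPU is already bound to the same KVM instance. */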
 633static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
 634{
 635	struct intel_vgpu *itr;
 636	int id;
 637	bool ret = false;
 638
 639	mutex_lock(&vgpu->gvt->lock);
 640	for_each_active_vgpu(vgpu->gvt, itr, id) {
 641		if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, itr->status))
 642			continue;
 643
 644		if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
 645			ret = true;
 646			goto out;
 647		}
 648	}
 649out:
 650	mutex_unlock(&vgpu->gvt->lock);
 651	return ret;
 652}
 653
 654static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 655{
 656	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 657
 658	if (!vgpu->vfio_device.kvm ||
 659	    vgpu->vfio_device.kvm->mm != current->mm) {
 660		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
 661		return -ESRCH;
 662	}
 663
 664	if (__kvmgt_vgpu_exist(vgpu))
 665		return -EEXIST;
 666
 667	vgpu->track_node.track_write = kvmgt_page_track_write;
 668	vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
 669	kvm_get_kvm(vgpu->vfio_device.kvm);
 670	kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
 671					 &vgpu->track_node);
 672
 673	set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
 674
 675	debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
 676			     &vgpu->nr_cache_entries);
 677
 678	intel_gvt_activate_vgpu(vgpu);
 679
 680	return 0;
 681}
 682
 683static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
 684{
 685	struct eventfd_ctx *trigger;
 686
 687	trigger = vgpu->msi_trigger;
 688	if (trigger) {
 689		eventfd_ctx_put(trigger);
 690		vgpu->msi_trigger = NULL;
 691	}
 692}
 693
 694static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
 695{
 696	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 697
 698	intel_gvt_release_vgpu(vgpu);
 699
 700	clear_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
 701
 702	debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
 703
 704	kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
 705					   &vgpu->track_node);
 706	kvm_put_kvm(vgpu->vfio_device.kvm);
 707
 708	kvmgt_protect_table_destroy(vgpu);
 709	gvt_cache_destroy(vgpu);
 710
 711	WARN_ON(vgpu->nr_cache_entries);
 712
 713	vgpu->gfn_cache = RB_ROOT;
 714	vgpu->dma_addr_cache = RB_ROOT;
 715
 716	intel_vgpu_release_msi_eventfd_ctx(vgpu);
 717}
 718
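/* Read a BAR base address from the virtual config space, including the high dword for 64-bit memory BARs. */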
 719static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
 720{
 721	u32 start_lo, start_hi;
 722	u32 mem_type;
 723
 724	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
 725			PCI_BASE_ADDRESS_MEM_MASK;
 726	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
 727			PCI_BASE_ADDRESS_MEM_TYPE_MASK;
 728
 729	switch (mem_type) {
 730	case PCI_BASE_ADDRESS_MEM_TYPE_64:
 731		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
 732						+ bar + 4));
 733		break;
 734	case PCI_BASE_ADDRESS_MEM_TYPE_32:
 735	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
 736		/* 1M mem BAR treated as 32-bit BAR */
 737	default:
 738		/* mem unknown type treated as 32-bit BAR */
 739		start_hi = 0;
 740		break;
 741	}
 742
 743	return ((u64)start_hi << 32) | start_lo;
 744}
 745
 746static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
 747			     void *buf, unsigned int count, bool is_write)
 748{
 749	u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
 750	int ret;
 751
 752	if (is_write)
 753		ret = intel_vgpu_emulate_mmio_write(vgpu,
 754					bar_start + off, buf, count);
 755	else
 756		ret = intel_vgpu_emulate_mmio_read(vgpu,
 757					bar_start + off, buf, count);
 758	return ret;
 759}
 760
 761static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
 762{
 763	return off >= vgpu_aperture_offset(vgpu) &&
 764	       off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
 765}
 766
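/* Copy to/from the vGPU's aperture slice through a temporary write-combining mapping of the GGTT iomap. */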
 767static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
 768		void *buf, unsigned long count, bool is_write)
 769{
 770	void __iomem *aperture_va;
 771
 772	if (!intel_vgpu_in_aperture(vgpu, off) ||
 773	    !intel_vgpu_in_aperture(vgpu, off + count)) {
 774		gvt_vgpu_err("Invalid aperture offset %llu\n", off);
 775		return -EINVAL;
 776	}
 777
 778	aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap,
 779					ALIGN_DOWN(off, PAGE_SIZE),
 780					count + offset_in_page(off));
 781	if (!aperture_va)
 782		return -EIO;
 783
 784	if (is_write)
 785		memcpy_toio(aperture_va + offset_in_page(off), buf, count);
 786	else
 787		memcpy_fromio(buf, aperture_va + offset_in_page(off), count);
 788
 789	io_mapping_unmap(aperture_va);
 790
 791	return 0;
 792}
 793
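/*
 * Route an access by VFIO region index: PCI config space, BAR0 MMIO,
 * the BAR2 aperture, or a registered device-specific region.
 */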
 794static ssize_t intel_vgpu_rw(struct intel_vgpu *vgpu, char *buf,
 795			size_t count, loff_t *ppos, bool is_write)
 796{
 797	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 798	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
 799	int ret = -EINVAL;
 800
 801
 802	if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) {
 803		gvt_vgpu_err("invalid index: %u\n", index);
 804		return -EINVAL;
 805	}
 806
 807	switch (index) {
 808	case VFIO_PCI_CONFIG_REGION_INDEX:
 809		if (is_write)
 810			ret = intel_vgpu_emulate_cfg_write(vgpu, pos,
 811						buf, count);
 812		else
 813			ret = intel_vgpu_emulate_cfg_read(vgpu, pos,
 814						buf, count);
 815		break;
 816	case VFIO_PCI_BAR0_REGION_INDEX:
 817		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
 818					buf, count, is_write);
 819		break;
 820	case VFIO_PCI_BAR2_REGION_INDEX:
 821		ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
 822		break;
 823	case VFIO_PCI_BAR1_REGION_INDEX:
 824	case VFIO_PCI_BAR3_REGION_INDEX:
 825	case VFIO_PCI_BAR4_REGION_INDEX:
 826	case VFIO_PCI_BAR5_REGION_INDEX:
 827	case VFIO_PCI_VGA_REGION_INDEX:
 828	case VFIO_PCI_ROM_REGION_INDEX:
 829		break;
 830	default:
 831		if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions)
 832			return -EINVAL;
 833
 834		index -= VFIO_PCI_NUM_REGIONS;
 835		return vgpu->region[index].ops->rw(vgpu, buf, count,
 836				ppos, is_write);
 837	}
 838
 839	return ret == 0 ? count : ret;
 840}
 841
 842static bool gtt_entry(struct intel_vgpu *vgpu, loff_t *ppos)
 843{
 844	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 845	struct intel_gvt *gvt = vgpu->gvt;
 846	int offset;
 847
 848	/* Only allow MMIO GGTT entry access */
 849	if (index != PCI_BASE_ADDRESS_0)
 850		return false;
 851
 852	offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
 853		intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
 854
 855	return (offset >= gvt->device_info.gtt_start_offset &&
 856		offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
 857			true : false;
 858}
 859
 860static ssize_t intel_vgpu_read(struct vfio_device *vfio_dev, char __user *buf,
 861			size_t count, loff_t *ppos)
 862{
 863	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 864	unsigned int done = 0;
 865	int ret;
 866
 867	while (count) {
 868		size_t filled;
 869
 870		/* Only support GGTT entry 8 bytes read */
 871		if (count >= 8 && !(*ppos % 8) &&
 872			gtt_entry(vgpu, ppos)) {
 873			u64 val;
 874
 875			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
 876					ppos, false);
 877			if (ret <= 0)
 878				goto read_err;
 879
 880			if (copy_to_user(buf, &val, sizeof(val)))
 881				goto read_err;
 882
 883			filled = 8;
 884		} else if (count >= 4 && !(*ppos % 4)) {
 885			u32 val;
 886
 887			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
 888					ppos, false);
 889			if (ret <= 0)
 890				goto read_err;
 891
 892			if (copy_to_user(buf, &val, sizeof(val)))
 893				goto read_err;
 894
 895			filled = 4;
 896		} else if (count >= 2 && !(*ppos % 2)) {
 897			u16 val;
 898
 899			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
 900					ppos, false);
 901			if (ret <= 0)
 902				goto read_err;
 903
 904			if (copy_to_user(buf, &val, sizeof(val)))
 905				goto read_err;
 906
 907			filled = 2;
 908		} else {
 909			u8 val;
 910
 911			ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos,
 912					false);
 913			if (ret <= 0)
 914				goto read_err;
 915
 916			if (copy_to_user(buf, &val, sizeof(val)))
 917				goto read_err;
 918
 919			filled = 1;
 920		}
 921
 922		count -= filled;
 923		done += filled;
 924		*ppos += filled;
 925		buf += filled;
 926	}
 927
 928	return done;
 929
 930read_err:
 931	return -EFAULT;
 932}
 933
 934static ssize_t intel_vgpu_write(struct vfio_device *vfio_dev,
 935				const char __user *buf,
 936				size_t count, loff_t *ppos)
 937{
 938	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 939	unsigned int done = 0;
 940	int ret;
 941
 942	while (count) {
 943		size_t filled;
 944
 945		/* Only support GGTT entry 8 bytes write */
 946		if (count >= 8 && !(*ppos % 8) &&
 947			gtt_entry(vgpu, ppos)) {
 948			u64 val;
 949
 950			if (copy_from_user(&val, buf, sizeof(val)))
 951				goto write_err;
 952
 953			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
 954					ppos, true);
 955			if (ret <= 0)
 956				goto write_err;
 957
 958			filled = 8;
 959		} else if (count >= 4 && !(*ppos % 4)) {
 960			u32 val;
 961
 962			if (copy_from_user(&val, buf, sizeof(val)))
 963				goto write_err;
 964
 965			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
 966					ppos, true);
 967			if (ret <= 0)
 968				goto write_err;
 969
 970			filled = 4;
 971		} else if (count >= 2 && !(*ppos % 2)) {
 972			u16 val;
 973
 974			if (copy_from_user(&val, buf, sizeof(val)))
 975				goto write_err;
 976
 977			ret = intel_vgpu_rw(vgpu, (char *)&val,
 978					sizeof(val), ppos, true);
 979			if (ret <= 0)
 980				goto write_err;
 981
 982			filled = 2;
 983		} else {
 984			u8 val;
 985
 986			if (copy_from_user(&val, buf, sizeof(val)))
 987				goto write_err;
 988
 989			ret = intel_vgpu_rw(vgpu, &val, sizeof(val),
 990					ppos, true);
 991			if (ret <= 0)
 992				goto write_err;
 993
 994			filled = 1;
 995		}
 996
 997		count -= filled;
 998		done += filled;
 999		*ppos += filled;
1000		buf += filled;
1001	}
1002
1003	return done;
1004write_err:
1005	return -EFAULT;
1006}
1007
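/* Only the BAR2 aperture may be mmapped; convert the VFIO offset into host aperture pfns and remap them into the VMA. */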
1008static int intel_vgpu_mmap(struct vfio_device *vfio_dev,
1009		struct vm_area_struct *vma)
1010{
1011	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1012	unsigned int index;
1013	u64 virtaddr;
1014	unsigned long req_size, pgoff, req_start;
1015	pgprot_t pg_prot;
1016
1017	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
1018	if (index >= VFIO_PCI_ROM_REGION_INDEX)
1019		return -EINVAL;
1020
1021	if (vma->vm_end < vma->vm_start)
1022		return -EINVAL;
1023	if ((vma->vm_flags & VM_SHARED) == 0)
1024		return -EINVAL;
1025	if (index != VFIO_PCI_BAR2_REGION_INDEX)
1026		return -EINVAL;
1027
1028	pg_prot = vma->vm_page_prot;
1029	virtaddr = vma->vm_start;
1030	req_size = vma->vm_end - vma->vm_start;
1031	pgoff = vma->vm_pgoff &
1032		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
1033	req_start = pgoff << PAGE_SHIFT;
1034
1035	if (!intel_vgpu_in_aperture(vgpu, req_start))
1036		return -EINVAL;
1037	if (req_start + req_size >
1038	    vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
1039		return -EINVAL;
1040
1041	pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;
1042
1043	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
1044}
1045
1046static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
1047{
1048	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
1049		return 1;
1050
1051	return 0;
1052}
1053
1054static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
1055			unsigned int index, unsigned int start,
1056			unsigned int count, u32 flags,
1057			void *data)
1058{
1059	return 0;
1060}
1061
1062static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
1063			unsigned int index, unsigned int start,
1064			unsigned int count, u32 flags, void *data)
1065{
1066	return 0;
1067}
1068
1069static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
1070		unsigned int index, unsigned int start, unsigned int count,
1071		u32 flags, void *data)
1072{
1073	return 0;
1074}
1075
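/* Cache the eventfd signalled to inject MSI into the guest; DATA_NONE with a zero count releases it. */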
1076static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
1077		unsigned int index, unsigned int start, unsigned int count,
1078		u32 flags, void *data)
1079{
1080	struct eventfd_ctx *trigger;
1081
1082	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
1083		int fd = *(int *)data;
1084
1085		trigger = eventfd_ctx_fdget(fd);
1086		if (IS_ERR(trigger)) {
1087			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
1088			return PTR_ERR(trigger);
1089		}
1090		vgpu->msi_trigger = trigger;
1091	} else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
1092		intel_vgpu_release_msi_eventfd_ctx(vgpu);
1093
1094	return 0;
1095}
1096
1097static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
1098		unsigned int index, unsigned int start, unsigned int count,
1099		void *data)
1100{
1101	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
1102			unsigned int start, unsigned int count, u32 flags,
1103			void *data) = NULL;
1104
1105	switch (index) {
1106	case VFIO_PCI_INTX_IRQ_INDEX:
1107		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1108		case VFIO_IRQ_SET_ACTION_MASK:
1109			func = intel_vgpu_set_intx_mask;
1110			break;
1111		case VFIO_IRQ_SET_ACTION_UNMASK:
1112			func = intel_vgpu_set_intx_unmask;
1113			break;
1114		case VFIO_IRQ_SET_ACTION_TRIGGER:
1115			func = intel_vgpu_set_intx_trigger;
1116			break;
1117		}
1118		break;
1119	case VFIO_PCI_MSI_IRQ_INDEX:
1120		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1121		case VFIO_IRQ_SET_ACTION_MASK:
1122		case VFIO_IRQ_SET_ACTION_UNMASK:
1123			/* XXX Need masking support exported */
1124			break;
1125		case VFIO_IRQ_SET_ACTION_TRIGGER:
1126			func = intel_vgpu_set_msi_trigger;
1127			break;
1128		}
1129		break;
1130	}
1131
1132	if (!func)
1133		return -ENOTTY;
1134
1135	return func(vgpu, index, start, count, flags, data);
1136}
1137
1138static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
1139			     unsigned long arg)
1140{
1141	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1142	unsigned long minsz;
1143
1144	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);
1145
1146	if (cmd == VFIO_DEVICE_GET_INFO) {
1147		struct vfio_device_info info;
1148
1149		minsz = offsetofend(struct vfio_device_info, num_irqs);
1150
1151		if (copy_from_user(&info, (void __user *)arg, minsz))
1152			return -EFAULT;
1153
1154		if (info.argsz < minsz)
1155			return -EINVAL;
1156
1157		info.flags = VFIO_DEVICE_FLAGS_PCI;
1158		info.flags |= VFIO_DEVICE_FLAGS_RESET;
1159		info.num_regions = VFIO_PCI_NUM_REGIONS +
1160				vgpu->num_regions;
1161		info.num_irqs = VFIO_PCI_NUM_IRQS;
1162
1163		return copy_to_user((void __user *)arg, &info, minsz) ?
1164			-EFAULT : 0;
1165
1166	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
1167		struct vfio_region_info info;
1168		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
1169		unsigned int i;
1170		int ret;
1171		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
1172		int nr_areas = 1;
1173		int cap_type_id;
1174
1175		minsz = offsetofend(struct vfio_region_info, offset);
1176
1177		if (copy_from_user(&info, (void __user *)arg, minsz))
1178			return -EFAULT;
1179
1180		if (info.argsz < minsz)
1181			return -EINVAL;
1182
1183		switch (info.index) {
1184		case VFIO_PCI_CONFIG_REGION_INDEX:
1185			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1186			info.size = vgpu->gvt->device_info.cfg_space_size;
1187			info.flags = VFIO_REGION_INFO_FLAG_READ |
1188				     VFIO_REGION_INFO_FLAG_WRITE;
1189			break;
1190		case VFIO_PCI_BAR0_REGION_INDEX:
1191			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1192			info.size = vgpu->cfg_space.bar[info.index].size;
1193			if (!info.size) {
1194				info.flags = 0;
1195				break;
1196			}
1197
1198			info.flags = VFIO_REGION_INFO_FLAG_READ |
1199				     VFIO_REGION_INFO_FLAG_WRITE;
1200			break;
1201		case VFIO_PCI_BAR1_REGION_INDEX:
1202			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1203			info.size = 0;
1204			info.flags = 0;
1205			break;
1206		case VFIO_PCI_BAR2_REGION_INDEX:
1207			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1208			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
1209					VFIO_REGION_INFO_FLAG_MMAP |
1210					VFIO_REGION_INFO_FLAG_READ |
1211					VFIO_REGION_INFO_FLAG_WRITE;
1212			info.size = gvt_aperture_sz(vgpu->gvt);
1213
1214			sparse = kzalloc(struct_size(sparse, areas, nr_areas),
1215					 GFP_KERNEL);
1216			if (!sparse)
1217				return -ENOMEM;
1218
1219			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1220			sparse->header.version = 1;
1221			sparse->nr_areas = nr_areas;
1222			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1223			sparse->areas[0].offset =
1224					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
1225			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
1226			break;
1227
1228		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
1229			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1230			info.size = 0;
1231			info.flags = 0;
1232
1233			gvt_dbg_core("get region info bar:%d\n", info.index);
1234			break;
1235
1236		case VFIO_PCI_ROM_REGION_INDEX:
1237		case VFIO_PCI_VGA_REGION_INDEX:
1238			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1239			info.size = 0;
1240			info.flags = 0;
1241
1242			gvt_dbg_core("get region info index:%d\n", info.index);
1243			break;
1244		default:
1245			{
1246				struct vfio_region_info_cap_type cap_type = {
1247					.header.id = VFIO_REGION_INFO_CAP_TYPE,
1248					.header.version = 1 };
1249
1250				if (info.index >= VFIO_PCI_NUM_REGIONS +
1251						vgpu->num_regions)
1252					return -EINVAL;
1253				info.index =
1254					array_index_nospec(info.index,
1255							VFIO_PCI_NUM_REGIONS +
1256							vgpu->num_regions);
1257
1258				i = info.index - VFIO_PCI_NUM_REGIONS;
1259
1260				info.offset =
1261					VFIO_PCI_INDEX_TO_OFFSET(info.index);
1262				info.size = vgpu->region[i].size;
1263				info.flags = vgpu->region[i].flags;
1264
1265				cap_type.type = vgpu->region[i].type;
1266				cap_type.subtype = vgpu->region[i].subtype;
1267
1268				ret = vfio_info_add_capability(&caps,
1269							&cap_type.header,
1270							sizeof(cap_type));
1271				if (ret)
1272					return ret;
1273			}
1274		}
1275
1276		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
1277			switch (cap_type_id) {
1278			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
1279				ret = vfio_info_add_capability(&caps,
1280					&sparse->header,
1281					struct_size(sparse, areas,
1282						    sparse->nr_areas));
1283				if (ret) {
1284					kfree(sparse);
1285					return ret;
1286				}
1287				break;
1288			default:
1289				kfree(sparse);
1290				return -EINVAL;
1291			}
1292		}
1293
1294		if (caps.size) {
1295			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
1296			if (info.argsz < sizeof(info) + caps.size) {
1297				info.argsz = sizeof(info) + caps.size;
1298				info.cap_offset = 0;
1299			} else {
1300				vfio_info_cap_shift(&caps, sizeof(info));
1301				if (copy_to_user((void __user *)arg +
1302						  sizeof(info), caps.buf,
1303						  caps.size)) {
1304					kfree(caps.buf);
1305					kfree(sparse);
1306					return -EFAULT;
1307				}
1308				info.cap_offset = sizeof(info);
1309			}
1310
1311			kfree(caps.buf);
1312		}
1313
1314		kfree(sparse);
1315		return copy_to_user((void __user *)arg, &info, minsz) ?
1316			-EFAULT : 0;
1317	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
1318		struct vfio_irq_info info;
1319
1320		minsz = offsetofend(struct vfio_irq_info, count);
1321
1322		if (copy_from_user(&info, (void __user *)arg, minsz))
1323			return -EFAULT;
1324
1325		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
1326			return -EINVAL;
1327
1328		switch (info.index) {
1329		case VFIO_PCI_INTX_IRQ_INDEX:
1330		case VFIO_PCI_MSI_IRQ_INDEX:
1331			break;
1332		default:
1333			return -EINVAL;
1334		}
1335
1336		info.flags = VFIO_IRQ_INFO_EVENTFD;
1337
1338		info.count = intel_vgpu_get_irq_count(vgpu, info.index);
1339
1340		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
1341			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
1342				       VFIO_IRQ_INFO_AUTOMASKED);
1343		else
1344			info.flags |= VFIO_IRQ_INFO_NORESIZE;
1345
1346		return copy_to_user((void __user *)arg, &info, minsz) ?
1347			-EFAULT : 0;
1348	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
1349		struct vfio_irq_set hdr;
1350		u8 *data = NULL;
1351		int ret = 0;
1352		size_t data_size = 0;
1353
1354		minsz = offsetofend(struct vfio_irq_set, count);
1355
1356		if (copy_from_user(&hdr, (void __user *)arg, minsz))
1357			return -EFAULT;
1358
1359		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
1360			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);
1361
1362			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
1363						VFIO_PCI_NUM_IRQS, &data_size);
1364			if (ret) {
1365				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
1366				return -EINVAL;
1367			}
1368			if (data_size) {
1369				data = memdup_user((void __user *)(arg + minsz),
1370						   data_size);
1371				if (IS_ERR(data))
1372					return PTR_ERR(data);
1373			}
1374		}
1375
1376		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
1377					hdr.start, hdr.count, data);
1378		kfree(data);
1379
1380		return ret;
1381	} else if (cmd == VFIO_DEVICE_RESET) {
1382		intel_gvt_reset_vgpu(vgpu);
1383		return 0;
1384	} else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
1385		struct vfio_device_gfx_plane_info dmabuf;
1386		int ret = 0;
1387
1388		minsz = offsetofend(struct vfio_device_gfx_plane_info,
1389				    dmabuf_id);
1390		if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
1391			return -EFAULT;
1392		if (dmabuf.argsz < minsz)
1393			return -EINVAL;
1394
1395		ret = intel_vgpu_query_plane(vgpu, &dmabuf);
1396		if (ret != 0)
1397			return ret;
1398
1399		return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
1400								-EFAULT : 0;
1401	} else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
1402		__u32 dmabuf_id;
1403
1404		if (get_user(dmabuf_id, (__u32 __user *)arg))
1405			return -EFAULT;
1406		return intel_vgpu_get_dmabuf(vgpu, dmabuf_id);
1407	}
1408
1409	return -ENOTTY;
1410}
1411
1412static ssize_t
1413vgpu_id_show(struct device *dev, struct device_attribute *attr,
1414	     char *buf)
1415{
1416	struct intel_vgpu *vgpu = dev_get_drvdata(dev);
1417
1418	return sprintf(buf, "%d\n", vgpu->id);
1419}
1420
1421static DEVICE_ATTR_RO(vgpu_id);
1422
1423static struct attribute *intel_vgpu_attrs[] = {
1424	&dev_attr_vgpu_id.attr,
1425	NULL
1426};
1427
1428static const struct attribute_group intel_vgpu_group = {
1429	.name = "intel_vgpu",
1430	.attrs = intel_vgpu_attrs,
1431};
1432
1433static const struct attribute_group *intel_vgpu_groups[] = {
1434	&intel_vgpu_group,
1435	NULL,
1436};
1437
1438static int intel_vgpu_init_dev(struct vfio_device *vfio_dev)
1439{
1440	struct mdev_device *mdev = to_mdev_device(vfio_dev->dev);
1441	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1442	struct intel_vgpu_type *type =
1443		container_of(mdev->type, struct intel_vgpu_type, type);
1444	int ret;
1445
1446	vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt;
1447	ret = intel_gvt_create_vgpu(vgpu, type->conf);
1448	if (ret)
1449		return ret;
1450
1451	kvmgt_protect_table_init(vgpu);
1452	gvt_cache_init(vgpu);
1453
1454	return 0;
1455}
1456
1457static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
1458{
1459	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1460
1461	intel_gvt_destroy_vgpu(vgpu);
1462}
1463
1464static const struct vfio_device_ops intel_vgpu_dev_ops = {
1465	.init		= intel_vgpu_init_dev,
1466	.release	= intel_vgpu_release_dev,
1467	.open_device	= intel_vgpu_open_device,
1468	.close_device	= intel_vgpu_close_device,
1469	.read		= intel_vgpu_read,
1470	.write		= intel_vgpu_write,
1471	.mmap		= intel_vgpu_mmap,
1472	.ioctl		= intel_vgpu_ioctl,
1473	.dma_unmap	= intel_vgpu_dma_unmap,
1474	.bind_iommufd	= vfio_iommufd_emulated_bind,
1475	.unbind_iommufd = vfio_iommufd_emulated_unbind,
1476	.attach_ioas	= vfio_iommufd_emulated_attach_ioas,
1477};
1478
1479static int intel_vgpu_probe(struct mdev_device *mdev)
1480{
1481	struct intel_vgpu *vgpu;
1482	int ret;
1483
1484	vgpu = vfio_alloc_device(intel_vgpu, vfio_device, &mdev->dev,
1485				 &intel_vgpu_dev_ops);
1486	if (IS_ERR(vgpu)) {
1487		gvt_err("failed to create intel vgpu: %ld\n", PTR_ERR(vgpu));
1488		return PTR_ERR(vgpu);
1489	}
1490
1491	dev_set_drvdata(&mdev->dev, vgpu);
1492	ret = vfio_register_emulated_iommu_dev(&vgpu->vfio_device);
1493	if (ret)
1494		goto out_put_vdev;
1495
1496	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
1497		     dev_name(mdev_dev(mdev)));
1498	return 0;
1499
1500out_put_vdev:
1501	vfio_put_device(&vgpu->vfio_device);
1502	return ret;
1503}
1504
1505static void intel_vgpu_remove(struct mdev_device *mdev)
1506{
1507	struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev);
1508
1509	vfio_unregister_group_dev(&vgpu->vfio_device);
1510	vfio_put_device(&vgpu->vfio_device);
1511}
1512
1513static unsigned int intel_vgpu_get_available(struct mdev_type *mtype)
1514{
1515	struct intel_vgpu_type *type =
1516		container_of(mtype, struct intel_vgpu_type, type);
1517	struct intel_gvt *gvt = kdev_to_i915(mtype->parent->dev)->gvt;
1518	unsigned int low_gm_avail, high_gm_avail, fence_avail;
1519
1520	mutex_lock(&gvt->lock);
1521	low_gm_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE -
1522		gvt->gm.vgpu_allocated_low_gm_size;
1523	high_gm_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE -
1524		gvt->gm.vgpu_allocated_high_gm_size;
1525	fence_avail = gvt_fence_sz(gvt) - HOST_FENCE -
1526		gvt->fence.vgpu_allocated_fence_num;
1527	mutex_unlock(&gvt->lock);
1528
1529	return min3(low_gm_avail / type->conf->low_mm,
1530		    high_gm_avail / type->conf->high_mm,
1531		    fence_avail / type->conf->fence);
1532}
1533
1534static struct mdev_driver intel_vgpu_mdev_driver = {
1535	.device_api	= VFIO_DEVICE_API_PCI_STRING,
1536	.driver = {
1537		.name		= "intel_vgpu_mdev",
1538		.owner		= THIS_MODULE,
1539		.dev_groups	= intel_vgpu_groups,
1540	},
1541	.probe			= intel_vgpu_probe,
1542	.remove			= intel_vgpu_remove,
1543	.get_available		= intel_vgpu_get_available,
1544	.show_description	= intel_vgpu_show_description,
1545};
1546
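/* Write-protect a guest page via KVM page tracking so that guest writes to it are forwarded to the vGPU. */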
1547int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
1548{
1549	struct kvm *kvm = info->vfio_device.kvm;
1550	struct kvm_memory_slot *slot;
1551	int idx;
1552
1553	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
1554		return -ESRCH;
1555
1556	idx = srcu_read_lock(&kvm->srcu);
1557	slot = gfn_to_memslot(kvm, gfn);
1558	if (!slot) {
1559		srcu_read_unlock(&kvm->srcu, idx);
1560		return -EINVAL;
1561	}
1562
1563	write_lock(&kvm->mmu_lock);
1564
1565	if (kvmgt_gfn_is_write_protected(info, gfn))
1566		goto out;
1567
1568	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1569	kvmgt_protect_table_add(info, gfn);
1570
1571out:
1572	write_unlock(&kvm->mmu_lock);
1573	srcu_read_unlock(&kvm->srcu, idx);
1574	return 0;
1575}
1576
1577int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
1578{
1579	struct kvm *kvm = info->vfio_device.kvm;
1580	struct kvm_memory_slot *slot;
1581	int idx;
1582
1583	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
1584		return -ESRCH;
1585
1586	idx = srcu_read_lock(&kvm->srcu);
1587	slot = gfn_to_memslot(kvm, gfn);
1588	if (!slot) {
1589		srcu_read_unlock(&kvm->srcu, idx);
1590		return -EINVAL;
1591	}
1592
1593	write_lock(&kvm->mmu_lock);
1594
1595	if (!kvmgt_gfn_is_write_protected(info, gfn))
1596		goto out;
1597
1598	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1599	kvmgt_protect_table_del(info, gfn);
1600
1601out:
1602	write_unlock(&kvm->mmu_lock);
1603	srcu_read_unlock(&kvm->srcu, idx);
1604	return 0;
1605}
1606
1607static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1608		const u8 *val, int len,
1609		struct kvm_page_track_notifier_node *node)
1610{
1611	struct intel_vgpu *info =
1612		container_of(node, struct intel_vgpu, track_node);
1613
1614	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
1615		intel_vgpu_page_track_handler(info, gpa,
1616						     (void *)val, len);
1617}
1618
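/* A memslot is going away: drop write protection and tracking state for every gfn in it that we protect. */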
1619static void kvmgt_page_track_flush_slot(struct kvm *kvm,
1620		struct kvm_memory_slot *slot,
1621		struct kvm_page_track_notifier_node *node)
1622{
1623	int i;
1624	gfn_t gfn;
1625	struct intel_vgpu *info =
1626		container_of(node, struct intel_vgpu, track_node);
1627
1628	write_lock(&kvm->mmu_lock);
1629	for (i = 0; i < slot->npages; i++) {
1630		gfn = slot->base_gfn + i;
1631		if (kvmgt_gfn_is_write_protected(info, gfn)) {
1632			kvm_slot_page_track_remove_page(kvm, slot, gfn,
1633						KVM_PAGE_TRACK_WRITE);
1634			kvmgt_protect_table_del(info, gfn);
1635		}
1636	}
1637	write_unlock(&kvm->mmu_lock);
1638}
1639
1640void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)
1641{
1642	int i;
1643
1644	if (!vgpu->region)
1645		return;
1646
1647	for (i = 0; i < vgpu->num_regions; i++)
1648		if (vgpu->region[i].ops->release)
1649			vgpu->region[i].ops->release(vgpu,
1650					&vgpu->region[i]);
1651	vgpu->num_regions = 0;
1652	kfree(vgpu->region);
1653	vgpu->region = NULL;
1654}
1655
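/* Find or create a DMA mapping for a guest page; a hit of the same size only takes a reference, a size mismatch forces a remap. */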
1656int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
1657		unsigned long size, dma_addr_t *dma_addr)
1658{
1659	struct gvt_dma *entry;
1660	int ret;
1661
1662	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1663		return -EINVAL;
1664
1665	mutex_lock(&vgpu->cache_lock);
1666
1667	entry = __gvt_cache_find_gfn(vgpu, gfn);
1668	if (!entry) {
1669		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1670		if (ret)
1671			goto err_unlock;
1672
1673		ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
1674		if (ret)
1675			goto err_unmap;
1676	} else if (entry->size != size) {
1677		/* the same gfn with different size: unmap and re-map */
1678		gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
1679		__gvt_cache_remove_entry(vgpu, entry);
1680
1681		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1682		if (ret)
1683			goto err_unlock;
1684
1685		ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
1686		if (ret)
1687			goto err_unmap;
1688	} else {
1689		kref_get(&entry->ref);
1690		*dma_addr = entry->dma_addr;
1691	}
1692
1693	mutex_unlock(&vgpu->cache_lock);
1694	return 0;
1695
1696err_unmap:
1697	gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
1698err_unlock:
1699	mutex_unlock(&vgpu->cache_lock);
1700	return ret;
1701}
1702
1703int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr)
1704{
1705	struct gvt_dma *entry;
1706	int ret = 0;
1707
1708	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1709		return -EINVAL;
1710
1711	mutex_lock(&vgpu->cache_lock);
1712	entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
1713	if (entry)
1714		kref_get(&entry->ref);
1715	else
1716		ret = -ENOMEM;
1717	mutex_unlock(&vgpu->cache_lock);
1718
1719	return ret;
1720}
1721
1722static void __gvt_dma_release(struct kref *ref)
1723{
1724	struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
1725
1726	gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
1727			   entry->size);
1728	__gvt_cache_remove_entry(entry->vgpu, entry);
1729}
1730
1731void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu,
1732		dma_addr_t dma_addr)
1733{
1734	struct gvt_dma *entry;
1735
1736	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1737		return;
1738
1739	mutex_lock(&vgpu->cache_lock);
1740	entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
1741	if (entry)
1742		kref_put(&entry->ref, __gvt_dma_release);
1743	mutex_unlock(&vgpu->cache_lock);
1744}
1745
1746static void init_device_info(struct intel_gvt *gvt)
1747{
1748	struct intel_gvt_device_info *info = &gvt->device_info;
1749	struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);
1750
1751	info->max_support_vgpus = 8;
1752	info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE;
1753	info->mmio_size = 2 * 1024 * 1024;
1754	info->mmio_bar = 0;
1755	info->gtt_start_offset = 8 * 1024 * 1024;
1756	info->gtt_entry_size = 8;
1757	info->gtt_entry_size_shift = 3;
1758	info->gmadr_bytes_in_cmd = 8;
1759	info->max_surface_size = 36 * 1024 * 1024;
1760	info->msi_cap_offset = pdev->msi_cap;
1761}
1762
1763static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt)
1764{
1765	struct intel_vgpu *vgpu;
1766	int id;
1767
1768	mutex_lock(&gvt->lock);
1769	idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) {
1770		if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id,
1771				       (void *)&gvt->service_request)) {
1772			if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status))
1773				intel_vgpu_emulate_vblank(vgpu);
1774		}
1775	}
1776	mutex_unlock(&gvt->lock);
1777}
1778
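/* Service thread: waits for requests and handles vblank emulation and scheduling work. */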
1779static int gvt_service_thread(void *data)
1780{
1781	struct intel_gvt *gvt = (struct intel_gvt *)data;
1782	int ret;
1783
1784	gvt_dbg_core("service thread start\n");
1785
1786	while (!kthread_should_stop()) {
1787		ret = wait_event_interruptible(gvt->service_thread_wq,
1788				kthread_should_stop() || gvt->service_request);
1789
1790		if (kthread_should_stop())
1791			break;
1792
1793		if (WARN_ONCE(ret, "service thread is waken up by signal.\n"))
1794			continue;
1795
1796		intel_gvt_test_and_emulate_vblank(gvt);
1797
1798		if (test_bit(INTEL_GVT_REQUEST_SCHED,
1799				(void *)&gvt->service_request) ||
1800			test_bit(INTEL_GVT_REQUEST_EVENT_SCHED,
1801					(void *)&gvt->service_request)) {
1802			intel_gvt_schedule(gvt);
1803		}
1804	}
1805
1806	return 0;
1807}
1808
1809static void clean_service_thread(struct intel_gvt *gvt)
1810{
1811	kthread_stop(gvt->service_thread);
1812}
1813
1814static int init_service_thread(struct intel_gvt *gvt)
1815{
1816	init_waitqueue_head(&gvt->service_thread_wq);
1817
1818	gvt->service_thread = kthread_run(gvt_service_thread,
1819			gvt, "gvt_service_thread");
1820	if (IS_ERR(gvt->service_thread)) {
1821		gvt_err("fail to start service thread.\n");
1822		return PTR_ERR(gvt->service_thread);
1823	}
1824	return 0;
1825}
1826
1827/**
1828 * intel_gvt_clean_device - clean a GVT device
1829 * @i915: i915 private
1830 *
1831 * This function is called at the driver unloading stage, to free the
1832 * resources owned by a GVT device.
1833 *
1834 */
1835static void intel_gvt_clean_device(struct drm_i915_private *i915)
1836{
1837	struct intel_gvt *gvt = fetch_and_zero(&i915->gvt);
1838
1839	if (drm_WARN_ON(&i915->drm, !gvt))
1840		return;
1841
1842	mdev_unregister_parent(&gvt->parent);
1843	intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
1844	intel_gvt_clean_vgpu_types(gvt);
1845
1846	intel_gvt_debugfs_clean(gvt);
1847	clean_service_thread(gvt);
1848	intel_gvt_clean_cmd_parser(gvt);
1849	intel_gvt_clean_sched_policy(gvt);
1850	intel_gvt_clean_workload_scheduler(gvt);
1851	intel_gvt_clean_gtt(gvt);
1852	intel_gvt_free_firmware(gvt);
1853	intel_gvt_clean_mmio_info(gvt);
1854	idr_destroy(&gvt->vgpu_idr);
1855
1856	kfree(i915->gvt);
1857}
1858
1859/**
1860 * intel_gvt_init_device - initialize a GVT device
1861 * @i915: drm i915 private data
1862 *
1863 * This function is called at the initialization stage, to initialize
1864 * necessary GVT components.
1865 *
1866 * Returns:
1867 * Zero on success, negative error code if failed.
1868 *
1869 */
1870static int intel_gvt_init_device(struct drm_i915_private *i915)
1871{
1872	struct intel_gvt *gvt;
1873	struct intel_vgpu *vgpu;
1874	int ret;
1875
1876	if (drm_WARN_ON(&i915->drm, i915->gvt))
1877		return -EEXIST;
1878
1879	gvt = kzalloc(sizeof(struct intel_gvt), GFP_KERNEL);
1880	if (!gvt)
1881		return -ENOMEM;
1882
1883	gvt_dbg_core("init gvt device\n");
1884
1885	idr_init_base(&gvt->vgpu_idr, 1);
1886	spin_lock_init(&gvt->scheduler.mmio_context_lock);
1887	mutex_init(&gvt->lock);
1888	mutex_init(&gvt->sched_lock);
1889	gvt->gt = to_gt(i915);
1890	i915->gvt = gvt;
1891
1892	init_device_info(gvt);
1893
1894	ret = intel_gvt_setup_mmio_info(gvt);
1895	if (ret)
1896		goto out_clean_idr;
1897
1898	intel_gvt_init_engine_mmio_context(gvt);
1899
1900	ret = intel_gvt_load_firmware(gvt);
1901	if (ret)
1902		goto out_clean_mmio_info;
1903
1904	ret = intel_gvt_init_irq(gvt);
1905	if (ret)
1906		goto out_free_firmware;
1907
1908	ret = intel_gvt_init_gtt(gvt);
1909	if (ret)
1910		goto out_free_firmware;
1911
1912	ret = intel_gvt_init_workload_scheduler(gvt);
1913	if (ret)
1914		goto out_clean_gtt;
1915
1916	ret = intel_gvt_init_sched_policy(gvt);
1917	if (ret)
1918		goto out_clean_workload_scheduler;
1919
1920	ret = intel_gvt_init_cmd_parser(gvt);
1921	if (ret)
1922		goto out_clean_sched_policy;
1923
1924	ret = init_service_thread(gvt);
1925	if (ret)
1926		goto out_clean_cmd_parser;
1927
1928	ret = intel_gvt_init_vgpu_types(gvt);
1929	if (ret)
1930		goto out_clean_thread;
1931
1932	vgpu = intel_gvt_create_idle_vgpu(gvt);
1933	if (IS_ERR(vgpu)) {
1934		ret = PTR_ERR(vgpu);
1935		gvt_err("failed to create idle vgpu\n");
1936		goto out_clean_types;
1937	}
1938	gvt->idle_vgpu = vgpu;
1939
1940	intel_gvt_debugfs_init(gvt);
1941
1942	ret = mdev_register_parent(&gvt->parent, i915->drm.dev,
1943				   &intel_vgpu_mdev_driver,
1944				   gvt->mdev_types, gvt->num_types);
1945	if (ret)
1946		goto out_destroy_idle_vgpu;
1947
1948	gvt_dbg_core("gvt device initialization is done\n");
1949	return 0;
1950
1951out_destroy_idle_vgpu:
1952	intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
1953	intel_gvt_debugfs_clean(gvt);
1954out_clean_types:
1955	intel_gvt_clean_vgpu_types(gvt);
1956out_clean_thread:
1957	clean_service_thread(gvt);
1958out_clean_cmd_parser:
1959	intel_gvt_clean_cmd_parser(gvt);
1960out_clean_sched_policy:
1961	intel_gvt_clean_sched_policy(gvt);
1962out_clean_workload_scheduler:
1963	intel_gvt_clean_workload_scheduler(gvt);
1964out_clean_gtt:
1965	intel_gvt_clean_gtt(gvt);
1966out_free_firmware:
1967	intel_gvt_free_firmware(gvt);
1968out_clean_mmio_info:
1969	intel_gvt_clean_mmio_info(gvt);
1970out_clean_idr:
1971	idr_destroy(&gvt->vgpu_idr);
1972	kfree(gvt);
1973	i915->gvt = NULL;
1974	return ret;
1975}
1976
1977static void intel_gvt_pm_resume(struct drm_i915_private *i915)
1978{
1979	struct intel_gvt *gvt = i915->gvt;
1980
1981	intel_gvt_restore_fence(gvt);
1982	intel_gvt_restore_mmio(gvt);
1983	intel_gvt_restore_ggtt(gvt);
1984}
1985
1986static const struct intel_vgpu_ops intel_gvt_vgpu_ops = {
1987	.init_device	= intel_gvt_init_device,
1988	.clean_device	= intel_gvt_clean_device,
1989	.pm_resume	= intel_gvt_pm_resume,
1990};
1991
1992static int __init kvmgt_init(void)
1993{
1994	int ret;
1995
1996	ret = intel_gvt_set_ops(&intel_gvt_vgpu_ops);
1997	if (ret)
1998		return ret;
1999
2000	ret = mdev_register_driver(&intel_vgpu_mdev_driver);
2001	if (ret)
2002		intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
2003	return ret;
2004}
2005
2006static void __exit kvmgt_exit(void)
2007{
2008	mdev_unregister_driver(&intel_vgpu_mdev_driver);
2009	intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
2010}
2011
2012module_init(kvmgt_init);
2013module_exit(kvmgt_exit);
2014
2015MODULE_LICENSE("GPL and additional rights");
2016MODULE_AUTHOR("Intel Corporation");