/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"

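/*
 * Walk the per-VM list of assigned devices and return the entry whose
 * assigned_dev_id matches, or NULL if there is none.
 */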
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *ptr;
	struct kvm_assigned_dev_kernel *match;

	list_for_each(ptr, head) {
		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}

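/*
 * Translate a host IRQ number back to its index in the device's MSI-X
 * entry table; returns -1 if the IRQ belongs to none of the entries.
 */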
static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
				    *assigned_dev, int irq)
{
	int i, index;
	struct msix_entry *host_msix_entries;

	host_msix_entries = assigned_dev->host_msix_entries;

	index = -1;
	for (i = 0; i < assigned_dev->entries_nr; i++)
		if (irq == host_msix_entries[i].vector) {
			index = i;
			break;
		}
	if (index < 0)
		printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");

	return index;
}

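/*
 * Hard IRQ handler for INTx lines shared via PCI 2.3 masking: if the
 * device is asserting the line, mask it at device level and wake the
 * threaded handler; otherwise the interrupt is not ours.
 */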
static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret;

	spin_lock(&assigned_dev->intx_lock);
	if (pci_check_and_mask_intx(assigned_dev->dev)) {
		assigned_dev->host_irq_disabled = true;
		ret = IRQ_WAKE_THREAD;
	} else
		ret = IRQ_NONE;
	spin_unlock(&assigned_dev->intx_lock);

	return ret;
}

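/*
 * Inject an interrupt into the guest, except for INTx lines that
 * userspace has masked via KVM_ASSIGN_SET_INTX_MASK.
 */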
static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
				 int vector)
{
	if (unlikely(assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_GUEST_INTX)) {
		spin_lock(&assigned_dev->intx_mask_lock);
		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
			kvm_set_irq(assigned_dev->kvm,
				    assigned_dev->irq_source_id, vector, 1);
		spin_unlock(&assigned_dev->intx_mask_lock);
	} else
		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
			    vector, 1);
}

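/*
 * Threaded INTx handler.  Without PCI 2.3 masking support the host
 * line is disabled here and re-enabled from the guest ack notifier.
 */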
static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
		spin_lock_irq(&assigned_dev->intx_lock);
		disable_irq_nosync(irq);
		assigned_dev->host_irq_disabled = true;
		spin_unlock_irq(&assigned_dev->intx_lock);
	}

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}

#ifdef __KVM_HAVE_MSI
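/* Threaded MSI handler: simply forward the interrupt to the guest. */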
static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}
#endif

#ifdef __KVM_HAVE_MSIX
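/*
 * Threaded MSI-X handler: translate the host vector to the matching
 * guest vector and inject it.
 */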
static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
	}

	return IRQ_HANDLED;
}
#endif

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *dev =
		container_of(kian, struct kvm_assigned_dev_kernel,
			     ack_notifier);

	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);

	spin_lock(&dev->intx_mask_lock);

	if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
		bool reassert = false;

		spin_lock_irq(&dev->intx_lock);
		/*
		 * The guest IRQ may be shared so this ack can come from an
		 * IRQ for another guest device.
		 */
		if (dev->host_irq_disabled) {
			if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
				enable_irq(dev->host_irq);
			else if (!pci_check_and_unmask_intx(dev->dev))
				reassert = true;
			dev->host_irq_disabled = reassert;
		}
		spin_unlock_irq(&dev->intx_lock);

		if (reassert)
			kvm_set_irq(dev->kvm, dev->irq_source_id,
				    dev->guest_irq, 1);
	}

	spin_unlock(&dev->intx_mask_lock);
}

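/* Tear down the guest side: ack notifier, pending IRQ and source id. */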
static void deassign_guest_irq(struct kvm *kvm,
			       struct kvm_assigned_dev_kernel *assigned_dev)
{
	if (assigned_dev->ack_notifier.gsi != -1)
		kvm_unregister_irq_ack_notifier(kvm,
						&assigned_dev->ack_notifier);

	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
		    assigned_dev->guest_irq, 0);

	if (assigned_dev->irq_source_id != -1)
		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
	assigned_dev->irq_source_id = -1;
	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
			      struct kvm_assigned_dev_kernel *assigned_dev)
{
	/*
	 * We disable the IRQ here to prevent further events.
	 *
	 * Note that this may result in a nested disable if the interrupt
	 * type is INTx, but that's OK because we are about to free it.
	 *
	 * If this function is called as part of VM destruction, make sure
	 * that the kvm state is still valid at this point, since we may
	 * also have to wait on a currently running IRQ handler.
	 */
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		int i;
		for (i = 0; i < assigned_dev->entries_nr; i++)
			disable_irq(assigned_dev->host_msix_entries[i].vector);

		for (i = 0; i < assigned_dev->entries_nr; i++)
			free_irq(assigned_dev->host_msix_entries[i].vector,
				 assigned_dev);

		assigned_dev->entries_nr = 0;
		kfree(assigned_dev->host_msix_entries);
		kfree(assigned_dev->guest_msix_entries);
		pci_disable_msix(assigned_dev->dev);
	} else {
		/* Deal with MSI and INTx */
		if ((assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_HOST_INTX) &&
		    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			spin_lock_irq(&assigned_dev->intx_lock);
			pci_intx(assigned_dev->dev, false);
			spin_unlock_irq(&assigned_dev->intx_lock);
			synchronize_irq(assigned_dev->host_irq);
		} else
			disable_irq(assigned_dev->host_irq);

		free_irq(assigned_dev->host_irq, assigned_dev);

		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
			pci_disable_msi(assigned_dev->dev);
	}

	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

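/*
 * Deassign the host and/or guest half of an IRQ assignment, as selected
 * by irq_requested_type.
 */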
static int kvm_deassign_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *assigned_dev,
			    unsigned long irq_requested_type)
{
	unsigned long guest_irq_type, host_irq_type;

	if (!irqchip_in_kernel(kvm))
		return -EINVAL;
	/* no irq assignment to deassign */
	if (!assigned_dev->irq_requested_type)
		return -ENXIO;

	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

	if (host_irq_type)
		deassign_host_irq(kvm, assigned_dev);
	if (guest_irq_type)
		deassign_guest_irq(kvm, assigned_dev);

	return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
				  struct kvm_assigned_dev_kernel *assigned_dev)
{
	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	kvm_free_assigned_irq(kvm, assigned_dev);

	pci_reset_function(assigned_dev->dev);
	if (pci_load_and_free_saved_state(assigned_dev->dev,
					  &assigned_dev->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&assigned_dev->dev->dev));
	else
		pci_restore_state(assigned_dev->dev);

	assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;

	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *ptr, *ptr2;
	struct kvm_assigned_dev_kernel *assigned_dev;

	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
		assigned_dev = list_entry(ptr,
					  struct kvm_assigned_dev_kernel,
					  list);

		kvm_free_assigned_device(kvm, assigned_dev);
	}
}

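/* Set up the host side of an INTx assignment as a threaded IRQ. */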
static int assigned_device_enable_host_intx(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	irq_handler_t irq_handler;
	unsigned long flags;

	dev->host_irq = dev->dev->irq;

	/*
	 * We can only share the IRQ line with other host devices if we are
	 * able to disable the IRQ source at device-level - independently of
	 * the guest driver. Otherwise host devices may suffer from unbounded
	 * IRQ latencies when the guest keeps the line asserted.
	 */
	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		irq_handler = kvm_assigned_dev_intx;
		flags = IRQF_SHARED;
	} else {
		irq_handler = NULL;
		flags = IRQF_ONESHOT;
	}
	if (request_threaded_irq(dev->host_irq, irq_handler,
				 kvm_assigned_dev_thread_intx, flags,
				 dev->irq_name, dev))
		return -EIO;

	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		spin_lock_irq(&dev->intx_lock);
		pci_intx(dev->dev, true);
		spin_unlock_irq(&dev->intx_lock);
	}
	return 0;
}

#ifdef __KVM_HAVE_MSI
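/* Hard IRQ half of the MSI handler: all real work is done in the thread. */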
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
	return IRQ_WAKE_THREAD;
}

static int assigned_device_enable_host_msi(struct kvm *kvm,
					   struct kvm_assigned_dev_kernel *dev)
{
	int r;

	if (!dev->dev->msi_enabled) {
		r = pci_enable_msi(dev->dev);
		if (r)
			return r;
	}

	dev->host_irq = dev->dev->irq;
	if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
				 kvm_assigned_dev_thread_msi, 0,
				 dev->irq_name, dev)) {
		pci_disable_msi(dev->dev);
		return -EIO;
	}

	return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
	return IRQ_WAKE_THREAD;
}

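/*
 * Enable MSI-X on the device and request one threaded IRQ per entry,
 * unwinding everything on failure.
 */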
static int assigned_device_enable_host_msix(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	int i, r = -EINVAL;

	/* host_msix_entries and guest_msix_entries should have been
	 * initialized */
	if (dev->entries_nr == 0)
		return r;

	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
	if (r)
		return r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_threaded_irq(dev->host_msix_entries[i].vector,
					 kvm_assigned_dev_msix,
					 kvm_assigned_dev_thread_msix,
					 0, dev->irq_name, dev);
		if (r)
			goto err;
	}

	return 0;
err:
	for (i -= 1; i >= 0; i--)
		free_irq(dev->host_msix_entries[i].vector, dev);
	pci_disable_msix(dev->dev);
	return r;
}

#endif

static int assigned_device_enable_guest_intx(struct kvm *kvm,
				struct kvm_assigned_dev_kernel *dev,
				struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = irq->guest_irq;
	return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}
#endif

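/*
 * Dispatch to the INTx/MSI/MSI-X host-side setup helper selected by
 * host_irq_type.
 */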
static int assign_host_irq(struct kvm *kvm,
			   struct kvm_assigned_dev_kernel *dev,
			   __u32 host_irq_type)
{
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
		return r;

	snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
		 pci_name(dev->dev));

	switch (host_irq_type) {
	case KVM_DEV_IRQ_HOST_INTX:
		r = assigned_device_enable_host_intx(kvm, dev);
		break;
#ifdef __KVM_HAVE_MSI
	case KVM_DEV_IRQ_HOST_MSI:
		r = assigned_device_enable_host_msi(kvm, dev);
		break;
#endif
#ifdef __KVM_HAVE_MSIX
	case KVM_DEV_IRQ_HOST_MSIX:
		r = assigned_device_enable_host_msix(kvm, dev);
		break;
#endif
	default:
		r = -EINVAL;
	}
	dev->host_irq_disabled = false;

	if (!r)
		dev->irq_requested_type |= host_irq_type;

	return r;
}

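/*
 * Set up the guest side of an assignment: allocate an IRQ source id,
 * record the guest IRQ and register the ack notifier for INTx.
 */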
static int assign_guest_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *dev,
			    struct kvm_assigned_irq *irq,
			    unsigned long guest_irq_type)
{
	int id;
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
		return r;

	id = kvm_request_irq_source_id(kvm);
	if (id < 0)
		return id;

	dev->irq_source_id = id;

	switch (guest_irq_type) {
	case KVM_DEV_IRQ_GUEST_INTX:
		r = assigned_device_enable_guest_intx(kvm, dev, irq);
		break;
#ifdef __KVM_HAVE_MSI
	case KVM_DEV_IRQ_GUEST_MSI:
		r = assigned_device_enable_guest_msi(kvm, dev, irq);
		break;
#endif
#ifdef __KVM_HAVE_MSIX
	case KVM_DEV_IRQ_GUEST_MSIX:
		r = assigned_device_enable_guest_msix(kvm, dev, irq);
		break;
#endif
	default:
		r = -EINVAL;
	}

	if (!r) {
		dev->irq_requested_type |= guest_irq_type;
		if (dev->ack_notifier.gsi != -1)
			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
	} else
		kvm_free_irq_source_id(kvm, dev->irq_source_id);

	return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq *assigned_irq)
{
	int r = -EINVAL;
	struct kvm_assigned_dev_kernel *match;
	unsigned long host_irq_type, guest_irq_type;

	if (!irqchip_in_kernel(kvm))
		return r;

	mutex_lock(&kvm->lock);
	r = -ENODEV;
	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

	r = -EINVAL;
	/* can only assign one type at a time */
	if (hweight_long(host_irq_type) > 1)
		goto out;
	if (hweight_long(guest_irq_type) > 1)
		goto out;
	if (host_irq_type == 0 && guest_irq_type == 0)
		goto out;

	r = 0;
	if (host_irq_type)
		r = assign_host_irq(kvm, match, host_irq_type);
	if (r)
		goto out;

	if (guest_irq_type)
		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
					 struct kvm_assigned_irq
					 *assigned_irq)
{
	int r = -ENODEV;
	struct kvm_assigned_dev_kernel *match;
	unsigned long irq_type;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
					  KVM_DEV_IRQ_GUEST_MASK);
	r = kvm_deassign_irq(kvm, match, irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

/*
 * We want to test whether the caller has been granted permissions to
 * use this device.  To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs.  PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file.  We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
	int i;
	bool bar_found = false;

	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
		char *kpath, *syspath;
		struct path path;
		struct inode *inode;
		int r;

		if (!pci_resource_len(dev, i))
			continue;

		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
		if (!kpath)
			return -ENOMEM;

		/* Per sysfs-rules, sysfs is always at /sys */
		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
		kfree(kpath);
		if (!syspath)
			return -ENOMEM;

		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
		kfree(syspath);
		if (r)
			return r;

		inode = path.dentry->d_inode;

		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
		path_put(&path);
		if (r)
			return r;

		bar_found = true;
	}

	/* If no resources, probably something special */
	if (!bar_found)
		return -EPERM;

	return 0;
#else
	return -EINVAL; /* No way to control the device without sysfs */
#endif
}

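/*
 * Handle KVM_ASSIGN_PCI_DEVICE: validate and claim the host PCI device,
 * save its state and attach it to the VM's IOMMU domain.
 */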
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0, idx;
	struct kvm_assigned_dev_kernel *match;
	struct pci_dev *dev;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	idx = srcu_read_lock(&kvm->srcu);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (match) {
		/* device already assigned */
		r = -EEXIST;
		goto out;
	}

	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (match == NULL) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}
	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
					  assigned_dev->busnr,
					  assigned_dev->devfn);
	if (!dev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}

	/* Don't allow bridges to be assigned */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
		r = -EPERM;
		goto out_put;
	}

	r = probe_sysfs_permissions(dev);
	if (r)
		goto out_put;

	if (pci_enable_device(dev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}
	r = pci_request_regions(dev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}

	pci_reset_function(dev);
	pci_save_state(dev);
	match->pci_saved_state = pci_store_saved_state(dev);
	if (!match->pci_saved_state)
		printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
		       __func__, dev_name(&dev->dev));

	if (!pci_intx_mask_supported(dev))
		assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

	match->assigned_dev_id = assigned_dev->assigned_dev_id;
	match->host_segnr = assigned_dev->segnr;
	match->host_busnr = assigned_dev->busnr;
	match->host_devfn = assigned_dev->devfn;
	match->flags = assigned_dev->flags;
	match->dev = dev;
	spin_lock_init(&match->intx_lock);
	spin_lock_init(&match->intx_mask_lock);
	match->irq_source_id = -1;
	match->kvm = kvm;
	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

	list_add(&match->list, &kvm->arch.assigned_dev_head);

	if (!kvm->arch.iommu_domain) {
		r = kvm_iommu_map_guest(kvm);
		if (r)
			goto out_list_del;
	}
	r = kvm_assign_device(kvm, match);
	if (r)
		goto out_list_del;

out:
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
out_list_del:
	if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&dev->dev));
	list_del(&match->list);
	pci_release_regions(dev);
out_disable:
	pci_disable_device(dev);
out_put:
	pci_dev_put(dev);
out_free:
	kfree(match);
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
		struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		printk(KERN_INFO "%s: device hasn't been assigned before, "
		       "so cannot be deassigned\n", __func__);
		r = -EINVAL;
		goto out;
	}

	kvm_deassign_device(kvm, match);

	kvm_free_assigned_device(kvm, match);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

#ifdef __KVM_HAVE_MSIX
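/* Handle KVM_ASSIGN_SET_MSIX_NR: size the MSI-X entry tables, once. */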
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
				    struct kvm_assigned_msix_nr *entry_nr)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      entry_nr->assigned_dev_id);
	if (!adev) {
		r = -EINVAL;
		goto msix_nr_out;
	}

	if (adev->entries_nr == 0) {
		adev->entries_nr = entry_nr->entry_nr;
		if (adev->entries_nr == 0 ||
		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
			r = -EINVAL;
			goto msix_nr_out;
		}

		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
						entry_nr->entry_nr,
						GFP_KERNEL);
		if (!adev->host_msix_entries) {
			r = -ENOMEM;
			goto msix_nr_out;
		}
		adev->guest_msix_entries =
			kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
				GFP_KERNEL);
		if (!adev->guest_msix_entries) {
			kfree(adev->host_msix_entries);
			r = -ENOMEM;
			goto msix_nr_out;
		}
	} else /* Setting the MSI-X entry count twice is not allowed */
		r = -EINVAL;
msix_nr_out:
	mutex_unlock(&kvm->lock);
	return r;
}

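/*
 * Handle KVM_ASSIGN_SET_MSIX_ENTRY: record the guest GSI for one MSI-X
 * table entry in a free (or already matching) slot.
 */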
static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
				       struct kvm_assigned_msix_entry *entry)
{
	int r = 0, i;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      entry->assigned_dev_id);

	if (!adev) {
		r = -EINVAL;
		goto msix_entry_out;
	}

	for (i = 0; i < adev->entries_nr; i++)
		if (adev->guest_msix_entries[i].vector == 0 ||
		    adev->guest_msix_entries[i].entry == entry->entry) {
			adev->guest_msix_entries[i].entry = entry->entry;
			adev->guest_msix_entries[i].vector = entry->gsi;
			adev->host_msix_entries[i].entry = entry->entry;
			break;
		}
	if (i == adev->entries_nr) {
		r = -ENOSPC;
		goto msix_entry_out;
	}

msix_entry_out:
	mutex_unlock(&kvm->lock);

	return r;
}
#endif

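/*
 * Handle KVM_ASSIGN_SET_INTX_MASK: update the userspace INTx mask and
 * bring the pending IRQ and the host line state back in sync with it.
 */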
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
		struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		r = -ENODEV;
		goto out;
	}

	spin_lock(&match->intx_mask_lock);

	match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
	match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
			kvm_set_irq(match->kvm, match->irq_source_id,
				    match->guest_irq, 0);
			/*
			 * Masking at hardware-level is performed on demand,
			 * i.e. when an IRQ actually arrives at the host.
			 */
		} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			/*
			 * Unmask the IRQ line if required. Unmasking at
			 * device level will be performed by user space.
			 */
			spin_lock_irq(&match->intx_lock);
			if (match->host_irq_disabled) {
				enable_irq(match->host_irq);
				match->host_irq_disabled = false;
			}
			spin_unlock_irq(&match->intx_lock);
		}
	}

	spin_unlock(&match->intx_mask_lock);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

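/* Dispatcher for the device-assignment VM ioctls. */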
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
				  unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_ASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_IRQ: {
		r = -EOPNOTSUPP;
		break;
	}
	case KVM_ASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
#ifdef KVM_CAP_IRQ_ROUTING
	case KVM_SET_GSI_ROUTING: {
		struct kvm_irq_routing routing;
		struct kvm_irq_routing __user *urouting;
		struct kvm_irq_routing_entry *entries;

		r = -EFAULT;
		if (copy_from_user(&routing, argp, sizeof(routing)))
			goto out;
		r = -EINVAL;
		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
			goto out;
		if (routing.flags)
			goto out;
		r = -ENOMEM;
		entries = vmalloc(routing.nr * sizeof(*entries));
		if (!entries)
			goto out;
		r = -EFAULT;
		urouting = argp;
		if (copy_from_user(entries, urouting->entries,
				   routing.nr * sizeof(*entries)))
			goto out_free_irq_routing;
		r = kvm_set_irq_routing(kvm, entries, routing.nr,
					routing.flags);
	out_free_irq_routing:
		vfree(entries);
		break;
	}
#endif /* KVM_CAP_IRQ_ROUTING */
#ifdef __KVM_HAVE_MSIX
	case KVM_ASSIGN_SET_MSIX_NR: {
		struct kvm_assigned_msix_nr entry_nr;
		r = -EFAULT;
		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
			goto out;
		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_ENTRY: {
		struct kvm_assigned_msix_entry entry;
		r = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof entry))
			goto out;
		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
		if (r)
			goto out;
		break;
	}
#endif
	case KVM_ASSIGN_SET_INTX_MASK: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
		break;
	}
	default:
		r = -ENOTTY;
		break;
	}
out:
	return r;
}