drivers/vfio/pci/vfio_pci_intrs.c (Linux v5.9)
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * VFIO PCI interrupt handling
  4 *
  5 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
  6 *     Author: Alex Williamson <alex.williamson@redhat.com>
  7 *
  8 * Derived from original vfio:
  9 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 10 * Author: Tom Lyon, pugs@cisco.com
 11 */
 12
 13#include <linux/device.h>
 14#include <linux/interrupt.h>
 15#include <linux/eventfd.h>
 16#include <linux/msi.h>
 17#include <linux/pci.h>
 18#include <linux/file.h>
 19#include <linux/vfio.h>
 20#include <linux/wait.h>
 21#include <linux/slab.h>
 22
 23#include "vfio_pci_private.h"
 24
 25/*
 26 * INTx
 27 */
 28static void vfio_send_intx_eventfd(void *opaque, void *unused)
 29{
 30	struct vfio_pci_device *vdev = opaque;
 31
 32	if (likely(is_intx(vdev) && !vdev->virq_disabled))
 33		eventfd_signal(vdev->ctx[0].trigger, 1);
 34}
 35
 36void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
 37{
 38	struct pci_dev *pdev = vdev->pdev;
 39	unsigned long flags;
 40
 41	spin_lock_irqsave(&vdev->irqlock, flags);
 42
 43	/*
 44	 * Masking can come from interrupt, ioctl, or config space
 45	 * via INTx disable.  The latter means this can get called
 46	 * even when not using intx delivery.  In this case, just
 47	 * try to have the physical bit follow the virtual bit.
 48	 */
 49	if (unlikely(!is_intx(vdev))) {
 50		if (vdev->pci_2_3)
 51			pci_intx(pdev, 0);
 52	} else if (!vdev->ctx[0].masked) {
 53		/*
 54		 * Can't use check_and_mask here because we always want to
 55		 * mask, not just when something is pending.
 56		 */
 57		if (vdev->pci_2_3)
 58			pci_intx(pdev, 0);
 59		else
 60			disable_irq_nosync(pdev->irq);
 61
 62		vdev->ctx[0].masked = true;
 63	}
 64
 65	spin_unlock_irqrestore(&vdev->irqlock, flags);
 66}
 67
 68/*
 69 * If this is triggered by an eventfd, we can't call eventfd_signal
 70 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
 71 * a signal is necessary, which can then be handled via a work queue
 72 * or directly depending on the caller.
 73 */
 74static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
 75{
 76	struct vfio_pci_device *vdev = opaque;
 77	struct pci_dev *pdev = vdev->pdev;
 78	unsigned long flags;
 79	int ret = 0;
 80
 81	spin_lock_irqsave(&vdev->irqlock, flags);
 82
 83	/*
 84	 * Unmasking comes from ioctl or config, so again, have the
 85	 * physical bit follow the virtual even when not using INTx.
 86	 */
 87	if (unlikely(!is_intx(vdev))) {
 88		if (vdev->pci_2_3)
 89			pci_intx(pdev, 1);
 90	} else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
 91		/*
 92		 * A pending interrupt here would immediately trigger,
 93		 * but we can avoid that overhead by just re-sending
 94		 * the interrupt to the user.
 95		 */
 96		if (vdev->pci_2_3) {
 97			if (!pci_check_and_unmask_intx(pdev))
 98				ret = 1;
 99		} else
100			enable_irq(pdev->irq);
101
102		vdev->ctx[0].masked = (ret > 0);
103	}
104
105	spin_unlock_irqrestore(&vdev->irqlock, flags);
106
107	return ret;
108}
109
110void vfio_pci_intx_unmask(struct vfio_pci_device *vdev)
111{
112	if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
113		vfio_send_intx_eventfd(vdev, NULL);
114}
115
116static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
117{
118	struct vfio_pci_device *vdev = dev_id;
119	unsigned long flags;
120	int ret = IRQ_NONE;
121
122	spin_lock_irqsave(&vdev->irqlock, flags);
123
124	if (!vdev->pci_2_3) {
125		disable_irq_nosync(vdev->pdev->irq);
126		vdev->ctx[0].masked = true;
127		ret = IRQ_HANDLED;
128	} else if (!vdev->ctx[0].masked &&  /* may be shared */
129		   pci_check_and_mask_intx(vdev->pdev)) {
130		vdev->ctx[0].masked = true;
131		ret = IRQ_HANDLED;
132	}
133
134	spin_unlock_irqrestore(&vdev->irqlock, flags);
135
136	if (ret == IRQ_HANDLED)
137		vfio_send_intx_eventfd(vdev, NULL);
138
139	return ret;
140}
141
142static int vfio_intx_enable(struct vfio_pci_device *vdev)
143{
144	if (!is_irq_none(vdev))
145		return -EINVAL;
146
147	if (!vdev->pdev->irq)
148		return -ENODEV;
149
150	vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
151	if (!vdev->ctx)
152		return -ENOMEM;
153
154	vdev->num_ctx = 1;
155
156	/*
157	 * If the virtual interrupt is masked, restore it.  Devices
158	 * supporting DisINTx can be masked at the hardware level
159	 * here, non-PCI-2.3 devices will have to wait until the
160	 * interrupt is enabled.
161	 */
162	vdev->ctx[0].masked = vdev->virq_disabled;
163	if (vdev->pci_2_3)
164		pci_intx(vdev->pdev, !vdev->ctx[0].masked);
165
166	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
167
168	return 0;
169}
170
171static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
172{
173	struct pci_dev *pdev = vdev->pdev;
174	unsigned long irqflags = IRQF_SHARED;
175	struct eventfd_ctx *trigger;
176	unsigned long flags;
177	int ret;
178
179	if (vdev->ctx[0].trigger) {
180		free_irq(pdev->irq, vdev);
181		kfree(vdev->ctx[0].name);
182		eventfd_ctx_put(vdev->ctx[0].trigger);
183		vdev->ctx[0].trigger = NULL;
184	}
185
186	if (fd < 0) /* Disable only */
187		return 0;
188
189	vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
190				      pci_name(pdev));
191	if (!vdev->ctx[0].name)
192		return -ENOMEM;
193
194	trigger = eventfd_ctx_fdget(fd);
195	if (IS_ERR(trigger)) {
196		kfree(vdev->ctx[0].name);
197		return PTR_ERR(trigger);
198	}
199
200	vdev->ctx[0].trigger = trigger;
201
202	if (!vdev->pci_2_3)
203		irqflags = 0;
204
205	ret = request_irq(pdev->irq, vfio_intx_handler,
206			  irqflags, vdev->ctx[0].name, vdev);
207	if (ret) {
208		vdev->ctx[0].trigger = NULL;
209		kfree(vdev->ctx[0].name);
210		eventfd_ctx_put(trigger);
211		return ret;
212	}
213
214	/*
215	 * INTx disable will stick across the new irq setup,
216	 * disable_irq won't.
217	 */
218	spin_lock_irqsave(&vdev->irqlock, flags);
219	if (!vdev->pci_2_3 && vdev->ctx[0].masked)
220		disable_irq_nosync(pdev->irq);
221	spin_unlock_irqrestore(&vdev->irqlock, flags);
222
223	return 0;
224}
225
226static void vfio_intx_disable(struct vfio_pci_device *vdev)
227{
228	vfio_virqfd_disable(&vdev->ctx[0].unmask);
229	vfio_virqfd_disable(&vdev->ctx[0].mask);
230	vfio_intx_set_signal(vdev, -1);
231	vdev->irq_type = VFIO_PCI_NUM_IRQS;
232	vdev->num_ctx = 0;
233	kfree(vdev->ctx);
234}
235
236/*
237 * MSI/MSI-X
238 */
239static irqreturn_t vfio_msihandler(int irq, void *arg)
240{
241	struct eventfd_ctx *trigger = arg;
242
243	eventfd_signal(trigger, 1);
244	return IRQ_HANDLED;
245}
246
247static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
248{
249	struct pci_dev *pdev = vdev->pdev;
250	unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
251	int ret;
252	u16 cmd;
253
254	if (!is_irq_none(vdev))
255		return -EINVAL;
256
257	vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
258	if (!vdev->ctx)
259		return -ENOMEM;
260
261	/* return the number of supported vectors if we can't get all: */
262	cmd = vfio_pci_memory_lock_and_enable(vdev);
263	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
264	if (ret < nvec) {
265		if (ret > 0)
266			pci_free_irq_vectors(pdev);
267		vfio_pci_memory_unlock_and_restore(vdev, cmd);
268		kfree(vdev->ctx);
269		return ret;
270	}
271	vfio_pci_memory_unlock_and_restore(vdev, cmd);
272
273	vdev->num_ctx = nvec;
274	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
275				VFIO_PCI_MSI_IRQ_INDEX;
276
277	if (!msix) {
278		/*
279		 * Compute the virtual hardware field for max msi vectors -
280		 * it is the log base 2 of the number of vectors.
281		 */
282		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
283	}
284
285	return 0;
286}
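The msi_qmax value stored above is ceil(log2(nvec)), which is how the MSI capability's Multiple Message fields encode a vector count. A standalone sketch of the same arithmetic, written for this listing only (the helper name is made up, not kernel code):

/*
 * Illustration only (not part of the driver): fls(2 * nvec - 1) - 1
 * computes ceil(log2(nvec)) for nvec >= 1, the encoding used by the MSI
 * Multiple Message Capable/Enable fields.
 */
#include <stdio.h>

static int msi_qmax_of(unsigned int nvec)	/* mirrors fls(nvec * 2 - 1) - 1 */
{
	int q = 0;

	while ((1u << q) < nvec)	/* smallest q with 2^q >= nvec */
		q++;
	return q;
}

int main(void)
{
	unsigned int nvec;

	/* nvec = 1..8 -> qmax = 0,1,2,2,3,3,3,3 */
	for (nvec = 1; nvec <= 8; nvec++)
		printf("nvec=%u -> msi_qmax=%d\n", nvec, msi_qmax_of(nvec));
	return 0;
}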
287
288static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
289				      int vector, int fd, bool msix)
290{
291	struct pci_dev *pdev = vdev->pdev;
292	struct eventfd_ctx *trigger;
293	int irq, ret;
294	u16 cmd;
295
296	if (vector < 0 || vector >= vdev->num_ctx)
297		return -EINVAL;
298
299	irq = pci_irq_vector(pdev, vector);
300
301	if (vdev->ctx[vector].trigger) {
302		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
303
304		cmd = vfio_pci_memory_lock_and_enable(vdev);
305		free_irq(irq, vdev->ctx[vector].trigger);
306		vfio_pci_memory_unlock_and_restore(vdev, cmd);
307
308		kfree(vdev->ctx[vector].name);
309		eventfd_ctx_put(vdev->ctx[vector].trigger);
310		vdev->ctx[vector].trigger = NULL;
311	}
312
313	if (fd < 0)
314		return 0;
315
316	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
317					   msix ? "x" : "", vector,
318					   pci_name(pdev));
319	if (!vdev->ctx[vector].name)
320		return -ENOMEM;
321
322	trigger = eventfd_ctx_fdget(fd);
323	if (IS_ERR(trigger)) {
324		kfree(vdev->ctx[vector].name);
325		return PTR_ERR(trigger);
326	}
327
328	/*
329	 * The MSIx vector table resides in device memory which may be cleared
330	 * via backdoor resets. We don't allow direct access to the vector
331	 * table so even if a userspace driver attempts to save/restore around
332	 * such a reset it would be unsuccessful. To avoid this, restore the
333	 * cached value of the message prior to enabling.
334	 */
335	cmd = vfio_pci_memory_lock_and_enable(vdev);
336	if (msix) {
337		struct msi_msg msg;
338
339		get_cached_msi_msg(irq, &msg);
340		pci_write_msi_msg(irq, &msg);
341	}
342
343	ret = request_irq(irq, vfio_msihandler, 0,
344			  vdev->ctx[vector].name, trigger);
345	vfio_pci_memory_unlock_and_restore(vdev, cmd);
346	if (ret) {
347		kfree(vdev->ctx[vector].name);
348		eventfd_ctx_put(trigger);
349		return ret;
350	}
351
352	vdev->ctx[vector].producer.token = trigger;
353	vdev->ctx[vector].producer.irq = irq;
354	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
355	if (unlikely(ret))
356		dev_info(&pdev->dev,
357		"irq bypass producer (token %p) registration fails: %d\n",
358		vdev->ctx[vector].producer.token, ret);
359
360	vdev->ctx[vector].trigger = trigger;
361
362	return 0;
363}
364
365static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
366			      unsigned count, int32_t *fds, bool msix)
367{
368	int i, j, ret = 0;
369
370	if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
371		return -EINVAL;
372
373	for (i = 0, j = start; i < count && !ret; i++, j++) {
374		int fd = fds ? fds[i] : -1;
375		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
376	}
377
378	if (ret) {
379		for (--j; j >= (int)start; j--)
380			vfio_msi_set_vector_signal(vdev, j, -1, msix);
381	}
382
383	return ret;
384}
385
386static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
387{
388	struct pci_dev *pdev = vdev->pdev;
389	int i;
390	u16 cmd;
391
392	for (i = 0; i < vdev->num_ctx; i++) {
393		vfio_virqfd_disable(&vdev->ctx[i].unmask);
394		vfio_virqfd_disable(&vdev->ctx[i].mask);
395	}
396
397	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
398
399	cmd = vfio_pci_memory_lock_and_enable(vdev);
400	pci_free_irq_vectors(pdev);
401	vfio_pci_memory_unlock_and_restore(vdev, cmd);
402
403	/*
404	 * Both disable paths above use pci_intx_for_msi() to clear DisINTx
405	 * via their shutdown paths.  Restore for NoINTx devices.
406	 */
407	if (vdev->nointx)
408		pci_intx(pdev, 0);
409
410	vdev->irq_type = VFIO_PCI_NUM_IRQS;
411	vdev->num_ctx = 0;
412	kfree(vdev->ctx);
413}
414
415/*
416 * IOCTL support
417 */
418static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
419				    unsigned index, unsigned start,
420				    unsigned count, uint32_t flags, void *data)
421{
422	if (!is_intx(vdev) || start != 0 || count != 1)
423		return -EINVAL;
424
425	if (flags & VFIO_IRQ_SET_DATA_NONE) {
426		vfio_pci_intx_unmask(vdev);
427	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
428		uint8_t unmask = *(uint8_t *)data;
429		if (unmask)
430			vfio_pci_intx_unmask(vdev);
431	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
432		int32_t fd = *(int32_t *)data;
433		if (fd >= 0)
434			return vfio_virqfd_enable((void *) vdev,
435						  vfio_pci_intx_unmask_handler,
436						  vfio_send_intx_eventfd, NULL,
437						  &vdev->ctx[0].unmask, fd);
438
439		vfio_virqfd_disable(&vdev->ctx[0].unmask);
440	}
441
442	return 0;
443}
444
445static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev,
446				  unsigned index, unsigned start,
447				  unsigned count, uint32_t flags, void *data)
448{
449	if (!is_intx(vdev) || start != 0 || count != 1)
450		return -EINVAL;
451
452	if (flags & VFIO_IRQ_SET_DATA_NONE) {
453		vfio_pci_intx_mask(vdev);
454	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
455		uint8_t mask = *(uint8_t *)data;
456		if (mask)
457			vfio_pci_intx_mask(vdev);
458	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
459		return -ENOTTY; /* XXX implement me */
460	}
461
462	return 0;
463}
464
465static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev,
466				     unsigned index, unsigned start,
467				     unsigned count, uint32_t flags, void *data)
468{
469	if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
470		vfio_intx_disable(vdev);
471		return 0;
472	}
473
474	if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
475		return -EINVAL;
476
477	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
478		int32_t fd = *(int32_t *)data;
479		int ret;
480
481		if (is_intx(vdev))
482			return vfio_intx_set_signal(vdev, fd);
483
484		ret = vfio_intx_enable(vdev);
485		if (ret)
486			return ret;
487
488		ret = vfio_intx_set_signal(vdev, fd);
489		if (ret)
490			vfio_intx_disable(vdev);
491
492		return ret;
493	}
494
495	if (!is_intx(vdev))
496		return -EINVAL;
497
498	if (flags & VFIO_IRQ_SET_DATA_NONE) {
499		vfio_send_intx_eventfd(vdev, NULL);
500	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
501		uint8_t trigger = *(uint8_t *)data;
502		if (trigger)
503			vfio_send_intx_eventfd(vdev, NULL);
504	}
505	return 0;
506}
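For reference, a hedged userspace sketch of the service loop this path supports: vfio_intx_handler() masks the level-triggered line and signals the trigger eventfd, and the user re-arms it with an ACTION_UNMASK ioctl once the device has been serviced. Registering the trigger eventfd itself uses the same VFIO_DEVICE_SET_IRQS shape as the MSI-X sketch at the end of this listing, with index VFIO_PCI_INTX_IRQ_INDEX and count 1. device_fd and intx_efd are assumed to be set up already; error handling is omitted.

/*
 * Hedged userspace sketch (not from the kernel tree): the INTx service loop.
 * "device_fd" and "intx_efd" are assumed to exist; no error handling.
 */
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static void intx_service_loop(int device_fd, int intx_efd)
{
	struct vfio_irq_set unmask = {
		.argsz = sizeof(unmask),
		.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
		.index = VFIO_PCI_INTX_IRQ_INDEX,
		.start = 0,
		.count = 1,
	};
	uint64_t cnt;

	for (;;) {
		/* vfio_intx_handler() masked the line and signalled the eventfd */
		read(intx_efd, &cnt, sizeof(cnt));

		/* ... service the device while the interrupt is masked ... */

		/* re-arm: ends up in vfio_pci_intx_unmask() above */
		ioctl(device_fd, VFIO_DEVICE_SET_IRQS, &unmask);
	}
}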
507
508static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
509				    unsigned index, unsigned start,
510				    unsigned count, uint32_t flags, void *data)
511{
512	int i;
513	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
514
515	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
516		vfio_msi_disable(vdev, msix);
517		return 0;
518	}
519
520	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
521		return -EINVAL;
522
523	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
524		int32_t *fds = data;
525		int ret;
526
527		if (vdev->irq_type == index)
528			return vfio_msi_set_block(vdev, start, count,
529						  fds, msix);
530
531		ret = vfio_msi_enable(vdev, start + count, msix);
532		if (ret)
533			return ret;
534
535		ret = vfio_msi_set_block(vdev, start, count, fds, msix);
536		if (ret)
537			vfio_msi_disable(vdev, msix);
538
539		return ret;
540	}
541
542	if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
543		return -EINVAL;
544
545	for (i = start; i < start + count; i++) {
546		if (!vdev->ctx[i].trigger)
547			continue;
548		if (flags & VFIO_IRQ_SET_DATA_NONE) {
549			eventfd_signal(vdev->ctx[i].trigger, 1);
550		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
551			uint8_t *bools = data;
552			if (bools[i - start])
553				eventfd_signal(vdev->ctx[i].trigger, 1);
554		}
555	}
556	return 0;
557}
558
559static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
560					   unsigned int count, uint32_t flags,
561					   void *data)
562{
563	/* DATA_NONE/DATA_BOOL enables loopback testing */
564	if (flags & VFIO_IRQ_SET_DATA_NONE) {
565		if (*ctx) {
566			if (count) {
567				eventfd_signal(*ctx, 1);
568			} else {
569				eventfd_ctx_put(*ctx);
570				*ctx = NULL;
571			}
572			return 0;
573		}
574	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
575		uint8_t trigger;
576
577		if (!count)
578			return -EINVAL;
579
580		trigger = *(uint8_t *)data;
581		if (trigger && *ctx)
582			eventfd_signal(*ctx, 1);
583
584		return 0;
585	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
586		int32_t fd;
587
588		if (!count)
589			return -EINVAL;
590
591		fd = *(int32_t *)data;
592		if (fd == -1) {
593			if (*ctx)
594				eventfd_ctx_put(*ctx);
595			*ctx = NULL;
596		} else if (fd >= 0) {
597			struct eventfd_ctx *efdctx;
598
599			efdctx = eventfd_ctx_fdget(fd);
600			if (IS_ERR(efdctx))
601				return PTR_ERR(efdctx);
602
603			if (*ctx)
604				eventfd_ctx_put(*ctx);
605
606			*ctx = efdctx;
607		}
608		return 0;
609	}
610
611	return -EINVAL;
612}
613
614static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
615				    unsigned index, unsigned start,
616				    unsigned count, uint32_t flags, void *data)
617{
618	if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
619		return -EINVAL;
620
621	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
622					       count, flags, data);
623}
624
625static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
626				    unsigned index, unsigned start,
627				    unsigned count, uint32_t flags, void *data)
628{
629	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
630		return -EINVAL;
631
632	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
633					       count, flags, data);
634}
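Registering an error or request eventfd is a single-vector DATA_EVENTFD trigger that lands in vfio_pci_set_ctx_trigger_single() above. A hedged sketch follows; device_fd and efd are assumed to exist, and the request interrupt uses VFIO_PCI_REQ_IRQ_INDEX instead.

/*
 * Hedged userspace sketch (not from the kernel tree): stash an eventfd in
 * vdev->err_trigger via VFIO_DEVICE_SET_IRQS.  No error handling.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int set_err_eventfd(int device_fd, int32_t efd)
{
	size_t argsz = sizeof(struct vfio_irq_set) + sizeof(int32_t);
	struct vfio_irq_set *irq_set = calloc(1, argsz);
	int ret;

	irq_set->argsz = argsz;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
	irq_set->start = 0;
	irq_set->count = 1;
	memcpy(irq_set->data, &efd, sizeof(efd));

	ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	free(irq_set);
	return ret;
}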
635
636int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
637			    unsigned index, unsigned start, unsigned count,
638			    void *data)
639{
640	int (*func)(struct vfio_pci_device *vdev, unsigned index,
641		    unsigned start, unsigned count, uint32_t flags,
642		    void *data) = NULL;
643
644	switch (index) {
645	case VFIO_PCI_INTX_IRQ_INDEX:
646		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
647		case VFIO_IRQ_SET_ACTION_MASK:
648			func = vfio_pci_set_intx_mask;
649			break;
650		case VFIO_IRQ_SET_ACTION_UNMASK:
651			func = vfio_pci_set_intx_unmask;
652			break;
653		case VFIO_IRQ_SET_ACTION_TRIGGER:
654			func = vfio_pci_set_intx_trigger;
655			break;
656		}
657		break;
658	case VFIO_PCI_MSI_IRQ_INDEX:
659	case VFIO_PCI_MSIX_IRQ_INDEX:
660		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
661		case VFIO_IRQ_SET_ACTION_MASK:
662		case VFIO_IRQ_SET_ACTION_UNMASK:
663			/* XXX Need masking support exported */
664			break;
665		case VFIO_IRQ_SET_ACTION_TRIGGER:
666			func = vfio_pci_set_msi_trigger;
667			break;
668		}
669		break;
670	case VFIO_PCI_ERR_IRQ_INDEX:
671		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
672		case VFIO_IRQ_SET_ACTION_TRIGGER:
673			if (pci_is_pcie(vdev->pdev))
674				func = vfio_pci_set_err_trigger;
675			break;
676		}
677		break;
678	case VFIO_PCI_REQ_IRQ_INDEX:
679		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
680		case VFIO_IRQ_SET_ACTION_TRIGGER:
681			func = vfio_pci_set_req_trigger;
682			break;
683		}
684		break;
685	}
686
687	if (!func)
688		return -ENOTTY;
689
690	return func(vdev, index, start, count, flags, data);
691}
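To show where the MSI/MSI-X dispatch above is aimed, here is a hedged userspace sketch that enables two MSI-X vectors and later disables them through VFIO_DEVICE_SET_IRQS; the eventfds travel in the variable-length data[] field of struct vfio_irq_set. device_fd is assumed to be an already-opened VFIO device fd and error handling is omitted.

/*
 * Hedged userspace sketch (not part of the driver): enable two MSI-X vectors,
 * exercising vfio_msi_enable() + vfio_msi_set_block(), then disable them.
 * "device_fd" is assumed to be an open VFIO device fd; no error handling.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

#define NVEC 2

static int msix_enable(int device_fd, int32_t efds[NVEC])
{
	size_t argsz = sizeof(struct vfio_irq_set) + NVEC * sizeof(int32_t);
	struct vfio_irq_set *irq_set = calloc(1, argsz);
	int i, ret;

	for (i = 0; i < NVEC; i++)
		efds[i] = eventfd(0, EFD_CLOEXEC);

	irq_set->argsz = argsz;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;
	irq_set->count = NVEC;		/* -> vfio_msi_enable() + vfio_msi_set_block() */
	memcpy(irq_set->data, efds, NVEC * sizeof(int32_t));

	ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	free(irq_set);
	return ret;
}

static int msix_disable(int device_fd)
{
	struct vfio_irq_set irq_set = {
		.argsz = sizeof(irq_set),
		.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
		.index = VFIO_PCI_MSIX_IRQ_INDEX,
		.start = 0,
		.count = 0,	/* count == 0 + DATA_NONE -> vfio_msi_disable() */
	};

	return ioctl(device_fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}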