v6.8: drivers/vfio/pci/vfio_pci_intrs.c
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * VFIO PCI interrupt handling
  4 *
  5 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
  6 *     Author: Alex Williamson <alex.williamson@redhat.com>
  7 *
  8 * Derived from original vfio:
  9 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 10 * Author: Tom Lyon, pugs@cisco.com
 11 */
 12
 13#include <linux/device.h>
 14#include <linux/interrupt.h>
 15#include <linux/eventfd.h>
 16#include <linux/msi.h>
 17#include <linux/pci.h>
 18#include <linux/file.h>
 19#include <linux/vfio.h>
 20#include <linux/wait.h>
 21#include <linux/slab.h>
 22
 23#include "vfio_pci_priv.h"
 24
 25struct vfio_pci_irq_ctx {
 26	struct eventfd_ctx	*trigger;
 27	struct virqfd		*unmask;
 28	struct virqfd		*mask;
 29	char			*name;
 30	bool			masked;
 31	struct irq_bypass_producer	producer;
 32};
 33
 34static bool irq_is(struct vfio_pci_core_device *vdev, int type)
 35{
 36	return vdev->irq_type == type;
 37}
 38
 39static bool is_intx(struct vfio_pci_core_device *vdev)
 40{
 41	return vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX;
 42}
 43
 44static bool is_irq_none(struct vfio_pci_core_device *vdev)
 45{
 46	return !(vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX ||
 47		 vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX ||
 48		 vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
 49}
 50
 51static
 52struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev,
 53					  unsigned long index)
 54{
 55	return xa_load(&vdev->ctx, index);
 56}
 57
 58static void vfio_irq_ctx_free(struct vfio_pci_core_device *vdev,
 59			      struct vfio_pci_irq_ctx *ctx, unsigned long index)
 60{
 61	xa_erase(&vdev->ctx, index);
 62	kfree(ctx);
 63}
 64
 65static struct vfio_pci_irq_ctx *
 66vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index)
 67{
 68	struct vfio_pci_irq_ctx *ctx;
 69	int ret;
 70
 71	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
 72	if (!ctx)
 73		return NULL;
 74
 75	ret = xa_insert(&vdev->ctx, index, ctx, GFP_KERNEL_ACCOUNT);
 76	if (ret) {
 77		kfree(ctx);
 78		return NULL;
 79	}
 80
 81	return ctx;
 82}
 83
 84/*
 85 * INTx
 86 */
 87static void vfio_send_intx_eventfd(void *opaque, void *unused)
 88{
 89	struct vfio_pci_core_device *vdev = opaque;
 90
 91	if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
 92		struct vfio_pci_irq_ctx *ctx;
 93
 94		ctx = vfio_irq_ctx_get(vdev, 0);
 95		if (WARN_ON_ONCE(!ctx))
 96			return;
 97		eventfd_signal(ctx->trigger);
 98	}
 99}
100
101/* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
102bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
103{
104	struct pci_dev *pdev = vdev->pdev;
105	struct vfio_pci_irq_ctx *ctx;
106	unsigned long flags;
107	bool masked_changed = false;
108
109	spin_lock_irqsave(&vdev->irqlock, flags);
110
111	/*
112	 * Masking can come from interrupt, ioctl, or config space
113	 * via INTx disable.  The latter means this can get called
114	 * even when not using intx delivery.  In this case, just
115	 * try to have the physical bit follow the virtual bit.
116	 */
117	if (unlikely(!is_intx(vdev))) {
118		if (vdev->pci_2_3)
119			pci_intx(pdev, 0);
120		goto out_unlock;
121	}
122
123	ctx = vfio_irq_ctx_get(vdev, 0);
124	if (WARN_ON_ONCE(!ctx))
125		goto out_unlock;
126
127	if (!ctx->masked) {
128		/*
129		 * Can't use check_and_mask here because we always want to
130		 * mask, not just when something is pending.
131		 */
132		if (vdev->pci_2_3)
133			pci_intx(pdev, 0);
134		else
135			disable_irq_nosync(pdev->irq);
136
137		ctx->masked = true;
138		masked_changed = true;
139	}
140
141out_unlock:
142	spin_unlock_irqrestore(&vdev->irqlock, flags);
143	return masked_changed;
144}
145
146/*
147 * If this is triggered by an eventfd, we can't call eventfd_signal
148 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
149 * a signal is necessary, which can then be handled via a work queue
150 * or directly depending on the caller.
151 */
152static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
153{
154	struct vfio_pci_core_device *vdev = opaque;
155	struct pci_dev *pdev = vdev->pdev;
156	struct vfio_pci_irq_ctx *ctx;
157	unsigned long flags;
158	int ret = 0;
159
160	spin_lock_irqsave(&vdev->irqlock, flags);
161
162	/*
163	 * Unmasking comes from ioctl or config, so again, have the
164	 * physical bit follow the virtual even when not using INTx.
165	 */
166	if (unlikely(!is_intx(vdev))) {
167		if (vdev->pci_2_3)
168			pci_intx(pdev, 1);
169		goto out_unlock;
170	}
171
172	ctx = vfio_irq_ctx_get(vdev, 0);
173	if (WARN_ON_ONCE(!ctx))
174		goto out_unlock;
175
176	if (ctx->masked && !vdev->virq_disabled) {
177		/*
178		 * A pending interrupt here would immediately trigger,
179		 * but we can avoid that overhead by just re-sending
180		 * the interrupt to the user.
181		 */
182		if (vdev->pci_2_3) {
183			if (!pci_check_and_unmask_intx(pdev))
184				ret = 1;
185		} else
186			enable_irq(pdev->irq);
187
188		ctx->masked = (ret > 0);
189	}
190
191out_unlock:
192	spin_unlock_irqrestore(&vdev->irqlock, flags);
193
194	return ret;
195}
196
197void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
198{
199	if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
200		vfio_send_intx_eventfd(vdev, NULL);
201}
202
203static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
204{
205	struct vfio_pci_core_device *vdev = dev_id;
206	struct vfio_pci_irq_ctx *ctx;
207	unsigned long flags;
208	int ret = IRQ_NONE;
209
210	ctx = vfio_irq_ctx_get(vdev, 0);
211	if (WARN_ON_ONCE(!ctx))
212		return ret;
213
214	spin_lock_irqsave(&vdev->irqlock, flags);
215
216	if (!vdev->pci_2_3) {
217		disable_irq_nosync(vdev->pdev->irq);
218		ctx->masked = true;
219		ret = IRQ_HANDLED;
220	} else if (!ctx->masked &&  /* may be shared */
221		   pci_check_and_mask_intx(vdev->pdev)) {
222		ctx->masked = true;
223		ret = IRQ_HANDLED;
224	}
225
226	spin_unlock_irqrestore(&vdev->irqlock, flags);
227
228	if (ret == IRQ_HANDLED)
229		vfio_send_intx_eventfd(vdev, NULL);
230
231	return ret;
232}
233
234static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
235{
236	struct vfio_pci_irq_ctx *ctx;
237
238	if (!is_irq_none(vdev))
239		return -EINVAL;
240
241	if (!vdev->pdev->irq)
242		return -ENODEV;
243
244	ctx = vfio_irq_ctx_alloc(vdev, 0);
245	if (!ctx)
246		return -ENOMEM;
247
248	/*
249	 * If the virtual interrupt is masked, restore it.  Devices
250	 * supporting DisINTx can be masked at the hardware level
251	 * here, non-PCI-2.3 devices will have to wait until the
252	 * interrupt is enabled.
253	 */
254	ctx->masked = vdev->virq_disabled;
255	if (vdev->pci_2_3)
256		pci_intx(vdev->pdev, !ctx->masked);
257
258	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
259
260	return 0;
261}
262
263static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
264{
265	struct pci_dev *pdev = vdev->pdev;
266	unsigned long irqflags = IRQF_SHARED;
267	struct vfio_pci_irq_ctx *ctx;
268	struct eventfd_ctx *trigger;
269	unsigned long flags;
270	int ret;
271
272	ctx = vfio_irq_ctx_get(vdev, 0);
273	if (WARN_ON_ONCE(!ctx))
274		return -EINVAL;
275
276	if (ctx->trigger) {
277		free_irq(pdev->irq, vdev);
278		kfree(ctx->name);
279		eventfd_ctx_put(ctx->trigger);
280		ctx->trigger = NULL;
281	}
282
283	if (fd < 0) /* Disable only */
284		return 0;
285
286	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
287			      pci_name(pdev));
288	if (!ctx->name)
289		return -ENOMEM;
290
291	trigger = eventfd_ctx_fdget(fd);
292	if (IS_ERR(trigger)) {
293		kfree(ctx->name);
294		return PTR_ERR(trigger);
295	}
296
297	ctx->trigger = trigger;
298
299	if (!vdev->pci_2_3)
300		irqflags = 0;
301
302	ret = request_irq(pdev->irq, vfio_intx_handler,
303			  irqflags, ctx->name, vdev);
304	if (ret) {
305		ctx->trigger = NULL;
306		kfree(ctx->name);
307		eventfd_ctx_put(trigger);
308		return ret;
309	}
310
311	/*
312	 * INTx disable will stick across the new irq setup,
313	 * disable_irq won't.
314	 */
315	spin_lock_irqsave(&vdev->irqlock, flags);
316	if (!vdev->pci_2_3 && ctx->masked)
317		disable_irq_nosync(pdev->irq);
318	spin_unlock_irqrestore(&vdev->irqlock, flags);
319
320	return 0;
321}
322
323static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
324{
325	struct vfio_pci_irq_ctx *ctx;
326
327	ctx = vfio_irq_ctx_get(vdev, 0);
328	WARN_ON_ONCE(!ctx);
329	if (ctx) {
330		vfio_virqfd_disable(&ctx->unmask);
331		vfio_virqfd_disable(&ctx->mask);
332	}
333	vfio_intx_set_signal(vdev, -1);
334	vdev->irq_type = VFIO_PCI_NUM_IRQS;
335	vfio_irq_ctx_free(vdev, ctx, 0);
336}
337
338/*
339 * MSI/MSI-X
340 */
341static irqreturn_t vfio_msihandler(int irq, void *arg)
342{
343	struct eventfd_ctx *trigger = arg;
344
345	eventfd_signal(trigger);
346	return IRQ_HANDLED;
347}
348
349static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msix)
350{
351	struct pci_dev *pdev = vdev->pdev;
352	unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
353	int ret;
354	u16 cmd;
355
356	if (!is_irq_none(vdev))
357		return -EINVAL;
358
359	/* return the number of supported vectors if we can't get all: */
360	cmd = vfio_pci_memory_lock_and_enable(vdev);
361	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
362	if (ret < nvec) {
363		if (ret > 0)
364			pci_free_irq_vectors(pdev);
365		vfio_pci_memory_unlock_and_restore(vdev, cmd);
366		return ret;
367	}
368	vfio_pci_memory_unlock_and_restore(vdev, cmd);
369
370	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
371				VFIO_PCI_MSI_IRQ_INDEX;
372
373	if (!msix) {
374		/*
375		 * Compute the virtual hardware field for max msi vectors -
376		 * it is the log base 2 of the number of vectors.
377		 */
378		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
379	}
380
381	return 0;
382}
383
384/*
385 * vfio_msi_alloc_irq() returns the Linux IRQ number of an MSI or MSI-X device
386 * interrupt vector. If a Linux IRQ number is not available then a new
387 * interrupt is allocated if dynamic MSI-X is supported.
388 *
389 * Where is vfio_msi_free_irq()? Allocated interrupts are maintained,
390 * essentially forming a cache that subsequent allocations can draw from.
391 * Interrupts are freed using pci_free_irq_vectors() when MSI/MSI-X is
392 * disabled.
393 */
394static int vfio_msi_alloc_irq(struct vfio_pci_core_device *vdev,
395			      unsigned int vector, bool msix)
396{
397	struct pci_dev *pdev = vdev->pdev;
398	struct msi_map map;
399	int irq;
400	u16 cmd;
401
402	irq = pci_irq_vector(pdev, vector);
403	if (WARN_ON_ONCE(irq == 0))
404		return -EINVAL;
405	if (irq > 0 || !msix || !vdev->has_dyn_msix)
406		return irq;
407
408	cmd = vfio_pci_memory_lock_and_enable(vdev);
409	map = pci_msix_alloc_irq_at(pdev, vector, NULL);
410	vfio_pci_memory_unlock_and_restore(vdev, cmd);
411
412	return map.index < 0 ? map.index : map.virq;
413}
414
415static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
416				      unsigned int vector, int fd, bool msix)
417{
418	struct pci_dev *pdev = vdev->pdev;
419	struct vfio_pci_irq_ctx *ctx;
420	struct eventfd_ctx *trigger;
421	int irq = -EINVAL, ret;
422	u16 cmd;
423
424	ctx = vfio_irq_ctx_get(vdev, vector);
425
426	if (ctx) {
427		irq_bypass_unregister_producer(&ctx->producer);
428		irq = pci_irq_vector(pdev, vector);
429		cmd = vfio_pci_memory_lock_and_enable(vdev);
430		free_irq(irq, ctx->trigger);
431		vfio_pci_memory_unlock_and_restore(vdev, cmd);
432		/* Interrupt stays allocated, will be freed at MSI-X disable. */
433		kfree(ctx->name);
434		eventfd_ctx_put(ctx->trigger);
435		vfio_irq_ctx_free(vdev, ctx, vector);
436	}
437
438	if (fd < 0)
439		return 0;
440
441	if (irq == -EINVAL) {
442		/* Interrupt stays allocated, will be freed at MSI-X disable. */
443		irq = vfio_msi_alloc_irq(vdev, vector, msix);
444		if (irq < 0)
445			return irq;
446	}
447
448	ctx = vfio_irq_ctx_alloc(vdev, vector);
449	if (!ctx)
450		return -ENOMEM;
451
452	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
453			      msix ? "x" : "", vector, pci_name(pdev));
454	if (!ctx->name) {
455		ret = -ENOMEM;
456		goto out_free_ctx;
457	}
458
459	trigger = eventfd_ctx_fdget(fd);
460	if (IS_ERR(trigger)) {
461		ret = PTR_ERR(trigger);
462		goto out_free_name;
463	}
464
465	/*
466	 * If the vector was previously allocated, refresh the on-device
467	 * message data before enabling in case it had been cleared or
468	 * corrupted (e.g. due to backdoor resets) since writing.
469	 */
470	cmd = vfio_pci_memory_lock_and_enable(vdev);
471	if (msix) {
472		struct msi_msg msg;
473
474		get_cached_msi_msg(irq, &msg);
475		pci_write_msi_msg(irq, &msg);
476	}
477
478	ret = request_irq(irq, vfio_msihandler, 0, ctx->name, trigger);
479	vfio_pci_memory_unlock_and_restore(vdev, cmd);
480	if (ret)
481		goto out_put_eventfd_ctx;
482
483	ctx->producer.token = trigger;
484	ctx->producer.irq = irq;
485	ret = irq_bypass_register_producer(&ctx->producer);
486	if (unlikely(ret)) {
487		dev_info(&pdev->dev,
488		"irq bypass producer (token %p) registration fails: %d\n",
489		ctx->producer.token, ret);
490
491		ctx->producer.token = NULL;
492	}
493	ctx->trigger = trigger;
494
495	return 0;
496
497out_put_eventfd_ctx:
498	eventfd_ctx_put(trigger);
499out_free_name:
500	kfree(ctx->name);
501out_free_ctx:
502	vfio_irq_ctx_free(vdev, ctx, vector);
503	return ret;
504}
505
506static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
507			      unsigned count, int32_t *fds, bool msix)
508{
509	unsigned int i, j;
510	int ret = 0;
511
512	for (i = 0, j = start; i < count && !ret; i++, j++) {
513		int fd = fds ? fds[i] : -1;
514		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
515	}
516
517	if (ret) {
518		for (i = start; i < j; i++)
519			vfio_msi_set_vector_signal(vdev, i, -1, msix);
520	}
521
522	return ret;
523}
524
525static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
526{
527	struct pci_dev *pdev = vdev->pdev;
528	struct vfio_pci_irq_ctx *ctx;
529	unsigned long i;
530	u16 cmd;
531
532	xa_for_each(&vdev->ctx, i, ctx) {
533		vfio_virqfd_disable(&ctx->unmask);
534		vfio_virqfd_disable(&ctx->mask);
535		vfio_msi_set_vector_signal(vdev, i, -1, msix);
536	}
537
538	cmd = vfio_pci_memory_lock_and_enable(vdev);
539	pci_free_irq_vectors(pdev);
540	vfio_pci_memory_unlock_and_restore(vdev, cmd);
541
542	/*
543	 * Both disable paths above use pci_intx_for_msi() to clear DisINTx
544	 * via their shutdown paths.  Restore for NoINTx devices.
545	 */
546	if (vdev->nointx)
547		pci_intx(pdev, 0);
548
549	vdev->irq_type = VFIO_PCI_NUM_IRQS;
550}
551
552/*
553 * IOCTL support
554 */
555static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
556				    unsigned index, unsigned start,
557				    unsigned count, uint32_t flags, void *data)
558{
559	if (!is_intx(vdev) || start != 0 || count != 1)
560		return -EINVAL;
561
562	if (flags & VFIO_IRQ_SET_DATA_NONE) {
563		vfio_pci_intx_unmask(vdev);
564	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
565		uint8_t unmask = *(uint8_t *)data;
566		if (unmask)
567			vfio_pci_intx_unmask(vdev);
568	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
569		struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
570		int32_t fd = *(int32_t *)data;
571
572		if (WARN_ON_ONCE(!ctx))
573			return -EINVAL;
574		if (fd >= 0)
575			return vfio_virqfd_enable((void *) vdev,
576						  vfio_pci_intx_unmask_handler,
577						  vfio_send_intx_eventfd, NULL,
578						  &ctx->unmask, fd);
579
580		vfio_virqfd_disable(&ctx->unmask);
581	}
582
583	return 0;
584}
585
586static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev,
587				  unsigned index, unsigned start,
588				  unsigned count, uint32_t flags, void *data)
589{
590	if (!is_intx(vdev) || start != 0 || count != 1)
591		return -EINVAL;
592
593	if (flags & VFIO_IRQ_SET_DATA_NONE) {
594		vfio_pci_intx_mask(vdev);
595	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
596		uint8_t mask = *(uint8_t *)data;
597		if (mask)
598			vfio_pci_intx_mask(vdev);
599	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
600		return -ENOTTY; /* XXX implement me */
601	}
602
603	return 0;
604}
605
606static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
607				     unsigned index, unsigned start,
608				     unsigned count, uint32_t flags, void *data)
609{
610	if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
611		vfio_intx_disable(vdev);
612		return 0;
613	}
614
615	if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
616		return -EINVAL;
617
618	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
619		int32_t fd = *(int32_t *)data;
620		int ret;
621
622		if (is_intx(vdev))
623			return vfio_intx_set_signal(vdev, fd);
624
625		ret = vfio_intx_enable(vdev);
626		if (ret)
627			return ret;
628
629		ret = vfio_intx_set_signal(vdev, fd);
630		if (ret)
631			vfio_intx_disable(vdev);
632
633		return ret;
634	}
635
636	if (!is_intx(vdev))
637		return -EINVAL;
638
639	if (flags & VFIO_IRQ_SET_DATA_NONE) {
640		vfio_send_intx_eventfd(vdev, NULL);
641	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
642		uint8_t trigger = *(uint8_t *)data;
643		if (trigger)
644			vfio_send_intx_eventfd(vdev, NULL);
645	}
646	return 0;
647}
648
649static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
650				    unsigned index, unsigned start,
651				    unsigned count, uint32_t flags, void *data)
652{
653	struct vfio_pci_irq_ctx *ctx;
654	unsigned int i;
655	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
656
657	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
658		vfio_msi_disable(vdev, msix);
659		return 0;
660	}
661
662	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
663		return -EINVAL;
664
665	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
666		int32_t *fds = data;
667		int ret;
668
669		if (vdev->irq_type == index)
670			return vfio_msi_set_block(vdev, start, count,
671						  fds, msix);
672
673		ret = vfio_msi_enable(vdev, start + count, msix);
674		if (ret)
675			return ret;
676
677		ret = vfio_msi_set_block(vdev, start, count, fds, msix);
678		if (ret)
679			vfio_msi_disable(vdev, msix);
680
681		return ret;
682	}
683
684	if (!irq_is(vdev, index))
685		return -EINVAL;
686
687	for (i = start; i < start + count; i++) {
688		ctx = vfio_irq_ctx_get(vdev, i);
689		if (!ctx)
690			continue;
691		if (flags & VFIO_IRQ_SET_DATA_NONE) {
692			eventfd_signal(ctx->trigger);
693		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
694			uint8_t *bools = data;
695			if (bools[i - start])
696				eventfd_signal(ctx->trigger);
697		}
698	}
699	return 0;
700}
701
702static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
703					   unsigned int count, uint32_t flags,
704					   void *data)
705{
706	/* DATA_NONE/DATA_BOOL enables loopback testing */
707	if (flags & VFIO_IRQ_SET_DATA_NONE) {
708		if (*ctx) {
709			if (count) {
710				eventfd_signal(*ctx);
711			} else {
712				eventfd_ctx_put(*ctx);
713				*ctx = NULL;
714			}
715			return 0;
716		}
717	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
718		uint8_t trigger;
719
720		if (!count)
721			return -EINVAL;
722
723		trigger = *(uint8_t *)data;
724		if (trigger && *ctx)
725			eventfd_signal(*ctx);
726
727		return 0;
728	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
729		int32_t fd;
730
731		if (!count)
732			return -EINVAL;
733
734		fd = *(int32_t *)data;
735		if (fd == -1) {
736			if (*ctx)
737				eventfd_ctx_put(*ctx);
738			*ctx = NULL;
739		} else if (fd >= 0) {
740			struct eventfd_ctx *efdctx;
741
742			efdctx = eventfd_ctx_fdget(fd);
743			if (IS_ERR(efdctx))
744				return PTR_ERR(efdctx);
745
746			if (*ctx)
747				eventfd_ctx_put(*ctx);
748
749			*ctx = efdctx;
750		}
751		return 0;
752	}
753
754	return -EINVAL;
755}
756
757static int vfio_pci_set_err_trigger(struct vfio_pci_core_device *vdev,
758				    unsigned index, unsigned start,
759				    unsigned count, uint32_t flags, void *data)
760{
761	if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
762		return -EINVAL;
763
764	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
765					       count, flags, data);
766}
767
768static int vfio_pci_set_req_trigger(struct vfio_pci_core_device *vdev,
769				    unsigned index, unsigned start,
770				    unsigned count, uint32_t flags, void *data)
771{
772	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
773		return -EINVAL;
774
775	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
776					       count, flags, data);
777}
778
779int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
780			    unsigned index, unsigned start, unsigned count,
781			    void *data)
782{
783	int (*func)(struct vfio_pci_core_device *vdev, unsigned index,
784		    unsigned start, unsigned count, uint32_t flags,
785		    void *data) = NULL;
786
787	switch (index) {
788	case VFIO_PCI_INTX_IRQ_INDEX:
789		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
790		case VFIO_IRQ_SET_ACTION_MASK:
791			func = vfio_pci_set_intx_mask;
792			break;
793		case VFIO_IRQ_SET_ACTION_UNMASK:
794			func = vfio_pci_set_intx_unmask;
795			break;
796		case VFIO_IRQ_SET_ACTION_TRIGGER:
797			func = vfio_pci_set_intx_trigger;
798			break;
799		}
800		break;
801	case VFIO_PCI_MSI_IRQ_INDEX:
802	case VFIO_PCI_MSIX_IRQ_INDEX:
803		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
804		case VFIO_IRQ_SET_ACTION_MASK:
805		case VFIO_IRQ_SET_ACTION_UNMASK:
806			/* XXX Need masking support exported */
807			break;
808		case VFIO_IRQ_SET_ACTION_TRIGGER:
809			func = vfio_pci_set_msi_trigger;
810			break;
811		}
812		break;
813	case VFIO_PCI_ERR_IRQ_INDEX:
814		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
815		case VFIO_IRQ_SET_ACTION_TRIGGER:
816			if (pci_is_pcie(vdev->pdev))
817				func = vfio_pci_set_err_trigger;
818			break;
819		}
820		break;
821	case VFIO_PCI_REQ_IRQ_INDEX:
822		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
823		case VFIO_IRQ_SET_ACTION_TRIGGER:
824			func = vfio_pci_set_req_trigger;
825			break;
826		}
827		break;
828	}
829
830	if (!func)
831		return -ENOTTY;
832
833	return func(vdev, index, start, count, flags, data);
834}
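The listing above is reached from userspace through the VFIO_DEVICE_SET_IRQS ioctl, which vfio_pci_set_irqs_ioctl() dispatches by index and action. As a rough sketch of how vfio_pci_set_msi_trigger() ends up wiring vfio_msihandler() to an eventfd, the hypothetical userspace helper below (device_fd and the helper name are illustrative; the structure and flag names come from the vfio_irq_set uAPI in <linux/vfio.h>) attaches one eventfd to MSI-X vector 0:

#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper: route MSI-X vector 0 of an already-opened VFIO
 * device (device_fd) to a fresh eventfd and return that eventfd. */
static int vfio_msix_trigger_eventfd(int device_fd)
{
	struct vfio_irq_set *set;
	size_t sz = sizeof(*set) + sizeof(int32_t);
	int32_t efd = eventfd(0, EFD_CLOEXEC);

	if (efd < 0)
		return -1;

	set = calloc(1, sz);
	if (!set) {
		close(efd);
		return -1;
	}

	set->argsz = sz;
	/* DATA_EVENTFD + ACTION_TRIGGER is dispatched to vfio_pci_set_msi_trigger() */
	set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	set->start = 0;
	set->count = 1;
	memcpy(set->data, &efd, sizeof(efd));

	if (ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set) < 0) {
		free(set);
		close(efd);
		return -1;
	}

	free(set);
	return efd;	/* each device interrupt now bumps the eventfd count */
}

Tearing down takes the same dispatch path with count == 0 and VFIO_IRQ_SET_DATA_NONE, which is what reaches vfio_msi_disable().
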
v6.2: drivers/vfio/pci/vfio_pci_intrs.c
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * VFIO PCI interrupt handling
  4 *
  5 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
  6 *     Author: Alex Williamson <alex.williamson@redhat.com>
  7 *
  8 * Derived from original vfio:
  9 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 10 * Author: Tom Lyon, pugs@cisco.com
 11 */
 12
 13#include <linux/device.h>
 14#include <linux/interrupt.h>
 15#include <linux/eventfd.h>
 16#include <linux/msi.h>
 17#include <linux/pci.h>
 18#include <linux/file.h>
 19#include <linux/vfio.h>
 20#include <linux/wait.h>
 21#include <linux/slab.h>
 22
 23#include "vfio_pci_priv.h"
 24
 25struct vfio_pci_irq_ctx {
 26	struct eventfd_ctx	*trigger;
 27	struct virqfd		*unmask;
 28	struct virqfd		*mask;
 29	char			*name;
 30	bool			masked;
 31	struct irq_bypass_producer	producer;
 32};
 33
 34static bool irq_is(struct vfio_pci_core_device *vdev, int type)
 35{
 36	return vdev->irq_type == type;
 37}
 38
 39static bool is_intx(struct vfio_pci_core_device *vdev)
 40{
 41	return vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX;
 42}
 43
 44static bool is_irq_none(struct vfio_pci_core_device *vdev)
 45{
 46	return !(vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX ||
 47		 vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX ||
 48		 vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
 49}
 50
 51/*
 52 * INTx
 53 */
 54static void vfio_send_intx_eventfd(void *opaque, void *unused)
 55{
 56	struct vfio_pci_core_device *vdev = opaque;
 57
 58	if (likely(is_intx(vdev) && !vdev->virq_disabled))
 59		eventfd_signal(vdev->ctx[0].trigger, 1);
 60}
 61
 62/* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
 63bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
 64{
 65	struct pci_dev *pdev = vdev->pdev;
 66	unsigned long flags;
 67	bool masked_changed = false;
 68
 69	spin_lock_irqsave(&vdev->irqlock, flags);
 70
 71	/*
 72	 * Masking can come from interrupt, ioctl, or config space
 73	 * via INTx disable.  The latter means this can get called
 74	 * even when not using intx delivery.  In this case, just
 75	 * try to have the physical bit follow the virtual bit.
 76	 */
 77	if (unlikely(!is_intx(vdev))) {
 78		if (vdev->pci_2_3)
 79			pci_intx(pdev, 0);
 80	} else if (!vdev->ctx[0].masked) {
 81		/*
 82		 * Can't use check_and_mask here because we always want to
 83		 * mask, not just when something is pending.
 84		 */
 85		if (vdev->pci_2_3)
 86			pci_intx(pdev, 0);
 87		else
 88			disable_irq_nosync(pdev->irq);
 89
 90		vdev->ctx[0].masked = true;
 91		masked_changed = true;
 92	}
 93
 94	spin_unlock_irqrestore(&vdev->irqlock, flags);
 95	return masked_changed;
 96}
 97
 98/*
 99 * If this is triggered by an eventfd, we can't call eventfd_signal
100 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
101 * a signal is necessary, which can then be handled via a work queue
102 * or directly depending on the caller.
103 */
104static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
105{
106	struct vfio_pci_core_device *vdev = opaque;
107	struct pci_dev *pdev = vdev->pdev;
108	unsigned long flags;
109	int ret = 0;
110
111	spin_lock_irqsave(&vdev->irqlock, flags);
112
113	/*
114	 * Unmasking comes from ioctl or config, so again, have the
115	 * physical bit follow the virtual even when not using INTx.
116	 */
117	if (unlikely(!is_intx(vdev))) {
118		if (vdev->pci_2_3)
119			pci_intx(pdev, 1);
120	} else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
121		/*
122		 * A pending interrupt here would immediately trigger,
123		 * but we can avoid that overhead by just re-sending
124		 * the interrupt to the user.
125		 */
126		if (vdev->pci_2_3) {
127			if (!pci_check_and_unmask_intx(pdev))
128				ret = 1;
129		} else
130			enable_irq(pdev->irq);
131
132		vdev->ctx[0].masked = (ret > 0);
133	}
134
135	spin_unlock_irqrestore(&vdev->irqlock, flags);
136
137	return ret;
138}
139
140void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
141{
142	if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
143		vfio_send_intx_eventfd(vdev, NULL);
144}
145
146static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
147{
148	struct vfio_pci_core_device *vdev = dev_id;
149	unsigned long flags;
150	int ret = IRQ_NONE;
151
152	spin_lock_irqsave(&vdev->irqlock, flags);
153
154	if (!vdev->pci_2_3) {
155		disable_irq_nosync(vdev->pdev->irq);
156		vdev->ctx[0].masked = true;
157		ret = IRQ_HANDLED;
158	} else if (!vdev->ctx[0].masked &&  /* may be shared */
159		   pci_check_and_mask_intx(vdev->pdev)) {
160		vdev->ctx[0].masked = true;
161		ret = IRQ_HANDLED;
162	}
163
164	spin_unlock_irqrestore(&vdev->irqlock, flags);
165
166	if (ret == IRQ_HANDLED)
167		vfio_send_intx_eventfd(vdev, NULL);
168
169	return ret;
170}
171
172static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
173{
174	if (!is_irq_none(vdev))
175		return -EINVAL;
176
177	if (!vdev->pdev->irq)
178		return -ENODEV;
179
180	vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
181	if (!vdev->ctx)
182		return -ENOMEM;
183
184	vdev->num_ctx = 1;
185
186	/*
187	 * If the virtual interrupt is masked, restore it.  Devices
188	 * supporting DisINTx can be masked at the hardware level
189	 * here, non-PCI-2.3 devices will have to wait until the
190	 * interrupt is enabled.
191	 */
192	vdev->ctx[0].masked = vdev->virq_disabled;
193	if (vdev->pci_2_3)
194		pci_intx(vdev->pdev, !vdev->ctx[0].masked);
195
196	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
197
198	return 0;
199}
200
201static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
202{
203	struct pci_dev *pdev = vdev->pdev;
204	unsigned long irqflags = IRQF_SHARED;
205	struct eventfd_ctx *trigger;
206	unsigned long flags;
207	int ret;
208
209	if (vdev->ctx[0].trigger) {
210		free_irq(pdev->irq, vdev);
211		kfree(vdev->ctx[0].name);
212		eventfd_ctx_put(vdev->ctx[0].trigger);
213		vdev->ctx[0].trigger = NULL;
214	}
215
216	if (fd < 0) /* Disable only */
217		return 0;
218
219	vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
220				      pci_name(pdev));
221	if (!vdev->ctx[0].name)
222		return -ENOMEM;
223
224	trigger = eventfd_ctx_fdget(fd);
225	if (IS_ERR(trigger)) {
226		kfree(vdev->ctx[0].name);
227		return PTR_ERR(trigger);
228	}
229
230	vdev->ctx[0].trigger = trigger;
231
232	if (!vdev->pci_2_3)
233		irqflags = 0;
234
235	ret = request_irq(pdev->irq, vfio_intx_handler,
236			  irqflags, vdev->ctx[0].name, vdev);
237	if (ret) {
238		vdev->ctx[0].trigger = NULL;
239		kfree(vdev->ctx[0].name);
240		eventfd_ctx_put(trigger);
241		return ret;
242	}
243
244	/*
245	 * INTx disable will stick across the new irq setup,
246	 * disable_irq won't.
247	 */
248	spin_lock_irqsave(&vdev->irqlock, flags);
249	if (!vdev->pci_2_3 && vdev->ctx[0].masked)
250		disable_irq_nosync(pdev->irq);
251	spin_unlock_irqrestore(&vdev->irqlock, flags);
252
253	return 0;
254}
255
256static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
257{
258	vfio_virqfd_disable(&vdev->ctx[0].unmask);
259	vfio_virqfd_disable(&vdev->ctx[0].mask);
260	vfio_intx_set_signal(vdev, -1);
261	vdev->irq_type = VFIO_PCI_NUM_IRQS;
262	vdev->num_ctx = 0;
263	kfree(vdev->ctx);
264}
265
266/*
267 * MSI/MSI-X
268 */
269static irqreturn_t vfio_msihandler(int irq, void *arg)
270{
271	struct eventfd_ctx *trigger = arg;
272
273	eventfd_signal(trigger, 1);
274	return IRQ_HANDLED;
275}
276
277static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msix)
278{
279	struct pci_dev *pdev = vdev->pdev;
280	unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
281	int ret;
282	u16 cmd;
283
284	if (!is_irq_none(vdev))
285		return -EINVAL;
286
287	vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
288	if (!vdev->ctx)
289		return -ENOMEM;
290
291	/* return the number of supported vectors if we can't get all: */
292	cmd = vfio_pci_memory_lock_and_enable(vdev);
293	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
294	if (ret < nvec) {
295		if (ret > 0)
296			pci_free_irq_vectors(pdev);
297		vfio_pci_memory_unlock_and_restore(vdev, cmd);
298		kfree(vdev->ctx);
299		return ret;
300	}
301	vfio_pci_memory_unlock_and_restore(vdev, cmd);
302
303	vdev->num_ctx = nvec;
304	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
305				VFIO_PCI_MSI_IRQ_INDEX;
306
307	if (!msix) {
308		/*
309		 * Compute the virtual hardware field for max msi vectors -
310		 * it is the log base 2 of the number of vectors.
311		 */
312		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
313	}
314
315	return 0;
316}
317
318static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
319				      int vector, int fd, bool msix)
320{
321	struct pci_dev *pdev = vdev->pdev;
322	struct eventfd_ctx *trigger;
323	int irq, ret;
324	u16 cmd;
325
326	if (vector < 0 || vector >= vdev->num_ctx)
327		return -EINVAL;
328
329	irq = pci_irq_vector(pdev, vector);
330
331	if (vdev->ctx[vector].trigger) {
332		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
333
334		cmd = vfio_pci_memory_lock_and_enable(vdev);
335		free_irq(irq, vdev->ctx[vector].trigger);
336		vfio_pci_memory_unlock_and_restore(vdev, cmd);
337
338		kfree(vdev->ctx[vector].name);
339		eventfd_ctx_put(vdev->ctx[vector].trigger);
340		vdev->ctx[vector].trigger = NULL;
341	}
342
343	if (fd < 0)
344		return 0;
345
346	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
347					   msix ? "x" : "", vector,
348					   pci_name(pdev));
349	if (!vdev->ctx[vector].name)
350		return -ENOMEM;
351
352	trigger = eventfd_ctx_fdget(fd);
353	if (IS_ERR(trigger)) {
354		kfree(vdev->ctx[vector].name);
355		return PTR_ERR(trigger);
356	}
357
358	/*
359	 * The MSIx vector table resides in device memory which may be cleared
360	 * via backdoor resets. We don't allow direct access to the vector
361	 * table so even if a userspace driver attempts to save/restore around
362	 * such a reset it would be unsuccessful. To avoid this, restore the
363	 * cached value of the message prior to enabling.
364	 */
365	cmd = vfio_pci_memory_lock_and_enable(vdev);
366	if (msix) {
367		struct msi_msg msg;
368
369		get_cached_msi_msg(irq, &msg);
370		pci_write_msi_msg(irq, &msg);
371	}
372
373	ret = request_irq(irq, vfio_msihandler, 0,
374			  vdev->ctx[vector].name, trigger);
375	vfio_pci_memory_unlock_and_restore(vdev, cmd);
376	if (ret) {
377		kfree(vdev->ctx[vector].name);
378		eventfd_ctx_put(trigger);
379		return ret;
380	}
381
382	vdev->ctx[vector].producer.token = trigger;
383	vdev->ctx[vector].producer.irq = irq;
384	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
385	if (unlikely(ret)) {
386		dev_info(&pdev->dev,
387		"irq bypass producer (token %p) registration fails: %d\n",
388		vdev->ctx[vector].producer.token, ret);
389
390		vdev->ctx[vector].producer.token = NULL;
391	}
392	vdev->ctx[vector].trigger = trigger;
393
394	return 0;
395}
396
397static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
398			      unsigned count, int32_t *fds, bool msix)
399{
400	int i, j, ret = 0;
401
402	if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
403		return -EINVAL;
404
405	for (i = 0, j = start; i < count && !ret; i++, j++) {
406		int fd = fds ? fds[i] : -1;
407		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
408	}
409
410	if (ret) {
411		for (--j; j >= (int)start; j--)
412			vfio_msi_set_vector_signal(vdev, j, -1, msix);
413	}
414
415	return ret;
416}
417
418static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
419{
420	struct pci_dev *pdev = vdev->pdev;
421	int i;
422	u16 cmd;
423
424	for (i = 0; i < vdev->num_ctx; i++) {
425		vfio_virqfd_disable(&vdev->ctx[i].unmask);
426		vfio_virqfd_disable(&vdev->ctx[i].mask);
427	}
428
429	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
430
431	cmd = vfio_pci_memory_lock_and_enable(vdev);
432	pci_free_irq_vectors(pdev);
433	vfio_pci_memory_unlock_and_restore(vdev, cmd);
434
435	/*
436	 * Both disable paths above use pci_intx_for_msi() to clear DisINTx
437	 * via their shutdown paths.  Restore for NoINTx devices.
438	 */
439	if (vdev->nointx)
440		pci_intx(pdev, 0);
441
442	vdev->irq_type = VFIO_PCI_NUM_IRQS;
443	vdev->num_ctx = 0;
444	kfree(vdev->ctx);
445}
446
447/*
448 * IOCTL support
449 */
450static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
451				    unsigned index, unsigned start,
452				    unsigned count, uint32_t flags, void *data)
453{
454	if (!is_intx(vdev) || start != 0 || count != 1)
455		return -EINVAL;
456
457	if (flags & VFIO_IRQ_SET_DATA_NONE) {
458		vfio_pci_intx_unmask(vdev);
459	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
460		uint8_t unmask = *(uint8_t *)data;
461		if (unmask)
462			vfio_pci_intx_unmask(vdev);
463	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
464		int32_t fd = *(int32_t *)data;
465		if (fd >= 0)
466			return vfio_virqfd_enable((void *) vdev,
467						  vfio_pci_intx_unmask_handler,
468						  vfio_send_intx_eventfd, NULL,
469						  &vdev->ctx[0].unmask, fd);
470
471		vfio_virqfd_disable(&vdev->ctx[0].unmask);
472	}
473
474	return 0;
475}
476
477static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev,
478				  unsigned index, unsigned start,
479				  unsigned count, uint32_t flags, void *data)
480{
481	if (!is_intx(vdev) || start != 0 || count != 1)
482		return -EINVAL;
483
484	if (flags & VFIO_IRQ_SET_DATA_NONE) {
485		vfio_pci_intx_mask(vdev);
486	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
487		uint8_t mask = *(uint8_t *)data;
488		if (mask)
489			vfio_pci_intx_mask(vdev);
490	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
491		return -ENOTTY; /* XXX implement me */
492	}
493
494	return 0;
495}
496
497static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
498				     unsigned index, unsigned start,
499				     unsigned count, uint32_t flags, void *data)
500{
501	if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
502		vfio_intx_disable(vdev);
503		return 0;
504	}
505
506	if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
507		return -EINVAL;
508
509	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
510		int32_t fd = *(int32_t *)data;
511		int ret;
512
513		if (is_intx(vdev))
514			return vfio_intx_set_signal(vdev, fd);
515
516		ret = vfio_intx_enable(vdev);
517		if (ret)
518			return ret;
519
520		ret = vfio_intx_set_signal(vdev, fd);
521		if (ret)
522			vfio_intx_disable(vdev);
523
524		return ret;
525	}
526
527	if (!is_intx(vdev))
528		return -EINVAL;
529
530	if (flags & VFIO_IRQ_SET_DATA_NONE) {
531		vfio_send_intx_eventfd(vdev, NULL);
532	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
533		uint8_t trigger = *(uint8_t *)data;
534		if (trigger)
535			vfio_send_intx_eventfd(vdev, NULL);
536	}
537	return 0;
538}
539
540static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
541				    unsigned index, unsigned start,
542				    unsigned count, uint32_t flags, void *data)
543{
544	int i;
545	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
546
547	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
548		vfio_msi_disable(vdev, msix);
549		return 0;
550	}
551
552	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
553		return -EINVAL;
554
555	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
556		int32_t *fds = data;
557		int ret;
558
559		if (vdev->irq_type == index)
560			return vfio_msi_set_block(vdev, start, count,
561						  fds, msix);
562
563		ret = vfio_msi_enable(vdev, start + count, msix);
564		if (ret)
565			return ret;
566
567		ret = vfio_msi_set_block(vdev, start, count, fds, msix);
568		if (ret)
569			vfio_msi_disable(vdev, msix);
570
571		return ret;
572	}
573
574	if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
575		return -EINVAL;
576
577	for (i = start; i < start + count; i++) {
578		if (!vdev->ctx[i].trigger)
579			continue;
580		if (flags & VFIO_IRQ_SET_DATA_NONE) {
581			eventfd_signal(vdev->ctx[i].trigger, 1);
582		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
583			uint8_t *bools = data;
584			if (bools[i - start])
585				eventfd_signal(vdev->ctx[i].trigger, 1);
586		}
587	}
588	return 0;
589}
590
591static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
592					   unsigned int count, uint32_t flags,
593					   void *data)
594{
595	/* DATA_NONE/DATA_BOOL enables loopback testing */
596	if (flags & VFIO_IRQ_SET_DATA_NONE) {
597		if (*ctx) {
598			if (count) {
599				eventfd_signal(*ctx, 1);
600			} else {
601				eventfd_ctx_put(*ctx);
602				*ctx = NULL;
603			}
604			return 0;
605		}
606	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
607		uint8_t trigger;
608
609		if (!count)
610			return -EINVAL;
611
612		trigger = *(uint8_t *)data;
613		if (trigger && *ctx)
614			eventfd_signal(*ctx, 1);
615
616		return 0;
617	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
618		int32_t fd;
619
620		if (!count)
621			return -EINVAL;
622
623		fd = *(int32_t *)data;
624		if (fd == -1) {
625			if (*ctx)
626				eventfd_ctx_put(*ctx);
627			*ctx = NULL;
628		} else if (fd >= 0) {
629			struct eventfd_ctx *efdctx;
630
631			efdctx = eventfd_ctx_fdget(fd);
632			if (IS_ERR(efdctx))
633				return PTR_ERR(efdctx);
634
635			if (*ctx)
636				eventfd_ctx_put(*ctx);
637
638			*ctx = efdctx;
639		}
640		return 0;
641	}
642
643	return -EINVAL;
644}
645
646static int vfio_pci_set_err_trigger(struct vfio_pci_core_device *vdev,
647				    unsigned index, unsigned start,
648				    unsigned count, uint32_t flags, void *data)
649{
650	if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
651		return -EINVAL;
652
653	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
654					       count, flags, data);
655}
656
657static int vfio_pci_set_req_trigger(struct vfio_pci_core_device *vdev,
658				    unsigned index, unsigned start,
659				    unsigned count, uint32_t flags, void *data)
660{
661	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
662		return -EINVAL;
663
664	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
665					       count, flags, data);
666}
667
668int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
669			    unsigned index, unsigned start, unsigned count,
670			    void *data)
671{
672	int (*func)(struct vfio_pci_core_device *vdev, unsigned index,
673		    unsigned start, unsigned count, uint32_t flags,
674		    void *data) = NULL;
675
676	switch (index) {
677	case VFIO_PCI_INTX_IRQ_INDEX:
678		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
679		case VFIO_IRQ_SET_ACTION_MASK:
680			func = vfio_pci_set_intx_mask;
681			break;
682		case VFIO_IRQ_SET_ACTION_UNMASK:
683			func = vfio_pci_set_intx_unmask;
684			break;
685		case VFIO_IRQ_SET_ACTION_TRIGGER:
686			func = vfio_pci_set_intx_trigger;
687			break;
688		}
689		break;
690	case VFIO_PCI_MSI_IRQ_INDEX:
691	case VFIO_PCI_MSIX_IRQ_INDEX:
692		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
693		case VFIO_IRQ_SET_ACTION_MASK:
694		case VFIO_IRQ_SET_ACTION_UNMASK:
695			/* XXX Need masking support exported */
696			break;
697		case VFIO_IRQ_SET_ACTION_TRIGGER:
698			func = vfio_pci_set_msi_trigger;
699			break;
700		}
701		break;
702	case VFIO_PCI_ERR_IRQ_INDEX:
703		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
704		case VFIO_IRQ_SET_ACTION_TRIGGER:
705			if (pci_is_pcie(vdev->pdev))
706				func = vfio_pci_set_err_trigger;
707			break;
708		}
709		break;
710	case VFIO_PCI_REQ_IRQ_INDEX:
711		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
712		case VFIO_IRQ_SET_ACTION_TRIGGER:
713			func = vfio_pci_set_req_trigger;
714			break;
715		}
716		break;
717	}
718
719	if (!func)
720		return -ENOTTY;
721
722	return func(vdev, index, start, count, flags, data);
723}