v6.13.7
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * VFIO PCI interrupt handling
  4 *
  5 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
  6 *     Author: Alex Williamson <alex.williamson@redhat.com>
  7 *
  8 * Derived from original vfio:
  9 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 10 * Author: Tom Lyon, pugs@cisco.com
 11 */
 12
 13#include <linux/device.h>
 14#include <linux/interrupt.h>
 15#include <linux/eventfd.h>
 16#include <linux/msi.h>
 17#include <linux/pci.h>
 18#include <linux/file.h>
 19#include <linux/vfio.h>
 20#include <linux/wait.h>
 21#include <linux/slab.h>
 22
 23#include "vfio_pci_priv.h"
 24
 25struct vfio_pci_irq_ctx {
 26	struct vfio_pci_core_device	*vdev;
 27	struct eventfd_ctx		*trigger;
 28	struct virqfd			*unmask;
 29	struct virqfd			*mask;
 30	char				*name;
 31	bool				masked;
 32	struct irq_bypass_producer	producer;
 33};
 34
 35static bool irq_is(struct vfio_pci_core_device *vdev, int type)
 36{
 37	return vdev->irq_type == type;
 38}
 39
 40static bool is_intx(struct vfio_pci_core_device *vdev)
 41{
 42	return vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX;
 43}
 44
 45static bool is_irq_none(struct vfio_pci_core_device *vdev)
 46{
 47	return !(vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX ||
 48		 vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX ||
 49		 vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
 50}
 51
 52static
 53struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev,
 54					  unsigned long index)
 55{
 56	return xa_load(&vdev->ctx, index);
 57}
 58
 59static void vfio_irq_ctx_free(struct vfio_pci_core_device *vdev,
 60			      struct vfio_pci_irq_ctx *ctx, unsigned long index)
 61{
 62	xa_erase(&vdev->ctx, index);
 63	kfree(ctx);
 64}
 65
 66static struct vfio_pci_irq_ctx *
 67vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index)
 68{
 69	struct vfio_pci_irq_ctx *ctx;
 70	int ret;
 71
 72	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
 73	if (!ctx)
 74		return NULL;
 75
 76	ret = xa_insert(&vdev->ctx, index, ctx, GFP_KERNEL_ACCOUNT);
 77	if (ret) {
 78		kfree(ctx);
 79		return NULL;
 80	}
 81
 82	return ctx;
 83}
 84
 85/*
 86 * INTx
 87 */
 88static void vfio_send_intx_eventfd(void *opaque, void *data)
 89{
 90	struct vfio_pci_core_device *vdev = opaque;
 91
 92	if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
 93		struct vfio_pci_irq_ctx *ctx = data;
 94		struct eventfd_ctx *trigger = READ_ONCE(ctx->trigger);
 95
 96		if (likely(trigger))
 97			eventfd_signal(trigger);
 98	}
 99}
100
101/* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
102static bool __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
103{
104	struct pci_dev *pdev = vdev->pdev;
105	struct vfio_pci_irq_ctx *ctx;
106	unsigned long flags;
107	bool masked_changed = false;
108
109	lockdep_assert_held(&vdev->igate);
110
111	spin_lock_irqsave(&vdev->irqlock, flags);
112
113	/*
114	 * Masking can come from interrupt, ioctl, or config space
115	 * via INTx disable.  The latter means this can get called
116	 * even when not using intx delivery.  In this case, just
117	 * try to have the physical bit follow the virtual bit.
118	 */
119	if (unlikely(!is_intx(vdev))) {
120		if (vdev->pci_2_3)
121			pci_intx(pdev, 0);
122		goto out_unlock;
123	}
124
125	ctx = vfio_irq_ctx_get(vdev, 0);
126	if (WARN_ON_ONCE(!ctx))
127		goto out_unlock;
128
129	if (!ctx->masked) {
130		/*
131		 * Can't use check_and_mask here because we always want to
132		 * mask, not just when something is pending.
133		 */
134		if (vdev->pci_2_3)
135			pci_intx(pdev, 0);
136		else
137			disable_irq_nosync(pdev->irq);
138
139		ctx->masked = true;
140		masked_changed = true;
141	}
142
143out_unlock:
144	spin_unlock_irqrestore(&vdev->irqlock, flags);
145	return masked_changed;
146}
147
148bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
149{
150	bool mask_changed;
151
152	mutex_lock(&vdev->igate);
153	mask_changed = __vfio_pci_intx_mask(vdev);
154	mutex_unlock(&vdev->igate);
155
156	return mask_changed;
157}
158
159/*
160 * If this is triggered by an eventfd, we can't call eventfd_signal
161 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
162 * a signal is necessary, which can then be handled via a work queue
163 * or directly depending on the caller.
164 */
165static int vfio_pci_intx_unmask_handler(void *opaque, void *data)
166{
167	struct vfio_pci_core_device *vdev = opaque;
168	struct pci_dev *pdev = vdev->pdev;
169	struct vfio_pci_irq_ctx *ctx = data;
170	unsigned long flags;
171	int ret = 0;
172
173	spin_lock_irqsave(&vdev->irqlock, flags);
174
175	/*
176	 * Unmasking comes from ioctl or config, so again, have the
177	 * physical bit follow the virtual even when not using INTx.
178	 */
179	if (unlikely(!is_intx(vdev))) {
180		if (vdev->pci_2_3)
181			pci_intx(pdev, 1);
182		goto out_unlock;
183	}
184
185	if (ctx->masked && !vdev->virq_disabled) {
186		/*
187		 * A pending interrupt here would immediately trigger,
188		 * but we can avoid that overhead by just re-sending
189		 * the interrupt to the user.
190		 */
191		if (vdev->pci_2_3) {
192			if (!pci_check_and_unmask_intx(pdev))
193				ret = 1;
194		} else
195			enable_irq(pdev->irq);
196
197		ctx->masked = (ret > 0);
198	}
199
200out_unlock:
201	spin_unlock_irqrestore(&vdev->irqlock, flags);
202
203	return ret;
204}
205
206static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
207{
208	struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
209
210	lockdep_assert_held(&vdev->igate);
211
212	if (vfio_pci_intx_unmask_handler(vdev, ctx) > 0)
213		vfio_send_intx_eventfd(vdev, ctx);
214}
215
216void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
217{
218	mutex_lock(&vdev->igate);
219	__vfio_pci_intx_unmask(vdev);
220	mutex_unlock(&vdev->igate);
221}
222
223static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
224{
225	struct vfio_pci_irq_ctx *ctx = dev_id;
226	struct vfio_pci_core_device *vdev = ctx->vdev;
227	unsigned long flags;
228	int ret = IRQ_NONE;
229
230	spin_lock_irqsave(&vdev->irqlock, flags);
231
232	if (!vdev->pci_2_3) {
233		disable_irq_nosync(vdev->pdev->irq);
234		ctx->masked = true;
235		ret = IRQ_HANDLED;
236	} else if (!ctx->masked &&  /* may be shared */
237		   pci_check_and_mask_intx(vdev->pdev)) {
238		ctx->masked = true;
239		ret = IRQ_HANDLED;
240	}
241
242	spin_unlock_irqrestore(&vdev->irqlock, flags);
243
244	if (ret == IRQ_HANDLED)
245		vfio_send_intx_eventfd(vdev, ctx);
246
247	return ret;
248}
249
250static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
251			    struct eventfd_ctx *trigger)
252{
253	struct pci_dev *pdev = vdev->pdev;
254	struct vfio_pci_irq_ctx *ctx;
255	unsigned long irqflags;
256	char *name;
257	int ret;
258
259	if (!is_irq_none(vdev))
260		return -EINVAL;
261
262	if (!pdev->irq)
263		return -ENODEV;
264
265	name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
266	if (!name)
267		return -ENOMEM;
268
269	ctx = vfio_irq_ctx_alloc(vdev, 0);
270	if (!ctx) {
271		kfree(name);
272		return -ENOMEM;
273	}
274
275	ctx->name = name;
276	ctx->trigger = trigger;
277	ctx->vdev = vdev;
278
279	/*
280	 * Fill the initial masked state based on virq_disabled.  After
281	 * enable, changing the DisINTx bit in vconfig directly changes INTx
282	 * masking.  igate prevents races during setup, once running masked
283	 * is protected via irqlock.
284	 *
285	 * Devices supporting DisINTx also reflect the current mask state in
286	 * the physical DisINTx bit, which is not affected during IRQ setup.
287	 *
288	 * Devices without DisINTx support require an exclusive interrupt.
289	 * IRQ masking is performed at the IRQ chip.  Again, igate protects
290	 * against races during setup and IRQ handlers and irqfds are not
291	 * yet active, therefore masked is stable and can be used to
292	 * conditionally auto-enable the IRQ.
293	 *
294	 * irq_type must be stable while the IRQ handler is registered,
295	 * therefore it must be set before request_irq().
296	 */
297	ctx->masked = vdev->virq_disabled;
298	if (vdev->pci_2_3) {
299		pci_intx(pdev, !ctx->masked);
300		irqflags = IRQF_SHARED;
301	} else {
302		irqflags = ctx->masked ? IRQF_NO_AUTOEN : 0;
303	}
304
305	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
306
307	ret = request_irq(pdev->irq, vfio_intx_handler,
308			  irqflags, ctx->name, ctx);
309	if (ret) {
310		vdev->irq_type = VFIO_PCI_NUM_IRQS;
311		kfree(name);
312		vfio_irq_ctx_free(vdev, ctx, 0);
313		return ret;
314	}
315
316	return 0;
317}
318
319static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
320				struct eventfd_ctx *trigger)
321{
322	struct pci_dev *pdev = vdev->pdev;
323	struct vfio_pci_irq_ctx *ctx;
324	struct eventfd_ctx *old;
325
326	ctx = vfio_irq_ctx_get(vdev, 0);
327	if (WARN_ON_ONCE(!ctx))
328		return -EINVAL;
329
330	old = ctx->trigger;
331
332	WRITE_ONCE(ctx->trigger, trigger);
333
334	/* Releasing an old ctx requires synchronizing in-flight users */
335	if (old) {
336		synchronize_irq(pdev->irq);
337		vfio_virqfd_flush_thread(&ctx->unmask);
338		eventfd_ctx_put(old);
339	}
340
341	return 0;
342}
343
344static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
345{
346	struct pci_dev *pdev = vdev->pdev;
347	struct vfio_pci_irq_ctx *ctx;
348
349	ctx = vfio_irq_ctx_get(vdev, 0);
350	WARN_ON_ONCE(!ctx);
351	if (ctx) {
352		vfio_virqfd_disable(&ctx->unmask);
353		vfio_virqfd_disable(&ctx->mask);
354		free_irq(pdev->irq, ctx);
355		if (ctx->trigger)
356			eventfd_ctx_put(ctx->trigger);
357		kfree(ctx->name);
358		vfio_irq_ctx_free(vdev, ctx, 0);
359	}
360	vdev->irq_type = VFIO_PCI_NUM_IRQS;
361}
362
363/*
364 * MSI/MSI-X
365 */
366static irqreturn_t vfio_msihandler(int irq, void *arg)
367{
368	struct eventfd_ctx *trigger = arg;
369
370	eventfd_signal(trigger);
371	return IRQ_HANDLED;
372}
373
374static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msix)
375{
376	struct pci_dev *pdev = vdev->pdev;
377	unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
378	int ret;
379	u16 cmd;
380
381	if (!is_irq_none(vdev))
382		return -EINVAL;
383
384	/* return the number of supported vectors if we can't get all: */
385	cmd = vfio_pci_memory_lock_and_enable(vdev);
386	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
387	if (ret < nvec) {
388		if (ret > 0)
389			pci_free_irq_vectors(pdev);
390		vfio_pci_memory_unlock_and_restore(vdev, cmd);
391		return ret;
392	}
393	vfio_pci_memory_unlock_and_restore(vdev, cmd);
394
395	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
396				VFIO_PCI_MSI_IRQ_INDEX;
397
398	if (!msix) {
399		/*
400		 * Compute the virtual hardware field for max msi vectors -
401		 * it is the log base 2 of the number of vectors.
402		 */
403		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
404	}
405
406	return 0;
407}
408
409/*
410 * vfio_msi_alloc_irq() returns the Linux IRQ number of an MSI or MSI-X device
411 * interrupt vector. If a Linux IRQ number is not available then a new
412 * interrupt is allocated if dynamic MSI-X is supported.
413 *
414 * Where is vfio_msi_free_irq()? Allocated interrupts are maintained,
415 * essentially forming a cache that subsequent allocations can draw from.
416 * Interrupts are freed using pci_free_irq_vectors() when MSI/MSI-X is
417 * disabled.
418 */
419static int vfio_msi_alloc_irq(struct vfio_pci_core_device *vdev,
420			      unsigned int vector, bool msix)
421{
422	struct pci_dev *pdev = vdev->pdev;
423	struct msi_map map;
424	int irq;
425	u16 cmd;
426
427	irq = pci_irq_vector(pdev, vector);
428	if (WARN_ON_ONCE(irq == 0))
429		return -EINVAL;
430	if (irq > 0 || !msix || !vdev->has_dyn_msix)
431		return irq;
432
433	cmd = vfio_pci_memory_lock_and_enable(vdev);
434	map = pci_msix_alloc_irq_at(pdev, vector, NULL);
435	vfio_pci_memory_unlock_and_restore(vdev, cmd);
436
437	return map.index < 0 ? map.index : map.virq;
438}
439
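/*
 * Tear down any existing eventfd/IRQ wiring for @vector, then, for a valid
 * @fd, allocate the Linux IRQ if necessary, request it with the new eventfd
 * as the handler argument and register an IRQ bypass producer for it.
 */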
440static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
441				      unsigned int vector, int fd, bool msix)
442{
443	struct pci_dev *pdev = vdev->pdev;
444	struct vfio_pci_irq_ctx *ctx;
445	struct eventfd_ctx *trigger;
446	int irq = -EINVAL, ret;
447	u16 cmd;
448
449	ctx = vfio_irq_ctx_get(vdev, vector);
450
451	if (ctx) {
452		irq_bypass_unregister_producer(&ctx->producer);
453		irq = pci_irq_vector(pdev, vector);
454		cmd = vfio_pci_memory_lock_and_enable(vdev);
455		free_irq(irq, ctx->trigger);
456		vfio_pci_memory_unlock_and_restore(vdev, cmd);
457		/* Interrupt stays allocated, will be freed at MSI-X disable. */
458		kfree(ctx->name);
459		eventfd_ctx_put(ctx->trigger);
460		vfio_irq_ctx_free(vdev, ctx, vector);
461	}
462
463	if (fd < 0)
464		return 0;
465
466	if (irq == -EINVAL) {
467		/* Interrupt stays allocated, will be freed at MSI-X disable. */
468		irq = vfio_msi_alloc_irq(vdev, vector, msix);
469		if (irq < 0)
470			return irq;
471	}
472
473	ctx = vfio_irq_ctx_alloc(vdev, vector);
474	if (!ctx)
475		return -ENOMEM;
476
477	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
478			      msix ? "x" : "", vector, pci_name(pdev));
479	if (!ctx->name) {
480		ret = -ENOMEM;
481		goto out_free_ctx;
482	}
483
484	trigger = eventfd_ctx_fdget(fd);
485	if (IS_ERR(trigger)) {
486		ret = PTR_ERR(trigger);
487		goto out_free_name;
488	}
489
490	/*
491	 * If the vector was previously allocated, refresh the on-device
492	 * message data before enabling in case it had been cleared or
493	 * corrupted (e.g. due to backdoor resets) since writing.
494	 */
495	cmd = vfio_pci_memory_lock_and_enable(vdev);
496	if (msix) {
497		struct msi_msg msg;
498
499		get_cached_msi_msg(irq, &msg);
500		pci_write_msi_msg(irq, &msg);
501	}
502
503	ret = request_irq(irq, vfio_msihandler, 0, ctx->name, trigger);
504	vfio_pci_memory_unlock_and_restore(vdev, cmd);
505	if (ret)
506		goto out_put_eventfd_ctx;
507
508	ctx->producer.token = trigger;
509	ctx->producer.irq = irq;
510	ret = irq_bypass_register_producer(&ctx->producer);
511	if (unlikely(ret)) {
512		dev_info(&pdev->dev,
513		"irq bypass producer (token %p) registration fails: %d\n",
514		ctx->producer.token, ret);
515
516		ctx->producer.token = NULL;
517	}
518	ctx->trigger = trigger;
519
520	return 0;
521
522out_put_eventfd_ctx:
523	eventfd_ctx_put(trigger);
524out_free_name:
525	kfree(ctx->name);
526out_free_ctx:
527	vfio_irq_ctx_free(vdev, ctx, vector);
528	return ret;
529}
530
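/*
 * Wire an eventfd (or -1 to tear down) to each vector in [start, start +
 * count).  On failure, unwind the vectors configured by this call.
 */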
531static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
532			      unsigned count, int32_t *fds, bool msix)
533{
534	unsigned int i, j;
535	int ret = 0;
536
537	for (i = 0, j = start; i < count && !ret; i++, j++) {
538		int fd = fds ? fds[i] : -1;
539		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
540	}
541
542	if (ret) {
543		for (i = start; i < j; i++)
544			vfio_msi_set_vector_signal(vdev, i, -1, msix);
545	}
546
547	return ret;
548}
549
550static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
551{
552	struct pci_dev *pdev = vdev->pdev;
553	struct vfio_pci_irq_ctx *ctx;
554	unsigned long i;
555	u16 cmd;
556
557	xa_for_each(&vdev->ctx, i, ctx) {
558		vfio_virqfd_disable(&ctx->unmask);
559		vfio_virqfd_disable(&ctx->mask);
560		vfio_msi_set_vector_signal(vdev, i, -1, msix);
561	}
562
563	cmd = vfio_pci_memory_lock_and_enable(vdev);
564	pci_free_irq_vectors(pdev);
565	vfio_pci_memory_unlock_and_restore(vdev, cmd);
566
567	/*
568	 * Both disable paths above use pci_intx_for_msi() to clear DisINTx
569	 * via their shutdown paths.  Restore for NoINTx devices.
570	 */
571	if (vdev->nointx)
572		pci_intx(pdev, 0);
573
574	vdev->irq_type = VFIO_PCI_NUM_IRQS;
575}
576
577/*
578 * IOCTL support
579 */
580static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
581				    unsigned index, unsigned start,
582				    unsigned count, uint32_t flags, void *data)
583{
584	if (!is_intx(vdev) || start != 0 || count != 1)
585		return -EINVAL;
586
587	if (flags & VFIO_IRQ_SET_DATA_NONE) {
588		__vfio_pci_intx_unmask(vdev);
589	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
590		uint8_t unmask = *(uint8_t *)data;
591		if (unmask)
592			__vfio_pci_intx_unmask(vdev);
593	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
594		struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
595		int32_t fd = *(int32_t *)data;
596
597		if (WARN_ON_ONCE(!ctx))
598			return -EINVAL;
599		if (fd >= 0)
600			return vfio_virqfd_enable((void *) vdev,
601						  vfio_pci_intx_unmask_handler,
602						  vfio_send_intx_eventfd, ctx,
603						  &ctx->unmask, fd);
604
605		vfio_virqfd_disable(&ctx->unmask);
606	}
607
608	return 0;
609}
610
611static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev,
612				  unsigned index, unsigned start,
613				  unsigned count, uint32_t flags, void *data)
614{
615	if (!is_intx(vdev) || start != 0 || count != 1)
616		return -EINVAL;
617
618	if (flags & VFIO_IRQ_SET_DATA_NONE) {
619		__vfio_pci_intx_mask(vdev);
620	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
621		uint8_t mask = *(uint8_t *)data;
622		if (mask)
623			__vfio_pci_intx_mask(vdev);
624	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
625		return -ENOTTY; /* XXX implement me */
626	}
627
628	return 0;
629}
630
631static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
632				     unsigned index, unsigned start,
633				     unsigned count, uint32_t flags, void *data)
634{
635	if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
636		vfio_intx_disable(vdev);
637		return 0;
638	}
639
640	if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
641		return -EINVAL;
642
643	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
644		struct eventfd_ctx *trigger = NULL;
645		int32_t fd = *(int32_t *)data;
646		int ret;
647
648		if (fd >= 0) {
649			trigger = eventfd_ctx_fdget(fd);
650			if (IS_ERR(trigger))
651				return PTR_ERR(trigger);
652		}
653
654		if (is_intx(vdev))
655			ret = vfio_intx_set_signal(vdev, trigger);
656		else
657			ret = vfio_intx_enable(vdev, trigger);
658
659		if (ret && trigger)
660			eventfd_ctx_put(trigger);
661
662		return ret;
663	}
664
665	if (!is_intx(vdev))
666		return -EINVAL;
667
668	if (flags & VFIO_IRQ_SET_DATA_NONE) {
669		vfio_send_intx_eventfd(vdev, vfio_irq_ctx_get(vdev, 0));
670	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
671		uint8_t trigger = *(uint8_t *)data;
672		if (trigger)
673			vfio_send_intx_eventfd(vdev, vfio_irq_ctx_get(vdev, 0));
674	}
675	return 0;
676}
677
678static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
679				    unsigned index, unsigned start,
680				    unsigned count, uint32_t flags, void *data)
681{
682	struct vfio_pci_irq_ctx *ctx;
683	unsigned int i;
684	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
685
686	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
687		vfio_msi_disable(vdev, msix);
688		return 0;
689	}
690
691	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
692		return -EINVAL;
693
694	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
695		int32_t *fds = data;
696		int ret;
697
698		if (vdev->irq_type == index)
699			return vfio_msi_set_block(vdev, start, count,
700						  fds, msix);
701
702		ret = vfio_msi_enable(vdev, start + count, msix);
703		if (ret)
704			return ret;
705
706		ret = vfio_msi_set_block(vdev, start, count, fds, msix);
707		if (ret)
708			vfio_msi_disable(vdev, msix);
709
710		return ret;
711	}
712
713	if (!irq_is(vdev, index))
714		return -EINVAL;
715
716	for (i = start; i < start + count; i++) {
717		ctx = vfio_irq_ctx_get(vdev, i);
718		if (!ctx)
719			continue;
720		if (flags & VFIO_IRQ_SET_DATA_NONE) {
721			eventfd_signal(ctx->trigger);
722		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
723			uint8_t *bools = data;
724			if (bools[i - start])
725				eventfd_signal(ctx->trigger);
726		}
727	}
728	return 0;
729}
730
731static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
732					   unsigned int count, uint32_t flags,
733					   void *data)
734{
735	/* DATA_NONE/DATA_BOOL enables loopback testing */
736	if (flags & VFIO_IRQ_SET_DATA_NONE) {
737		if (*ctx) {
738			if (count) {
739				eventfd_signal(*ctx);
740			} else {
741				eventfd_ctx_put(*ctx);
742				*ctx = NULL;
743			}
744			return 0;
745		}
746	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
747		uint8_t trigger;
748
749		if (!count)
750			return -EINVAL;
751
752		trigger = *(uint8_t *)data;
753		if (trigger && *ctx)
754			eventfd_signal(*ctx);
755
756		return 0;
757	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
758		int32_t fd;
759
760		if (!count)
761			return -EINVAL;
762
763		fd = *(int32_t *)data;
764		if (fd == -1) {
765			if (*ctx)
766				eventfd_ctx_put(*ctx);
767			*ctx = NULL;
768		} else if (fd >= 0) {
769			struct eventfd_ctx *efdctx;
770
771			efdctx = eventfd_ctx_fdget(fd);
772			if (IS_ERR(efdctx))
773				return PTR_ERR(efdctx);
774
775			if (*ctx)
776				eventfd_ctx_put(*ctx);
777
778			*ctx = efdctx;
779		}
780		return 0;
781	}
782
783	return -EINVAL;
784}
785
786static int vfio_pci_set_err_trigger(struct vfio_pci_core_device *vdev,
787				    unsigned index, unsigned start,
788				    unsigned count, uint32_t flags, void *data)
789{
790	if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
791		return -EINVAL;
792
793	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
794					       count, flags, data);
795}
796
797static int vfio_pci_set_req_trigger(struct vfio_pci_core_device *vdev,
798				    unsigned index, unsigned start,
799				    unsigned count, uint32_t flags, void *data)
800{
801	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
802		return -EINVAL;
803
804	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
805					       count, flags, data);
806}
807
808int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
809			    unsigned index, unsigned start, unsigned count,
810			    void *data)
811{
812	int (*func)(struct vfio_pci_core_device *vdev, unsigned index,
813		    unsigned start, unsigned count, uint32_t flags,
814		    void *data) = NULL;
815
816	switch (index) {
817	case VFIO_PCI_INTX_IRQ_INDEX:
818		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
819		case VFIO_IRQ_SET_ACTION_MASK:
820			func = vfio_pci_set_intx_mask;
821			break;
822		case VFIO_IRQ_SET_ACTION_UNMASK:
823			func = vfio_pci_set_intx_unmask;
824			break;
825		case VFIO_IRQ_SET_ACTION_TRIGGER:
826			func = vfio_pci_set_intx_trigger;
827			break;
828		}
829		break;
830	case VFIO_PCI_MSI_IRQ_INDEX:
831	case VFIO_PCI_MSIX_IRQ_INDEX:
832		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
833		case VFIO_IRQ_SET_ACTION_MASK:
834		case VFIO_IRQ_SET_ACTION_UNMASK:
835			/* XXX Need masking support exported */
836			break;
837		case VFIO_IRQ_SET_ACTION_TRIGGER:
838			func = vfio_pci_set_msi_trigger;
839			break;
840		}
841		break;
842	case VFIO_PCI_ERR_IRQ_INDEX:
843		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
844		case VFIO_IRQ_SET_ACTION_TRIGGER:
845			if (pci_is_pcie(vdev->pdev))
846				func = vfio_pci_set_err_trigger;
847			break;
848		}
849		break;
850	case VFIO_PCI_REQ_IRQ_INDEX:
851		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
852		case VFIO_IRQ_SET_ACTION_TRIGGER:
853			func = vfio_pci_set_req_trigger;
854			break;
855		}
856		break;
857	}
858
859	if (!func)
860		return -ENOTTY;
861
862	return func(vdev, index, start, count, flags, data);
863}
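All of the trigger plumbing above is driven from userspace through the VFIO_DEVICE_SET_IRQS ioctl. As a rough illustration, the sketch below routes MSI vector 0 to an eventfd, which ends up in vfio_pci_set_msi_trigger() and vfio_msi_set_vector_signal(); it assumes a device file descriptor already opened through the usual VFIO group/container (or cdev) setup, and the helper name wire_msi_vector0() is purely illustrative.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Route MSI vector 0 of an already-opened VFIO device to an eventfd. */
static int wire_msi_vector0(int device_fd)
{
	struct vfio_irq_set *set;
	size_t argsz = sizeof(*set) + sizeof(int32_t);
	int32_t efd;
	int ret;

	efd = eventfd(0, EFD_CLOEXEC);
	if (efd < 0)
		return -1;

	set = calloc(1, argsz);
	if (!set)
		return -1;

	set->argsz = argsz;
	/* DATA_EVENTFD + ACTION_TRIGGER on the MSI index reaches
	 * vfio_pci_set_msi_trigger() above. */
	set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	set->index = VFIO_PCI_MSI_IRQ_INDEX;
	set->start = 0;
	set->count = 1;
	memcpy(set->data, &efd, sizeof(efd));

	ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
	free(set);
	return ret ? -1 : efd;	/* read(efd, ...) then reports interrupts */
}

Passing -1 instead of an eventfd tears down that vector's signaling, and DATA_NONE with count == 0 disables MSI entirely, both via the same handler.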
v5.4
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * VFIO PCI interrupt handling
  4 *
  5 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
  6 *     Author: Alex Williamson <alex.williamson@redhat.com>
  7 *
  8 * Derived from original vfio:
  9 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 10 * Author: Tom Lyon, pugs@cisco.com
 11 */
 12
 13#include <linux/device.h>
 14#include <linux/interrupt.h>
 15#include <linux/eventfd.h>
 16#include <linux/msi.h>
 17#include <linux/pci.h>
 18#include <linux/file.h>
 19#include <linux/vfio.h>
 20#include <linux/wait.h>
 21#include <linux/slab.h>
 22
 23#include "vfio_pci_private.h"
 24
 25/*
 26 * INTx
 27 */
 28static void vfio_send_intx_eventfd(void *opaque, void *unused)
 29{
 30	struct vfio_pci_device *vdev = opaque;
 31
 32	if (likely(is_intx(vdev) && !vdev->virq_disabled))
 33		eventfd_signal(vdev->ctx[0].trigger, 1);
 34}
 35
 36void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
 37{
 38	struct pci_dev *pdev = vdev->pdev;
 39	unsigned long flags;
 40
 41	spin_lock_irqsave(&vdev->irqlock, flags);
 42
 43	/*
 44	 * Masking can come from interrupt, ioctl, or config space
 45	 * via INTx disable.  The latter means this can get called
 46	 * even when not using intx delivery.  In this case, just
 47	 * try to have the physical bit follow the virtual bit.
 48	 */
 49	if (unlikely(!is_intx(vdev))) {
 50		if (vdev->pci_2_3)
 51			pci_intx(pdev, 0);
 52	} else if (!vdev->ctx[0].masked) {
 53		/*
 54		 * Can't use check_and_mask here because we always want to
 55		 * mask, not just when something is pending.
 56		 */
 57		if (vdev->pci_2_3)
 58			pci_intx(pdev, 0);
 59		else
 60			disable_irq_nosync(pdev->irq);
 61
 62		vdev->ctx[0].masked = true;
 63	}
 64
 65	spin_unlock_irqrestore(&vdev->irqlock, flags);
 66}
 67
 68/*
 69 * If this is triggered by an eventfd, we can't call eventfd_signal
 70 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
 71 * a signal is necessary, which can then be handled via a work queue
 72 * or directly depending on the caller.
 73 */
 74static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
 75{
 76	struct vfio_pci_device *vdev = opaque;
 77	struct pci_dev *pdev = vdev->pdev;
 78	unsigned long flags;
 79	int ret = 0;
 80
 81	spin_lock_irqsave(&vdev->irqlock, flags);
 82
 83	/*
 84	 * Unmasking comes from ioctl or config, so again, have the
 85	 * physical bit follow the virtual even when not using INTx.
 86	 */
 87	if (unlikely(!is_intx(vdev))) {
 88		if (vdev->pci_2_3)
 89			pci_intx(pdev, 1);
 90	} else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
 91		/*
 92		 * A pending interrupt here would immediately trigger,
 93		 * but we can avoid that overhead by just re-sending
 94		 * the interrupt to the user.
 95		 */
 96		if (vdev->pci_2_3) {
 97			if (!pci_check_and_unmask_intx(pdev))
 98				ret = 1;
 99		} else
100			enable_irq(pdev->irq);
101
102		vdev->ctx[0].masked = (ret > 0);
103	}
104
105	spin_unlock_irqrestore(&vdev->irqlock, flags);
106
107	return ret;
108}
109
110void vfio_pci_intx_unmask(struct vfio_pci_device *vdev)
111{
112	if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
113		vfio_send_intx_eventfd(vdev, NULL);
114}
115
116static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
117{
118	struct vfio_pci_device *vdev = dev_id;
119	unsigned long flags;
120	int ret = IRQ_NONE;
121
122	spin_lock_irqsave(&vdev->irqlock, flags);
123
124	if (!vdev->pci_2_3) {
125		disable_irq_nosync(vdev->pdev->irq);
126		vdev->ctx[0].masked = true;
127		ret = IRQ_HANDLED;
128	} else if (!vdev->ctx[0].masked &&  /* may be shared */
129		   pci_check_and_mask_intx(vdev->pdev)) {
130		vdev->ctx[0].masked = true;
131		ret = IRQ_HANDLED;
132	}
133
134	spin_unlock_irqrestore(&vdev->irqlock, flags);
135
136	if (ret == IRQ_HANDLED)
137		vfio_send_intx_eventfd(vdev, NULL);
138
139	return ret;
140}
141
142static int vfio_intx_enable(struct vfio_pci_device *vdev)
143{
144	if (!is_irq_none(vdev))
145		return -EINVAL;
146
147	if (!vdev->pdev->irq)
148		return -ENODEV;
149
150	vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
151	if (!vdev->ctx)
152		return -ENOMEM;
153
154	vdev->num_ctx = 1;
155
156	/*
157	 * If the virtual interrupt is masked, restore it.  Devices
158	 * supporting DisINTx can be masked at the hardware level
159	 * here, non-PCI-2.3 devices will have to wait until the
160	 * interrupt is enabled.
161	 */
162	vdev->ctx[0].masked = vdev->virq_disabled;
163	if (vdev->pci_2_3)
164		pci_intx(vdev->pdev, !vdev->ctx[0].masked);
165
166	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
167
168	return 0;
169}
170
171static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
172{
173	struct pci_dev *pdev = vdev->pdev;
174	unsigned long irqflags = IRQF_SHARED;
175	struct eventfd_ctx *trigger;
176	unsigned long flags;
177	int ret;
178
179	if (vdev->ctx[0].trigger) {
180		free_irq(pdev->irq, vdev);
181		kfree(vdev->ctx[0].name);
182		eventfd_ctx_put(vdev->ctx[0].trigger);
183		vdev->ctx[0].trigger = NULL;
184	}
185
186	if (fd < 0) /* Disable only */
187		return 0;
188
189	vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
190				      pci_name(pdev));
191	if (!vdev->ctx[0].name)
192		return -ENOMEM;
193
194	trigger = eventfd_ctx_fdget(fd);
195	if (IS_ERR(trigger)) {
196		kfree(vdev->ctx[0].name);
197		return PTR_ERR(trigger);
198	}
199
200	vdev->ctx[0].trigger = trigger;
201
202	if (!vdev->pci_2_3)
203		irqflags = 0;
204
205	ret = request_irq(pdev->irq, vfio_intx_handler,
206			  irqflags, vdev->ctx[0].name, vdev);
207	if (ret) {
208		vdev->ctx[0].trigger = NULL;
209		kfree(vdev->ctx[0].name);
210		eventfd_ctx_put(trigger);
211		return ret;
212	}
213
214	/*
215	 * INTx disable will stick across the new irq setup,
216	 * disable_irq won't.
217	 */
218	spin_lock_irqsave(&vdev->irqlock, flags);
219	if (!vdev->pci_2_3 && vdev->ctx[0].masked)
220		disable_irq_nosync(pdev->irq);
221	spin_unlock_irqrestore(&vdev->irqlock, flags);
222
223	return 0;
224}
225
226static void vfio_intx_disable(struct vfio_pci_device *vdev)
227{
228	vfio_virqfd_disable(&vdev->ctx[0].unmask);
229	vfio_virqfd_disable(&vdev->ctx[0].mask);
230	vfio_intx_set_signal(vdev, -1);
231	vdev->irq_type = VFIO_PCI_NUM_IRQS;
232	vdev->num_ctx = 0;
233	kfree(vdev->ctx);
234}
235
236/*
237 * MSI/MSI-X
238 */
239static irqreturn_t vfio_msihandler(int irq, void *arg)
240{
241	struct eventfd_ctx *trigger = arg;
242
243	eventfd_signal(trigger, 1);
244	return IRQ_HANDLED;
245}
246
247static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
248{
249	struct pci_dev *pdev = vdev->pdev;
250	unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
251	int ret;
252
253	if (!is_irq_none(vdev))
254		return -EINVAL;
255
256	vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
257	if (!vdev->ctx)
258		return -ENOMEM;
259
260	/* return the number of supported vectors if we can't get all: */
261	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
262	if (ret < nvec) {
263		if (ret > 0)
264			pci_free_irq_vectors(pdev);
265		kfree(vdev->ctx);
266		return ret;
267	}
268
269	vdev->num_ctx = nvec;
270	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
271				VFIO_PCI_MSI_IRQ_INDEX;
272
273	if (!msix) {
274		/*
275		 * Compute the virtual hardware field for max msi vectors -
276		 * it is the log base 2 of the number of vectors.
277		 */
278		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
279	}
280
281	return 0;
282}
283
284static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
285				      int vector, int fd, bool msix)
286{
287	struct pci_dev *pdev = vdev->pdev;
288	struct eventfd_ctx *trigger;
289	int irq, ret;
290
291	if (vector < 0 || vector >= vdev->num_ctx)
292		return -EINVAL;
293
294	irq = pci_irq_vector(pdev, vector);
295
296	if (vdev->ctx[vector].trigger) {
297		free_irq(irq, vdev->ctx[vector].trigger);
298		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
299		kfree(vdev->ctx[vector].name);
300		eventfd_ctx_put(vdev->ctx[vector].trigger);
301		vdev->ctx[vector].trigger = NULL;
302	}
303
304	if (fd < 0)
305		return 0;
306
307	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
308					   msix ? "x" : "", vector,
309					   pci_name(pdev));
310	if (!vdev->ctx[vector].name)
311		return -ENOMEM;
312
313	trigger = eventfd_ctx_fdget(fd);
314	if (IS_ERR(trigger)) {
315		kfree(vdev->ctx[vector].name);
316		return PTR_ERR(trigger);
317	}
318
319	/*
320	 * The MSIx vector table resides in device memory which may be cleared
321	 * via backdoor resets. We don't allow direct access to the vector
322	 * table so even if a userspace driver attempts to save/restore around
323	 * such a reset it would be unsuccessful. To avoid this, restore the
324	 * cached value of the message prior to enabling.
325	 */
326	if (msix) {
327		struct msi_msg msg;
328
329		get_cached_msi_msg(irq, &msg);
330		pci_write_msi_msg(irq, &msg);
331	}
332
333	ret = request_irq(irq, vfio_msihandler, 0,
334			  vdev->ctx[vector].name, trigger);
335	if (ret) {
336		kfree(vdev->ctx[vector].name);
337		eventfd_ctx_put(trigger);
338		return ret;
339	}
340
341	vdev->ctx[vector].producer.token = trigger;
342	vdev->ctx[vector].producer.irq = irq;
343	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
344	if (unlikely(ret))
345		dev_info(&pdev->dev,
346		"irq bypass producer (token %p) registration fails: %d\n",
347		vdev->ctx[vector].producer.token, ret);
348
349	vdev->ctx[vector].trigger = trigger;
350
351	return 0;
352}
353
354static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
355			      unsigned count, int32_t *fds, bool msix)
356{
357	int i, j, ret = 0;
358
359	if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
360		return -EINVAL;
361
362	for (i = 0, j = start; i < count && !ret; i++, j++) {
363		int fd = fds ? fds[i] : -1;
364		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
365	}
366
367	if (ret) {
368		for (--j; j >= (int)start; j--)
369			vfio_msi_set_vector_signal(vdev, j, -1, msix);
370	}
371
372	return ret;
373}
374
375static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
376{
377	struct pci_dev *pdev = vdev->pdev;
378	int i;
379
380	for (i = 0; i < vdev->num_ctx; i++) {
381		vfio_virqfd_disable(&vdev->ctx[i].unmask);
382		vfio_virqfd_disable(&vdev->ctx[i].mask);
383	}
384
385	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
386
387	pci_free_irq_vectors(pdev);
388
389	/*
390	 * Both disable paths above use pci_intx_for_msi() to clear DisINTx
391	 * via their shutdown paths.  Restore for NoINTx devices.
392	 */
393	if (vdev->nointx)
394		pci_intx(pdev, 0);
395
396	vdev->irq_type = VFIO_PCI_NUM_IRQS;
397	vdev->num_ctx = 0;
398	kfree(vdev->ctx);
399}
400
401/*
402 * IOCTL support
403 */
404static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
405				    unsigned index, unsigned start,
406				    unsigned count, uint32_t flags, void *data)
407{
408	if (!is_intx(vdev) || start != 0 || count != 1)
409		return -EINVAL;
410
411	if (flags & VFIO_IRQ_SET_DATA_NONE) {
412		vfio_pci_intx_unmask(vdev);
413	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
414		uint8_t unmask = *(uint8_t *)data;
415		if (unmask)
416			vfio_pci_intx_unmask(vdev);
417	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
418		int32_t fd = *(int32_t *)data;
419		if (fd >= 0)
420			return vfio_virqfd_enable((void *) vdev,
421						  vfio_pci_intx_unmask_handler,
422						  vfio_send_intx_eventfd, NULL,
423						  &vdev->ctx[0].unmask, fd);
424
425		vfio_virqfd_disable(&vdev->ctx[0].unmask);
426	}
427
428	return 0;
429}
430
431static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev,
432				  unsigned index, unsigned start,
433				  unsigned count, uint32_t flags, void *data)
434{
435	if (!is_intx(vdev) || start != 0 || count != 1)
436		return -EINVAL;
437
438	if (flags & VFIO_IRQ_SET_DATA_NONE) {
439		vfio_pci_intx_mask(vdev);
440	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
441		uint8_t mask = *(uint8_t *)data;
442		if (mask)
443			vfio_pci_intx_mask(vdev);
444	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
445		return -ENOTTY; /* XXX implement me */
446	}
447
448	return 0;
449}
450
451static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev,
452				     unsigned index, unsigned start,
453				     unsigned count, uint32_t flags, void *data)
454{
455	if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
456		vfio_intx_disable(vdev);
457		return 0;
458	}
459
460	if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
461		return -EINVAL;
462
463	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
464		int32_t fd = *(int32_t *)data;
465		int ret;
466
467		if (is_intx(vdev))
468			return vfio_intx_set_signal(vdev, fd);
469
470		ret = vfio_intx_enable(vdev);
471		if (ret)
472			return ret;
473
474		ret = vfio_intx_set_signal(vdev, fd);
475		if (ret)
476			vfio_intx_disable(vdev);
477
478		return ret;
479	}
480
481	if (!is_intx(vdev))
482		return -EINVAL;
483
484	if (flags & VFIO_IRQ_SET_DATA_NONE) {
485		vfio_send_intx_eventfd(vdev, NULL);
486	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
487		uint8_t trigger = *(uint8_t *)data;
488		if (trigger)
489			vfio_send_intx_eventfd(vdev, NULL);
490	}
491	return 0;
492}
493
494static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
495				    unsigned index, unsigned start,
496				    unsigned count, uint32_t flags, void *data)
497{
498	int i;
499	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
500
501	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
502		vfio_msi_disable(vdev, msix);
503		return 0;
504	}
505
506	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
507		return -EINVAL;
508
509	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
510		int32_t *fds = data;
511		int ret;
512
513		if (vdev->irq_type == index)
514			return vfio_msi_set_block(vdev, start, count,
515						  fds, msix);
516
517		ret = vfio_msi_enable(vdev, start + count, msix);
518		if (ret)
519			return ret;
520
521		ret = vfio_msi_set_block(vdev, start, count, fds, msix);
522		if (ret)
523			vfio_msi_disable(vdev, msix);
524
525		return ret;
526	}
527
528	if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
529		return -EINVAL;
530
531	for (i = start; i < start + count; i++) {
532		if (!vdev->ctx[i].trigger)
533			continue;
534		if (flags & VFIO_IRQ_SET_DATA_NONE) {
535			eventfd_signal(vdev->ctx[i].trigger, 1);
536		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
537			uint8_t *bools = data;
538			if (bools[i - start])
539				eventfd_signal(vdev->ctx[i].trigger, 1);
540		}
541	}
542	return 0;
543}
544
545static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
546					   unsigned int count, uint32_t flags,
547					   void *data)
548{
549	/* DATA_NONE/DATA_BOOL enables loopback testing */
550	if (flags & VFIO_IRQ_SET_DATA_NONE) {
551		if (*ctx) {
552			if (count) {
553				eventfd_signal(*ctx, 1);
554			} else {
555				eventfd_ctx_put(*ctx);
556				*ctx = NULL;
557			}
558			return 0;
559		}
560	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
561		uint8_t trigger;
562
563		if (!count)
564			return -EINVAL;
565
566		trigger = *(uint8_t *)data;
567		if (trigger && *ctx)
568			eventfd_signal(*ctx, 1);
569
570		return 0;
571	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
572		int32_t fd;
573
574		if (!count)
575			return -EINVAL;
576
577		fd = *(int32_t *)data;
578		if (fd == -1) {
579			if (*ctx)
580				eventfd_ctx_put(*ctx);
581			*ctx = NULL;
582		} else if (fd >= 0) {
583			struct eventfd_ctx *efdctx;
584
585			efdctx = eventfd_ctx_fdget(fd);
586			if (IS_ERR(efdctx))
587				return PTR_ERR(efdctx);
588
589			if (*ctx)
590				eventfd_ctx_put(*ctx);
591
592			*ctx = efdctx;
593		}
594		return 0;
595	}
596
597	return -EINVAL;
598}
599
600static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
601				    unsigned index, unsigned start,
602				    unsigned count, uint32_t flags, void *data)
603{
604	if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
605		return -EINVAL;
606
607	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
608					       count, flags, data);
609}
610
611static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
612				    unsigned index, unsigned start,
613				    unsigned count, uint32_t flags, void *data)
614{
615	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
616		return -EINVAL;
617
618	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
619					       count, flags, data);
620}
621
622int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
623			    unsigned index, unsigned start, unsigned count,
624			    void *data)
625{
626	int (*func)(struct vfio_pci_device *vdev, unsigned index,
627		    unsigned start, unsigned count, uint32_t flags,
628		    void *data) = NULL;
629
630	switch (index) {
631	case VFIO_PCI_INTX_IRQ_INDEX:
632		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
633		case VFIO_IRQ_SET_ACTION_MASK:
634			func = vfio_pci_set_intx_mask;
635			break;
636		case VFIO_IRQ_SET_ACTION_UNMASK:
637			func = vfio_pci_set_intx_unmask;
638			break;
639		case VFIO_IRQ_SET_ACTION_TRIGGER:
640			func = vfio_pci_set_intx_trigger;
641			break;
642		}
643		break;
644	case VFIO_PCI_MSI_IRQ_INDEX:
645	case VFIO_PCI_MSIX_IRQ_INDEX:
646		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
647		case VFIO_IRQ_SET_ACTION_MASK:
648		case VFIO_IRQ_SET_ACTION_UNMASK:
649			/* XXX Need masking support exported */
650			break;
651		case VFIO_IRQ_SET_ACTION_TRIGGER:
652			func = vfio_pci_set_msi_trigger;
653			break;
654		}
655		break;
656	case VFIO_PCI_ERR_IRQ_INDEX:
657		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
658		case VFIO_IRQ_SET_ACTION_TRIGGER:
659			if (pci_is_pcie(vdev->pdev))
660				func = vfio_pci_set_err_trigger;
661			break;
662		}
663		break;
664	case VFIO_PCI_REQ_IRQ_INDEX:
665		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
666		case VFIO_IRQ_SET_ACTION_TRIGGER:
667			func = vfio_pci_set_req_trigger;
668			break;
669		}
670		break;
671	}
672
673	if (!func)
674		return -ENOTTY;
675
676	return func(vdev, index, start, count, flags, data);
677}
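The INTx mask and unmask actions handled by vfio_pci_set_intx_mask()/vfio_pci_set_intx_unmask() in both versions are reached the same way. A minimal sketch, assuming INTx signaling was already enabled with an ACTION_TRIGGER/DATA_EVENTFD call; device_fd and the helper name intx_set_masked() are illustrative:

#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Mask or unmask the (already enabled) INTx line of a VFIO device. */
static int intx_set_masked(int device_fd, int masked)
{
	struct vfio_irq_set set = {
		.argsz = sizeof(set),
		/* DATA_NONE carries no payload: the action alone is the request. */
		.flags = VFIO_IRQ_SET_DATA_NONE |
			 (masked ? VFIO_IRQ_SET_ACTION_MASK :
				   VFIO_IRQ_SET_ACTION_UNMASK),
		.index = VFIO_PCI_INTX_IRQ_INDEX,
		.start = 0,
		.count = 1,
	};

	return ioctl(device_fd, VFIO_DEVICE_SET_IRQS, &set);
}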