v4.6
 
  1/*
  2 * VFIO PCI interrupt handling
  3 *
  4 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
  5 *     Author: Alex Williamson <alex.williamson@redhat.com>
  6 *
  7 * This program is free software; you can redistribute it and/or modify
  8 * it under the terms of the GNU General Public License version 2 as
  9 * published by the Free Software Foundation.
 10 *
 11 * Derived from original vfio:
 12 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 13 * Author: Tom Lyon, pugs@cisco.com
 14 */
 15
 16#include <linux/device.h>
 17#include <linux/interrupt.h>
 18#include <linux/eventfd.h>
 19#include <linux/msi.h>
 20#include <linux/pci.h>
 21#include <linux/file.h>
 22#include <linux/vfio.h>
 23#include <linux/wait.h>
 24#include <linux/slab.h>
 25
 26#include "vfio_pci_private.h"
 27
 28/*
 29 * INTx
 30 */
 31static void vfio_send_intx_eventfd(void *opaque, void *unused)
 32{
 33	struct vfio_pci_device *vdev = opaque;
 34
 35	if (likely(is_intx(vdev) && !vdev->virq_disabled))
 36		eventfd_signal(vdev->ctx[0].trigger, 1);
 37}
 38
 39void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
 40{
 41	struct pci_dev *pdev = vdev->pdev;
 42	unsigned long flags;
 43
 44	spin_lock_irqsave(&vdev->irqlock, flags);
 45
 46	/*
 47	 * Masking can come from interrupt, ioctl, or config space
 48	 * via INTx disable.  The latter means this can get called
 49	 * even when not using intx delivery.  In this case, just
 50	 * try to have the physical bit follow the virtual bit.
 51	 */
 52	if (unlikely(!is_intx(vdev))) {
 53		if (vdev->pci_2_3)
 54			pci_intx(pdev, 0);
 55	} else if (!vdev->ctx[0].masked) {
 56		/*
 57		 * Can't use check_and_mask here because we always want to
 58		 * mask, not just when something is pending.
 59		 */
 60		if (vdev->pci_2_3)
 61			pci_intx(pdev, 0);
 62		else
 63			disable_irq_nosync(pdev->irq);
 64
 65		vdev->ctx[0].masked = true;
 66	}
 67
 68	spin_unlock_irqrestore(&vdev->irqlock, flags);
 69}
 70
 71/*
 72 * If this is triggered by an eventfd, we can't call eventfd_signal
 73 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
 74 * a signal is necessary, which can then be handled via a work queue
 75 * or directly depending on the caller.
 76 */
 77static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
 78{
 79	struct vfio_pci_device *vdev = opaque;
 80	struct pci_dev *pdev = vdev->pdev;
 81	unsigned long flags;
 82	int ret = 0;
 83
 84	spin_lock_irqsave(&vdev->irqlock, flags);
 85
 86	/*
 87	 * Unmasking comes from ioctl or config, so again, have the
 88	 * physical bit follow the virtual even when not using INTx.
 89	 */
 90	if (unlikely(!is_intx(vdev))) {
 91		if (vdev->pci_2_3)
 92			pci_intx(pdev, 1);
 93	} else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
 94		/*
 95		 * A pending interrupt here would immediately trigger,
 96		 * but we can avoid that overhead by just re-sending
 97		 * the interrupt to the user.
 98		 */
 99		if (vdev->pci_2_3) {
100			if (!pci_check_and_unmask_intx(pdev))
101				ret = 1;
102		} else
103			enable_irq(pdev->irq);
104
105		vdev->ctx[0].masked = (ret > 0);
106	}
107
108	spin_unlock_irqrestore(&vdev->irqlock, flags);
109
110	return ret;
111}
112
113void vfio_pci_intx_unmask(struct vfio_pci_device *vdev)
114{
115	if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
116		vfio_send_intx_eventfd(vdev, NULL);
117}
118
119static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
120{
121	struct vfio_pci_device *vdev = dev_id;
122	unsigned long flags;
123	int ret = IRQ_NONE;
124
125	spin_lock_irqsave(&vdev->irqlock, flags);
126
127	if (!vdev->pci_2_3) {
128		disable_irq_nosync(vdev->pdev->irq);
129		vdev->ctx[0].masked = true;
130		ret = IRQ_HANDLED;
131	} else if (!vdev->ctx[0].masked &&  /* may be shared */
132		   pci_check_and_mask_intx(vdev->pdev)) {
133		vdev->ctx[0].masked = true;
134		ret = IRQ_HANDLED;
135	}
136
137	spin_unlock_irqrestore(&vdev->irqlock, flags);
138
139	if (ret == IRQ_HANDLED)
140		vfio_send_intx_eventfd(vdev, NULL);
141
142	return ret;
143}
144
145static int vfio_intx_enable(struct vfio_pci_device *vdev)
146{
147	if (!is_irq_none(vdev))
148		return -EINVAL;
149
150	if (!vdev->pdev->irq)
151		return -ENODEV;
152
153	vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
154	if (!vdev->ctx)
155		return -ENOMEM;
156
157	vdev->num_ctx = 1;
158
159	/*
160	 * If the virtual interrupt is masked, restore it.  Devices
161	 * supporting DisINTx can be masked at the hardware level
162	 * here, non-PCI-2.3 devices will have to wait until the
163	 * interrupt is enabled.
164	 */
165	vdev->ctx[0].masked = vdev->virq_disabled;
166	if (vdev->pci_2_3)
167		pci_intx(vdev->pdev, !vdev->ctx[0].masked);
168
169	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
170
171	return 0;
172}
173
174static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
175{
176	struct pci_dev *pdev = vdev->pdev;
177	unsigned long irqflags = IRQF_SHARED;
178	struct eventfd_ctx *trigger;
179	unsigned long flags;
180	int ret;
181
182	if (vdev->ctx[0].trigger) {
183		free_irq(pdev->irq, vdev);
184		kfree(vdev->ctx[0].name);
185		eventfd_ctx_put(vdev->ctx[0].trigger);
186		vdev->ctx[0].trigger = NULL;
187	}
188
189	if (fd < 0) /* Disable only */
190		return 0;
191
192	vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
193				      pci_name(pdev));
194	if (!vdev->ctx[0].name)
195		return -ENOMEM;
196
197	trigger = eventfd_ctx_fdget(fd);
198	if (IS_ERR(trigger)) {
199		kfree(vdev->ctx[0].name);
200		return PTR_ERR(trigger);
201	}
202
203	vdev->ctx[0].trigger = trigger;
204
205	if (!vdev->pci_2_3)
206		irqflags = 0;
207
208	ret = request_irq(pdev->irq, vfio_intx_handler,
209			  irqflags, vdev->ctx[0].name, vdev);
210	if (ret) {
211		vdev->ctx[0].trigger = NULL;
212		kfree(vdev->ctx[0].name);
213		eventfd_ctx_put(trigger);
214		return ret;
215	}
216
217	/*
218	 * INTx disable will stick across the new irq setup,
219	 * disable_irq won't.
220	 */
221	spin_lock_irqsave(&vdev->irqlock, flags);
222	if (!vdev->pci_2_3 && vdev->ctx[0].masked)
223		disable_irq_nosync(pdev->irq);
224	spin_unlock_irqrestore(&vdev->irqlock, flags);
225
226	return 0;
227}
228
229static void vfio_intx_disable(struct vfio_pci_device *vdev)
230{
231	vfio_intx_set_signal(vdev, -1);
232	vfio_virqfd_disable(&vdev->ctx[0].unmask);
233	vfio_virqfd_disable(&vdev->ctx[0].mask);
234	vdev->irq_type = VFIO_PCI_NUM_IRQS;
235	vdev->num_ctx = 0;
236	kfree(vdev->ctx);
237}
238
239/*
240 * MSI/MSI-X
241 */
242static irqreturn_t vfio_msihandler(int irq, void *arg)
243{
244	struct eventfd_ctx *trigger = arg;
245
246	eventfd_signal(trigger, 1);
247	return IRQ_HANDLED;
248}
249
250static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
251{
252	struct pci_dev *pdev = vdev->pdev;
253	int ret;
254
255	if (!is_irq_none(vdev))
256		return -EINVAL;
257
258	vdev->ctx = kzalloc(nvec * sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
259	if (!vdev->ctx)
260		return -ENOMEM;
261
262	if (msix) {
263		int i;
264
265		vdev->msix = kzalloc(nvec * sizeof(struct msix_entry),
266				     GFP_KERNEL);
267		if (!vdev->msix) {
268			kfree(vdev->ctx);
269			return -ENOMEM;
270		}
271
272		for (i = 0; i < nvec; i++)
273			vdev->msix[i].entry = i;
274
275		ret = pci_enable_msix_range(pdev, vdev->msix, 1, nvec);
276		if (ret < nvec) {
277			if (ret > 0)
278				pci_disable_msix(pdev);
279			kfree(vdev->msix);
280			kfree(vdev->ctx);
281			return ret;
282		}
283	} else {
284		ret = pci_enable_msi_range(pdev, 1, nvec);
285		if (ret < nvec) {
286			if (ret > 0)
287				pci_disable_msi(pdev);
288			kfree(vdev->ctx);
289			return ret;
290		}
291	}
292
293	vdev->num_ctx = nvec;
294	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
295				VFIO_PCI_MSI_IRQ_INDEX;
296
297	if (!msix) {
298		/*
299		 * Compute the virtual hardware field for max msi vectors -
300		 * it is the log base 2 of the number of vectors.
301		 */
302		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
303	}
304
305	return 0;
306}
307
308static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
309				      int vector, int fd, bool msix)
310{
311	struct pci_dev *pdev = vdev->pdev;
312	struct eventfd_ctx *trigger;
313	int irq, ret;
314
315	if (vector < 0 || vector >= vdev->num_ctx)
316		return -EINVAL;
317
318	irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
319
320	if (vdev->ctx[vector].trigger) {
321		free_irq(irq, vdev->ctx[vector].trigger);
322		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
323		kfree(vdev->ctx[vector].name);
324		eventfd_ctx_put(vdev->ctx[vector].trigger);
325		vdev->ctx[vector].trigger = NULL;
326	}
327
328	if (fd < 0)
329		return 0;
330
331	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
332					   msix ? "x" : "", vector,
333					   pci_name(pdev));
334	if (!vdev->ctx[vector].name)
335		return -ENOMEM;
336
337	trigger = eventfd_ctx_fdget(fd);
338	if (IS_ERR(trigger)) {
339		kfree(vdev->ctx[vector].name);
340		return PTR_ERR(trigger);
341	}
342
343	/*
344	 * The MSIx vector table resides in device memory which may be cleared
345	 * via backdoor resets. We don't allow direct access to the vector
346	 * table so even if a userspace driver attempts to save/restore around
347	 * such a reset it would be unsuccessful. To avoid this, restore the
348	 * cached value of the message prior to enabling.
349	 */
350	if (msix) {
351		struct msi_msg msg;
352
353		get_cached_msi_msg(irq, &msg);
354		pci_write_msi_msg(irq, &msg);
355	}
356
357	ret = request_irq(irq, vfio_msihandler, 0,
358			  vdev->ctx[vector].name, trigger);
359	if (ret) {
360		kfree(vdev->ctx[vector].name);
361		eventfd_ctx_put(trigger);
362		return ret;
363	}
364
365	vdev->ctx[vector].producer.token = trigger;
366	vdev->ctx[vector].producer.irq = irq;
367	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
368	if (unlikely(ret))
369		dev_info(&pdev->dev,
370		"irq bypass producer (token %p) registration fails: %d\n",
371		vdev->ctx[vector].producer.token, ret);
372
373	vdev->ctx[vector].trigger = trigger;
374
375	return 0;
376}
377
378static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
379			      unsigned count, int32_t *fds, bool msix)
380{
381	int i, j, ret = 0;
382
383	if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
384		return -EINVAL;
385
386	for (i = 0, j = start; i < count && !ret; i++, j++) {
387		int fd = fds ? fds[i] : -1;
388		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
389	}
390
391	if (ret) {
392		for (--j; j >= (int)start; j--)
393			vfio_msi_set_vector_signal(vdev, j, -1, msix);
394	}
395
396	return ret;
397}
398
399static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
400{
401	struct pci_dev *pdev = vdev->pdev;
402	int i;
403
404	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
405
406	for (i = 0; i < vdev->num_ctx; i++) {
407		vfio_virqfd_disable(&vdev->ctx[i].unmask);
408		vfio_virqfd_disable(&vdev->ctx[i].mask);
409	}
410
411	if (msix) {
412		pci_disable_msix(vdev->pdev);
413		kfree(vdev->msix);
414	} else
415		pci_disable_msi(pdev);
416
417	vdev->irq_type = VFIO_PCI_NUM_IRQS;
418	vdev->num_ctx = 0;
419	kfree(vdev->ctx);
420}
421
422/*
423 * IOCTL support
424 */
425static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
426				    unsigned index, unsigned start,
427				    unsigned count, uint32_t flags, void *data)
428{
429	if (!is_intx(vdev) || start != 0 || count != 1)
430		return -EINVAL;
431
432	if (flags & VFIO_IRQ_SET_DATA_NONE) {
433		vfio_pci_intx_unmask(vdev);
434	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
435		uint8_t unmask = *(uint8_t *)data;
436		if (unmask)
437			vfio_pci_intx_unmask(vdev);
438	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
439		int32_t fd = *(int32_t *)data;
440		if (fd >= 0)
441			return vfio_virqfd_enable((void *) vdev,
442						  vfio_pci_intx_unmask_handler,
443						  vfio_send_intx_eventfd, NULL,
444						  &vdev->ctx[0].unmask, fd);
445
446		vfio_virqfd_disable(&vdev->ctx[0].unmask);
447	}
448
449	return 0;
450}
451
452static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev,
453				  unsigned index, unsigned start,
454				  unsigned count, uint32_t flags, void *data)
455{
456	if (!is_intx(vdev) || start != 0 || count != 1)
457		return -EINVAL;
458
459	if (flags & VFIO_IRQ_SET_DATA_NONE) {
460		vfio_pci_intx_mask(vdev);
461	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
462		uint8_t mask = *(uint8_t *)data;
463		if (mask)
464			vfio_pci_intx_mask(vdev);
465	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
466		return -ENOTTY; /* XXX implement me */
467	}
468
469	return 0;
470}
471
472static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev,
473				     unsigned index, unsigned start,
474				     unsigned count, uint32_t flags, void *data)
475{
476	if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
477		vfio_intx_disable(vdev);
478		return 0;
479	}
480
481	if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
482		return -EINVAL;
483
484	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
485		int32_t fd = *(int32_t *)data;
486		int ret;
487
488		if (is_intx(vdev))
489			return vfio_intx_set_signal(vdev, fd);
490
491		ret = vfio_intx_enable(vdev);
492		if (ret)
493			return ret;
494
495		ret = vfio_intx_set_signal(vdev, fd);
496		if (ret)
497			vfio_intx_disable(vdev);
498
499		return ret;
500	}
501
502	if (!is_intx(vdev))
503		return -EINVAL;
504
505	if (flags & VFIO_IRQ_SET_DATA_NONE) {
506		vfio_send_intx_eventfd(vdev, NULL);
507	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
508		uint8_t trigger = *(uint8_t *)data;
509		if (trigger)
510			vfio_send_intx_eventfd(vdev, NULL);
511	}
512	return 0;
513}
514
515static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
516				    unsigned index, unsigned start,
517				    unsigned count, uint32_t flags, void *data)
518{
519	int i;
520	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
521
522	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
523		vfio_msi_disable(vdev, msix);
524		return 0;
525	}
526
527	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
528		return -EINVAL;
529
530	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
531		int32_t *fds = data;
532		int ret;
533
534		if (vdev->irq_type == index)
535			return vfio_msi_set_block(vdev, start, count,
536						  fds, msix);
537
538		ret = vfio_msi_enable(vdev, start + count, msix);
539		if (ret)
540			return ret;
541
542		ret = vfio_msi_set_block(vdev, start, count, fds, msix);
543		if (ret)
544			vfio_msi_disable(vdev, msix);
545
546		return ret;
547	}
548
549	if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
550		return -EINVAL;
551
552	for (i = start; i < start + count; i++) {
553		if (!vdev->ctx[i].trigger)
554			continue;
555		if (flags & VFIO_IRQ_SET_DATA_NONE) {
556			eventfd_signal(vdev->ctx[i].trigger, 1);
557		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
558			uint8_t *bools = data;
559			if (bools[i - start])
560				eventfd_signal(vdev->ctx[i].trigger, 1);
561		}
562	}
563	return 0;
564}
565
566static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
567					   uint32_t flags, void *data)
568{
569	int32_t fd = *(int32_t *)data;
570
571	if (!(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
572		return -EINVAL;
573
574	/* DATA_NONE/DATA_BOOL enables loopback testing */
575	if (flags & VFIO_IRQ_SET_DATA_NONE) {
576		if (*ctx)
577			eventfd_signal(*ctx, 1);
578		return 0;
579	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
580		uint8_t trigger = *(uint8_t *)data;
581		if (trigger && *ctx)
582			eventfd_signal(*ctx, 1);
583		return 0;
584	}
585
586	/* Handle SET_DATA_EVENTFD */
587	if (fd == -1) {
588		if (*ctx)
589			eventfd_ctx_put(*ctx);
590		*ctx = NULL;
591		return 0;
592	} else if (fd >= 0) {
593		struct eventfd_ctx *efdctx;
594		efdctx = eventfd_ctx_fdget(fd);
595		if (IS_ERR(efdctx))
596			return PTR_ERR(efdctx);
597		if (*ctx)
598			eventfd_ctx_put(*ctx);
599		*ctx = efdctx;
600		return 0;
601	} else
602		return -EINVAL;
603}
604
605static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
606				    unsigned index, unsigned start,
607				    unsigned count, uint32_t flags, void *data)
608{
609	if (index != VFIO_PCI_ERR_IRQ_INDEX)
610		return -EINVAL;
611
612	/*
613	 * We should sanitize start & count, but that wasn't caught
614	 * originally, so this IRQ index must forever ignore them :-(
615	 */
616
617	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, flags, data);
618}
619
620static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
621				    unsigned index, unsigned start,
622				    unsigned count, uint32_t flags, void *data)
623{
624	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count != 1)
625		return -EINVAL;
626
627	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, flags, data);
628}
629
630int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
631			    unsigned index, unsigned start, unsigned count,
632			    void *data)
633{
634	int (*func)(struct vfio_pci_device *vdev, unsigned index,
635		    unsigned start, unsigned count, uint32_t flags,
636		    void *data) = NULL;
637
638	switch (index) {
639	case VFIO_PCI_INTX_IRQ_INDEX:
640		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
641		case VFIO_IRQ_SET_ACTION_MASK:
642			func = vfio_pci_set_intx_mask;
643			break;
644		case VFIO_IRQ_SET_ACTION_UNMASK:
645			func = vfio_pci_set_intx_unmask;
646			break;
647		case VFIO_IRQ_SET_ACTION_TRIGGER:
648			func = vfio_pci_set_intx_trigger;
649			break;
650		}
651		break;
652	case VFIO_PCI_MSI_IRQ_INDEX:
653	case VFIO_PCI_MSIX_IRQ_INDEX:
654		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
655		case VFIO_IRQ_SET_ACTION_MASK:
656		case VFIO_IRQ_SET_ACTION_UNMASK:
657			/* XXX Need masking support exported */
658			break;
659		case VFIO_IRQ_SET_ACTION_TRIGGER:
660			func = vfio_pci_set_msi_trigger;
661			break;
662		}
663		break;
664	case VFIO_PCI_ERR_IRQ_INDEX:
665		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
666		case VFIO_IRQ_SET_ACTION_TRIGGER:
667			if (pci_is_pcie(vdev->pdev))
668				func = vfio_pci_set_err_trigger;
669			break;
670		}
671		break;
672	case VFIO_PCI_REQ_IRQ_INDEX:
673		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
674		case VFIO_IRQ_SET_ACTION_TRIGGER:
675			func = vfio_pci_set_req_trigger;
676			break;
677		}
678		break;
679	}
680
681	if (!func)
682		return -ENOTTY;
683
684	return func(vdev, index, start, count, flags, data);
685}
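
The dispatch above is driven from userspace through the VFIO_DEVICE_SET_IRQS ioctl. Below is a minimal userspace sketch, not part of vfio_pci_intrs.c, showing how eventfd triggers might be attached to the first nvec MSI-X vectors; it lands in vfio_pci_set_msi_trigger() above. device_fd and efds[] are assumed to be supplied by the caller, and error handling is omitted.

/*
 * Hedged userspace sketch (assumed helper, not kernel code): attach one
 * eventfd trigger per MSI-X vector of an already-opened VFIO device.
 */
#include <stdint.h>
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int enable_msix_eventfds(int device_fd, int nvec, int *efds)
{
	size_t argsz = sizeof(struct vfio_irq_set) + nvec * sizeof(int32_t);
	struct vfio_irq_set *irq_set = calloc(1, argsz);
	int32_t *fds;
	int i, ret;

	if (!irq_set)
		return -1;

	irq_set->argsz = argsz;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;
	irq_set->count = nvec;

	fds = (int32_t *)irq_set->data;
	for (i = 0; i < nvec; i++) {
		efds[i] = eventfd(0, EFD_CLOEXEC);	/* one eventfd per vector */
		fds[i] = efds[i];
	}

	/* Reaches vfio_pci_set_msi_trigger() -> vfio_msi_enable()/vfio_msi_set_block() */
	ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	free(irq_set);
	return ret;
}
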
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * VFIO PCI interrupt handling
  4 *
  5 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
  6 *     Author: Alex Williamson <alex.williamson@redhat.com>
  7 *
  8 * Derived from original vfio:
  9 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 10 * Author: Tom Lyon, pugs@cisco.com
 11 */
 12
 13#include <linux/device.h>
 14#include <linux/interrupt.h>
 15#include <linux/eventfd.h>
 16#include <linux/msi.h>
 17#include <linux/pci.h>
 18#include <linux/file.h>
 19#include <linux/vfio.h>
 20#include <linux/wait.h>
 21#include <linux/slab.h>
 22
 23#include "vfio_pci_priv.h"
 24
 25struct vfio_pci_irq_ctx {
 26	struct vfio_pci_core_device	*vdev;
 27	struct eventfd_ctx		*trigger;
 28	struct virqfd			*unmask;
 29	struct virqfd			*mask;
 30	char				*name;
 31	bool				masked;
 32	struct irq_bypass_producer	producer;
 33};
 34
 35static bool irq_is(struct vfio_pci_core_device *vdev, int type)
 36{
 37	return vdev->irq_type == type;
 38}
 39
 40static bool is_intx(struct vfio_pci_core_device *vdev)
 41{
 42	return vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX;
 43}
 44
 45static bool is_irq_none(struct vfio_pci_core_device *vdev)
 46{
 47	return !(vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX ||
 48		 vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX ||
 49		 vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
 50}
 51
 52static
 53struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev,
 54					  unsigned long index)
 55{
 56	return xa_load(&vdev->ctx, index);
 57}
 58
 59static void vfio_irq_ctx_free(struct vfio_pci_core_device *vdev,
 60			      struct vfio_pci_irq_ctx *ctx, unsigned long index)
 61{
 62	xa_erase(&vdev->ctx, index);
 63	kfree(ctx);
 64}
 65
 66static struct vfio_pci_irq_ctx *
 67vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index)
 68{
 69	struct vfio_pci_irq_ctx *ctx;
 70	int ret;
 71
 72	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
 73	if (!ctx)
 74		return NULL;
 75
 76	ret = xa_insert(&vdev->ctx, index, ctx, GFP_KERNEL_ACCOUNT);
 77	if (ret) {
 78		kfree(ctx);
 79		return NULL;
 80	}
 81
 82	return ctx;
 83}
 84
 85/*
 86 * INTx
 87 */
 88static void vfio_send_intx_eventfd(void *opaque, void *data)
 89{
 90	struct vfio_pci_core_device *vdev = opaque;
 91
 92	if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
 93		struct vfio_pci_irq_ctx *ctx = data;
 94		struct eventfd_ctx *trigger = READ_ONCE(ctx->trigger);
 95
 96		if (likely(trigger))
 97			eventfd_signal(trigger);
 98	}
 99}
100
101/* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
102static bool __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
103{
104	struct pci_dev *pdev = vdev->pdev;
105	struct vfio_pci_irq_ctx *ctx;
106	unsigned long flags;
107	bool masked_changed = false;
108
109	lockdep_assert_held(&vdev->igate);
110
111	spin_lock_irqsave(&vdev->irqlock, flags);
112
113	/*
114	 * Masking can come from interrupt, ioctl, or config space
115	 * via INTx disable.  The latter means this can get called
116	 * even when not using intx delivery.  In this case, just
117	 * try to have the physical bit follow the virtual bit.
118	 */
119	if (unlikely(!is_intx(vdev))) {
120		if (vdev->pci_2_3)
121			pci_intx(pdev, 0);
122		goto out_unlock;
123	}
124
125	ctx = vfio_irq_ctx_get(vdev, 0);
126	if (WARN_ON_ONCE(!ctx))
127		goto out_unlock;
128
129	if (!ctx->masked) {
130		/*
131		 * Can't use check_and_mask here because we always want to
132		 * mask, not just when something is pending.
133		 */
134		if (vdev->pci_2_3)
135			pci_intx(pdev, 0);
136		else
137			disable_irq_nosync(pdev->irq);
138
139		ctx->masked = true;
140		masked_changed = true;
141	}
142
143out_unlock:
144	spin_unlock_irqrestore(&vdev->irqlock, flags);
145	return masked_changed;
146}
147
148bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
149{
150	bool mask_changed;
151
152	mutex_lock(&vdev->igate);
153	mask_changed = __vfio_pci_intx_mask(vdev);
154	mutex_unlock(&vdev->igate);
155
156	return mask_changed;
157}
158
159/*
160 * If this is triggered by an eventfd, we can't call eventfd_signal
161 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
162 * a signal is necessary, which can then be handled via a work queue
163 * or directly depending on the caller.
164 */
165static int vfio_pci_intx_unmask_handler(void *opaque, void *data)
166{
167	struct vfio_pci_core_device *vdev = opaque;
168	struct pci_dev *pdev = vdev->pdev;
169	struct vfio_pci_irq_ctx *ctx = data;
170	unsigned long flags;
171	int ret = 0;
172
173	spin_lock_irqsave(&vdev->irqlock, flags);
174
175	/*
176	 * Unmasking comes from ioctl or config, so again, have the
177	 * physical bit follow the virtual even when not using INTx.
178	 */
179	if (unlikely(!is_intx(vdev))) {
180		if (vdev->pci_2_3)
181			pci_intx(pdev, 1);
182		goto out_unlock;
183	}
184
185	if (ctx->masked && !vdev->virq_disabled) {
186		/*
187		 * A pending interrupt here would immediately trigger,
188		 * but we can avoid that overhead by just re-sending
189		 * the interrupt to the user.
190		 */
191		if (vdev->pci_2_3) {
192			if (!pci_check_and_unmask_intx(pdev))
193				ret = 1;
194		} else
195			enable_irq(pdev->irq);
196
197		ctx->masked = (ret > 0);
198	}
199
200out_unlock:
201	spin_unlock_irqrestore(&vdev->irqlock, flags);
202
203	return ret;
204}
205
206static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
207{
208	struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
209
210	lockdep_assert_held(&vdev->igate);
211
212	if (vfio_pci_intx_unmask_handler(vdev, ctx) > 0)
213		vfio_send_intx_eventfd(vdev, ctx);
214}
215
216void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
217{
218	mutex_lock(&vdev->igate);
219	__vfio_pci_intx_unmask(vdev);
220	mutex_unlock(&vdev->igate);
221}
222
223static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
224{
225	struct vfio_pci_irq_ctx *ctx = dev_id;
226	struct vfio_pci_core_device *vdev = ctx->vdev;
227	unsigned long flags;
228	int ret = IRQ_NONE;
229
230	spin_lock_irqsave(&vdev->irqlock, flags);
231
232	if (!vdev->pci_2_3) {
233		disable_irq_nosync(vdev->pdev->irq);
234		ctx->masked = true;
235		ret = IRQ_HANDLED;
236	} else if (!ctx->masked &&  /* may be shared */
237		   pci_check_and_mask_intx(vdev->pdev)) {
238		ctx->masked = true;
239		ret = IRQ_HANDLED;
240	}
241
242	spin_unlock_irqrestore(&vdev->irqlock, flags);
243
244	if (ret == IRQ_HANDLED)
245		vfio_send_intx_eventfd(vdev, ctx);
246
247	return ret;
248}
249
250static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
251			    struct eventfd_ctx *trigger)
252{
253	struct pci_dev *pdev = vdev->pdev;
254	struct vfio_pci_irq_ctx *ctx;
255	unsigned long irqflags;
256	char *name;
257	int ret;
258
259	if (!is_irq_none(vdev))
260		return -EINVAL;
261
262	if (!pdev->irq)
263		return -ENODEV;
264
265	name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
266	if (!name)
267		return -ENOMEM;
268
269	ctx = vfio_irq_ctx_alloc(vdev, 0);
270	if (!ctx) {
271		kfree(name);
272		return -ENOMEM;
273	}
274
275	ctx->name = name;
276	ctx->trigger = trigger;
277	ctx->vdev = vdev;
278
279	/*
280	 * Fill the initial masked state based on virq_disabled.  After
281	 * enable, changing the DisINTx bit in vconfig directly changes INTx
282	 * masking.  igate prevents races during setup, once running masked
283	 * is protected via irqlock.
284	 *
285	 * Devices supporting DisINTx also reflect the current mask state in
286	 * the physical DisINTx bit, which is not affected during IRQ setup.
287	 *
288	 * Devices without DisINTx support require an exclusive interrupt.
289	 * IRQ masking is performed at the IRQ chip.  Again, igate protects
290	 * against races during setup and IRQ handlers and irqfds are not
291	 * yet active, therefore masked is stable and can be used to
292	 * conditionally auto-enable the IRQ.
293	 *
294	 * irq_type must be stable while the IRQ handler is registered,
295	 * therefore it must be set before request_irq().
296	 */
297	ctx->masked = vdev->virq_disabled;
298	if (vdev->pci_2_3) {
299		pci_intx(pdev, !ctx->masked);
300		irqflags = IRQF_SHARED;
301	} else {
302		irqflags = ctx->masked ? IRQF_NO_AUTOEN : 0;
303	}
304
305	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
306
307	ret = request_irq(pdev->irq, vfio_intx_handler,
308			  irqflags, ctx->name, ctx);
309	if (ret) {
310		vdev->irq_type = VFIO_PCI_NUM_IRQS;
311		kfree(name);
312		vfio_irq_ctx_free(vdev, ctx, 0);
313		return ret;
314	}
315
316	return 0;
317}
318
319static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
320				struct eventfd_ctx *trigger)
321{
322	struct pci_dev *pdev = vdev->pdev;
323	struct vfio_pci_irq_ctx *ctx;
324	struct eventfd_ctx *old;
325
326	ctx = vfio_irq_ctx_get(vdev, 0);
327	if (WARN_ON_ONCE(!ctx))
328		return -EINVAL;
329
330	old = ctx->trigger;
331
332	WRITE_ONCE(ctx->trigger, trigger);
333
334	/* Releasing an old ctx requires synchronizing in-flight users */
335	if (old) {
336		synchronize_irq(pdev->irq);
337		vfio_virqfd_flush_thread(&ctx->unmask);
338		eventfd_ctx_put(old);
339	}
340
341	return 0;
342}
343
344static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
345{
346	struct pci_dev *pdev = vdev->pdev;
347	struct vfio_pci_irq_ctx *ctx;
348
349	ctx = vfio_irq_ctx_get(vdev, 0);
350	WARN_ON_ONCE(!ctx);
351	if (ctx) {
352		vfio_virqfd_disable(&ctx->unmask);
353		vfio_virqfd_disable(&ctx->mask);
354		free_irq(pdev->irq, ctx);
355		if (ctx->trigger)
356			eventfd_ctx_put(ctx->trigger);
357		kfree(ctx->name);
358		vfio_irq_ctx_free(vdev, ctx, 0);
359	}
360	vdev->irq_type = VFIO_PCI_NUM_IRQS;
361}
362
363/*
364 * MSI/MSI-X
365 */
366static irqreturn_t vfio_msihandler(int irq, void *arg)
367{
368	struct eventfd_ctx *trigger = arg;
369
370	eventfd_signal(trigger);
371	return IRQ_HANDLED;
372}
373
374static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msix)
375{
376	struct pci_dev *pdev = vdev->pdev;
377	unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI;
378	int ret;
379	u16 cmd;
380
381	if (!is_irq_none(vdev))
382		return -EINVAL;
383
384	/* return the number of supported vectors if we can't get all: */
385	cmd = vfio_pci_memory_lock_and_enable(vdev);
386	ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
387	if (ret < nvec) {
388		if (ret > 0)
389			pci_free_irq_vectors(pdev);
390		vfio_pci_memory_unlock_and_restore(vdev, cmd);
391		return ret;
392	}
393	vfio_pci_memory_unlock_and_restore(vdev, cmd);
394
395	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
396				VFIO_PCI_MSI_IRQ_INDEX;
397
398	if (!msix) {
399		/*
400		 * Compute the virtual hardware field for max msi vectors -
401		 * it is the log base 2 of the number of vectors.
402		 */
403		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
404	}
405
406	return 0;
407}
408
409/*
410 * vfio_msi_alloc_irq() returns the Linux IRQ number of an MSI or MSI-X device
411 * interrupt vector. If a Linux IRQ number is not available then a new
412 * interrupt is allocated if dynamic MSI-X is supported.
413 *
414 * Where is vfio_msi_free_irq()? Allocated interrupts are maintained,
415 * essentially forming a cache that subsequent allocations can draw from.
416 * Interrupts are freed using pci_free_irq_vectors() when MSI/MSI-X is
417 * disabled.
418 */
419static int vfio_msi_alloc_irq(struct vfio_pci_core_device *vdev,
420			      unsigned int vector, bool msix)
421{
422	struct pci_dev *pdev = vdev->pdev;
423	struct msi_map map;
424	int irq;
425	u16 cmd;
426
427	irq = pci_irq_vector(pdev, vector);
428	if (WARN_ON_ONCE(irq == 0))
429		return -EINVAL;
430	if (irq > 0 || !msix || !vdev->has_dyn_msix)
431		return irq;
432
433	cmd = vfio_pci_memory_lock_and_enable(vdev);
434	map = pci_msix_alloc_irq_at(pdev, vector, NULL);
435	vfio_pci_memory_unlock_and_restore(vdev, cmd);
436
437	return map.index < 0 ? map.index : map.virq;
438}
439
440static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
441				      unsigned int vector, int fd, bool msix)
442{
443	struct pci_dev *pdev = vdev->pdev;
444	struct vfio_pci_irq_ctx *ctx;
445	struct eventfd_ctx *trigger;
446	int irq = -EINVAL, ret;
447	u16 cmd;
448
449	ctx = vfio_irq_ctx_get(vdev, vector);
450
451	if (ctx) {
452		irq_bypass_unregister_producer(&ctx->producer);
453		irq = pci_irq_vector(pdev, vector);
454		cmd = vfio_pci_memory_lock_and_enable(vdev);
455		free_irq(irq, ctx->trigger);
456		vfio_pci_memory_unlock_and_restore(vdev, cmd);
457		/* Interrupt stays allocated, will be freed at MSI-X disable. */
458		kfree(ctx->name);
459		eventfd_ctx_put(ctx->trigger);
460		vfio_irq_ctx_free(vdev, ctx, vector);
461	}
462
463	if (fd < 0)
464		return 0;
465
466	if (irq == -EINVAL) {
467		/* Interrupt stays allocated, will be freed at MSI-X disable. */
468		irq = vfio_msi_alloc_irq(vdev, vector, msix);
469		if (irq < 0)
470			return irq;
471	}
472
473	ctx = vfio_irq_ctx_alloc(vdev, vector);
474	if (!ctx)
475		return -ENOMEM;
476
477	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
478			      msix ? "x" : "", vector, pci_name(pdev));
479	if (!ctx->name) {
480		ret = -ENOMEM;
481		goto out_free_ctx;
482	}
483
484	trigger = eventfd_ctx_fdget(fd);
485	if (IS_ERR(trigger)) {
486		ret = PTR_ERR(trigger);
487		goto out_free_name;
488	}
489
490	/*
491	 * If the vector was previously allocated, refresh the on-device
492	 * message data before enabling in case it had been cleared or
493	 * corrupted (e.g. due to backdoor resets) since writing.
494	 */
495	cmd = vfio_pci_memory_lock_and_enable(vdev);
496	if (msix) {
497		struct msi_msg msg;
498
499		get_cached_msi_msg(irq, &msg);
500		pci_write_msi_msg(irq, &msg);
501	}
502
503	ret = request_irq(irq, vfio_msihandler, 0, ctx->name, trigger);
504	vfio_pci_memory_unlock_and_restore(vdev, cmd);
505	if (ret)
506		goto out_put_eventfd_ctx;
507
508	ctx->producer.token = trigger;
509	ctx->producer.irq = irq;
510	ret = irq_bypass_register_producer(&ctx->producer);
511	if (unlikely(ret)) {
512		dev_info(&pdev->dev,
513		"irq bypass producer (token %p) registration fails: %d\n",
514		ctx->producer.token, ret);
515
516		ctx->producer.token = NULL;
517	}
518	ctx->trigger = trigger;
519
520	return 0;
521
522out_put_eventfd_ctx:
523	eventfd_ctx_put(trigger);
524out_free_name:
525	kfree(ctx->name);
526out_free_ctx:
527	vfio_irq_ctx_free(vdev, ctx, vector);
528	return ret;
529}
530
531static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
532			      unsigned count, int32_t *fds, bool msix)
533{
534	unsigned int i, j;
535	int ret = 0;
536
537	for (i = 0, j = start; i < count && !ret; i++, j++) {
538		int fd = fds ? fds[i] : -1;
539		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
540	}
541
542	if (ret) {
543		for (i = start; i < j; i++)
544			vfio_msi_set_vector_signal(vdev, i, -1, msix);
545	}
546
547	return ret;
548}
549
550static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
551{
552	struct pci_dev *pdev = vdev->pdev;
553	struct vfio_pci_irq_ctx *ctx;
554	unsigned long i;
555	u16 cmd;
556
557	xa_for_each(&vdev->ctx, i, ctx) {
558		vfio_virqfd_disable(&ctx->unmask);
559		vfio_virqfd_disable(&ctx->mask);
560		vfio_msi_set_vector_signal(vdev, i, -1, msix);
561	}
562
563	cmd = vfio_pci_memory_lock_and_enable(vdev);
564	pci_free_irq_vectors(pdev);
565	vfio_pci_memory_unlock_and_restore(vdev, cmd);
566
567	/*
568	 * Both disable paths above use pci_intx_for_msi() to clear DisINTx
569	 * via their shutdown paths.  Restore for NoINTx devices.
570	 */
571	if (vdev->nointx)
572		pci_intx(pdev, 0);
573
574	vdev->irq_type = VFIO_PCI_NUM_IRQS;
575}
576
577/*
578 * IOCTL support
579 */
580static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
581				    unsigned index, unsigned start,
582				    unsigned count, uint32_t flags, void *data)
583{
584	if (!is_intx(vdev) || start != 0 || count != 1)
585		return -EINVAL;
586
587	if (flags & VFIO_IRQ_SET_DATA_NONE) {
588		__vfio_pci_intx_unmask(vdev);
589	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
590		uint8_t unmask = *(uint8_t *)data;
591		if (unmask)
592			__vfio_pci_intx_unmask(vdev);
593	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
594		struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
595		int32_t fd = *(int32_t *)data;
596
597		if (WARN_ON_ONCE(!ctx))
598			return -EINVAL;
599		if (fd >= 0)
600			return vfio_virqfd_enable((void *) vdev,
601						  vfio_pci_intx_unmask_handler,
602						  vfio_send_intx_eventfd, ctx,
603						  &ctx->unmask, fd);
604
605		vfio_virqfd_disable(&ctx->unmask);
606	}
607
608	return 0;
609}
610
611static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev,
612				  unsigned index, unsigned start,
613				  unsigned count, uint32_t flags, void *data)
614{
615	if (!is_intx(vdev) || start != 0 || count != 1)
616		return -EINVAL;
617
618	if (flags & VFIO_IRQ_SET_DATA_NONE) {
619		__vfio_pci_intx_mask(vdev);
620	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
621		uint8_t mask = *(uint8_t *)data;
622		if (mask)
623			__vfio_pci_intx_mask(vdev);
624	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
625		return -ENOTTY; /* XXX implement me */
626	}
627
628	return 0;
629}
630
631static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
632				     unsigned index, unsigned start,
633				     unsigned count, uint32_t flags, void *data)
634{
635	if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
636		vfio_intx_disable(vdev);
637		return 0;
638	}
639
640	if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
641		return -EINVAL;
642
643	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
644		struct eventfd_ctx *trigger = NULL;
645		int32_t fd = *(int32_t *)data;
646		int ret;
647
648		if (fd >= 0) {
649			trigger = eventfd_ctx_fdget(fd);
650			if (IS_ERR(trigger))
651				return PTR_ERR(trigger);
652		}
653
654		if (is_intx(vdev))
655			ret = vfio_intx_set_signal(vdev, trigger);
656		else
657			ret = vfio_intx_enable(vdev, trigger);
658
659		if (ret && trigger)
660			eventfd_ctx_put(trigger);
661
662		return ret;
663	}
664
665	if (!is_intx(vdev))
666		return -EINVAL;
667
668	if (flags & VFIO_IRQ_SET_DATA_NONE) {
669		vfio_send_intx_eventfd(vdev, vfio_irq_ctx_get(vdev, 0));
670	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
671		uint8_t trigger = *(uint8_t *)data;
672		if (trigger)
673			vfio_send_intx_eventfd(vdev, vfio_irq_ctx_get(vdev, 0));
674	}
675	return 0;
676}
677
678static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
679				    unsigned index, unsigned start,
680				    unsigned count, uint32_t flags, void *data)
681{
682	struct vfio_pci_irq_ctx *ctx;
683	unsigned int i;
684	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
685
686	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
687		vfio_msi_disable(vdev, msix);
688		return 0;
689	}
690
691	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
692		return -EINVAL;
693
694	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
695		int32_t *fds = data;
696		int ret;
697
698		if (vdev->irq_type == index)
699			return vfio_msi_set_block(vdev, start, count,
700						  fds, msix);
701
702		ret = vfio_msi_enable(vdev, start + count, msix);
703		if (ret)
704			return ret;
705
706		ret = vfio_msi_set_block(vdev, start, count, fds, msix);
707		if (ret)
708			vfio_msi_disable(vdev, msix);
709
710		return ret;
711	}
712
713	if (!irq_is(vdev, index))
714		return -EINVAL;
715
716	for (i = start; i < start + count; i++) {
717		ctx = vfio_irq_ctx_get(vdev, i);
718		if (!ctx)
719			continue;
720		if (flags & VFIO_IRQ_SET_DATA_NONE) {
721			eventfd_signal(ctx->trigger);
722		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
723			uint8_t *bools = data;
724			if (bools[i - start])
725				eventfd_signal(ctx->trigger);
726		}
727	}
728	return 0;
729}
730
731static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
732					   unsigned int count, uint32_t flags,
733					   void *data)
734{
735	/* DATA_NONE/DATA_BOOL enables loopback testing */
736	if (flags & VFIO_IRQ_SET_DATA_NONE) {
737		if (*ctx) {
738			if (count) {
739				eventfd_signal(*ctx);
740			} else {
741				eventfd_ctx_put(*ctx);
742				*ctx = NULL;
743			}
744			return 0;
745		}
746	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
747		uint8_t trigger;
748
749		if (!count)
750			return -EINVAL;
751
752		trigger = *(uint8_t *)data;
753		if (trigger && *ctx)
754			eventfd_signal(*ctx);
755
756		return 0;
757	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
758		int32_t fd;
759
760		if (!count)
761			return -EINVAL;
762
763		fd = *(int32_t *)data;
764		if (fd == -1) {
765			if (*ctx)
766				eventfd_ctx_put(*ctx);
767			*ctx = NULL;
768		} else if (fd >= 0) {
769			struct eventfd_ctx *efdctx;
770
771			efdctx = eventfd_ctx_fdget(fd);
772			if (IS_ERR(efdctx))
773				return PTR_ERR(efdctx);
774
775			if (*ctx)
776				eventfd_ctx_put(*ctx);
777
778			*ctx = efdctx;
779		}
780		return 0;
781	}
782
783	return -EINVAL;
784}
785
786static int vfio_pci_set_err_trigger(struct vfio_pci_core_device *vdev,
787				    unsigned index, unsigned start,
788				    unsigned count, uint32_t flags, void *data)
789{
790	if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
791		return -EINVAL;
792
793	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
794					       count, flags, data);
795}
796
797static int vfio_pci_set_req_trigger(struct vfio_pci_core_device *vdev,
798				    unsigned index, unsigned start,
799				    unsigned count, uint32_t flags, void *data)
800{
801	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
802		return -EINVAL;
803
804	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
805					       count, flags, data);
806}
807
808int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
809			    unsigned index, unsigned start, unsigned count,
810			    void *data)
811{
812	int (*func)(struct vfio_pci_core_device *vdev, unsigned index,
813		    unsigned start, unsigned count, uint32_t flags,
814		    void *data) = NULL;
815
816	switch (index) {
817	case VFIO_PCI_INTX_IRQ_INDEX:
818		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
819		case VFIO_IRQ_SET_ACTION_MASK:
820			func = vfio_pci_set_intx_mask;
821			break;
822		case VFIO_IRQ_SET_ACTION_UNMASK:
823			func = vfio_pci_set_intx_unmask;
824			break;
825		case VFIO_IRQ_SET_ACTION_TRIGGER:
826			func = vfio_pci_set_intx_trigger;
827			break;
828		}
829		break;
830	case VFIO_PCI_MSI_IRQ_INDEX:
831	case VFIO_PCI_MSIX_IRQ_INDEX:
832		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
833		case VFIO_IRQ_SET_ACTION_MASK:
834		case VFIO_IRQ_SET_ACTION_UNMASK:
835			/* XXX Need masking support exported */
836			break;
837		case VFIO_IRQ_SET_ACTION_TRIGGER:
838			func = vfio_pci_set_msi_trigger;
839			break;
840		}
841		break;
842	case VFIO_PCI_ERR_IRQ_INDEX:
843		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
844		case VFIO_IRQ_SET_ACTION_TRIGGER:
845			if (pci_is_pcie(vdev->pdev))
846				func = vfio_pci_set_err_trigger;
847			break;
848		}
849		break;
850	case VFIO_PCI_REQ_IRQ_INDEX:
851		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
852		case VFIO_IRQ_SET_ACTION_TRIGGER:
853			func = vfio_pci_set_req_trigger;
854			break;
855		}
856		break;
857	}
858
859	if (!func)
860		return -ENOTTY;
861
862	return func(vdev, index, start, count, flags, data);
863}
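
A matching teardown sketch (again an assumed userspace helper, not part of the file above): a zero-count DATA_NONE/ACTION_TRIGGER write disables the whole index, which is the path that reaches vfio_intx_disable() or vfio_msi_disable() in both versions shown.

#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Disable whichever IRQ index (INTx/MSI/MSI-X) is currently enabled. */
static int disable_irq_index(int device_fd, unsigned int index)
{
	struct vfio_irq_set irq_set = {
		.argsz = sizeof(irq_set),
		.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
		.index = index,		/* e.g. VFIO_PCI_MSIX_IRQ_INDEX */
		.start = 0,
		.count = 0,		/* count == 0 with DATA_NONE means "tear down" */
	};

	return ioctl(device_fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}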