Loading...
1/*
2 * Xen event channels
3 *
4 * Xen models interrupts with abstract event channels. Because each
5 * domain gets 1024 event channels, but NR_IRQ is not that large, we
6 * must dynamically map irqs<->event channels. The event channels
7 * interface with the rest of the kernel by defining a xen interrupt
8 * chip. When an event is received, it is mapped to an irq and sent
9 * through the normal interrupt processing path.
10 *
11 * There are four kinds of events which can be mapped to an event
12 * channel:
13 *
14 * 1. Inter-domain notifications. This includes all the virtual
15 * device events, since they're driven by front-ends in another domain
16 * (typically dom0).
17 * 2. VIRQs, typically used for timers. These are per-cpu events.
18 * 3. IPIs.
19 * 4. PIRQs - Hardware interrupts.
20 *
21 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
22 */
23
24#include <linux/linkage.h>
25#include <linux/interrupt.h>
26#include <linux/irq.h>
27#include <linux/module.h>
28#include <linux/string.h>
29#include <linux/bootmem.h>
30#include <linux/slab.h>
31#include <linux/irqnr.h>
32#include <linux/pci.h>
33
34#include <asm/desc.h>
35#include <asm/ptrace.h>
36#include <asm/irq.h>
37#include <asm/idle.h>
38#include <asm/io_apic.h>
39#include <asm/sync_bitops.h>
40#include <asm/xen/pci.h>
41#include <asm/xen/hypercall.h>
42#include <asm/xen/hypervisor.h>
43
44#include <xen/xen.h>
45#include <xen/hvm.h>
46#include <xen/xen-ops.h>
47#include <xen/events.h>
48#include <xen/interface/xen.h>
49#include <xen/interface/event_channel.h>
50#include <xen/interface/hvm/hvm_op.h>
51#include <xen/interface/hvm/params.h>
52
/*
 * This lock protects updates to the following mapping and reference-count
 * arrays. The lock does not need to be acquired to read the mapping tables.
 */
static DEFINE_MUTEX(irq_mapping_update_lock);

/* Every irq_info allocated by xen_irq_init() is linked on this list. */
static LIST_HEAD(xen_irq_list_head);

/* IRQ <-> VIRQ mapping: per-cpu table indexed by virq, -1 when unbound. */
static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};

/* IRQ <-> IPI mapping: per-cpu table indexed by IPI vector, -1 when unbound. */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
66
/*
 * Interrupt types. IRQT_UNBOUND is deliberately zero so that a freshly
 * zero-allocated irq_info starts out unbound.
 */
enum xen_irq_type {
	IRQT_UNBOUND = 0,	/* not (yet) bound to any event source */
	IRQT_PIRQ,		/* hardware interrupt */
	IRQT_VIRQ,		/* per-cpu virtual interrupt */
	IRQT_IPI,		/* inter-processor interrupt */
	IRQT_EVTCHN		/* plain event channel (inter-domain notification) */
};
75
/*
 * Packed IRQ information:
 * type - enum xen_irq_type
 * event channel - irq->event channel mapping
 * cpu - cpu this event channel is bound to
 * index - type-specific information:
 *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
 *           guest, or GSI (real passthrough IRQ) of the device.
 *    VIRQ - virq number
 *    IPI - IPI vector
 *    EVTCHN -
 */
struct irq_info
{
	struct list_head list;		/* link in xen_irq_list_head */
	enum xen_irq_type type;	/* type */
	unsigned irq;			/* Linux irq number this info belongs to */
	unsigned short evtchn;	/* event channel */
	unsigned short cpu;	/* cpu bound */

	/* Type-specific payload; which member is valid depends on 'type'. */
	union {
		unsigned short virq;
		enum ipi_vector ipi;
		struct {
			unsigned short pirq;
			unsigned short gsi;
			unsigned char vector;
			unsigned char flags;	/* PIRQ_* bits below */
			uint16_t domid;
		} pirq;
	} u;
};
/* Bits for irq_info.u.pirq.flags */
#define PIRQ_NEEDS_EOI	(1 << 0)
#define PIRQ_SHAREABLE	(1 << 1)
110
/* Event channel -> irq lookup table, indexed by port; -1 means unmapped. */
static int *evtchn_to_irq;

/* Per-cpu bitmap of the event channels that are bound to that cpu. */
static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG],
		      cpu_evtchn_mask);

/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn)	((chn) != 0)

/* irq chips and callbacks defined elsewhere in this file. */
static struct irq_chip xen_dynamic_chip;
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);
124
/*
 * Return the Xen bookkeeping structure attached to @irq as its handler
 * data (whatever irq_get_handler_data() yields for that irq).
 */
static struct irq_info *info_for_irq(unsigned irq)
{
	struct irq_info *info = irq_get_handler_data(irq);

	return info;
}
130
131/* Constructors for packed IRQ information. */
132static void xen_irq_info_common_init(struct irq_info *info,
133 unsigned irq,
134 enum xen_irq_type type,
135 unsigned short evtchn,
136 unsigned short cpu)
137{
138
139 BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
140
141 info->type = type;
142 info->irq = irq;
143 info->evtchn = evtchn;
144 info->cpu = cpu;
145
146 evtchn_to_irq[evtchn] = irq;
147}
148
149static void xen_irq_info_evtchn_init(unsigned irq,
150 unsigned short evtchn)
151{
152 struct irq_info *info = info_for_irq(irq);
153
154 xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0);
155}
156
157static void xen_irq_info_ipi_init(unsigned cpu,
158 unsigned irq,
159 unsigned short evtchn,
160 enum ipi_vector ipi)
161{
162 struct irq_info *info = info_for_irq(irq);
163
164 xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
165
166 info->u.ipi = ipi;
167
168 per_cpu(ipi_to_irq, cpu)[ipi] = irq;
169}
170
171static void xen_irq_info_virq_init(unsigned cpu,
172 unsigned irq,
173 unsigned short evtchn,
174 unsigned short virq)
175{
176 struct irq_info *info = info_for_irq(irq);
177
178 xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
179
180 info->u.virq = virq;
181
182 per_cpu(virq_to_irq, cpu)[virq] = irq;
183}
184
185static void xen_irq_info_pirq_init(unsigned irq,
186 unsigned short evtchn,
187 unsigned short pirq,
188 unsigned short gsi,
189 unsigned short vector,
190 uint16_t domid,
191 unsigned char flags)
192{
193 struct irq_info *info = info_for_irq(irq);
194
195 xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
196
197 info->u.pirq.pirq = pirq;
198 info->u.pirq.gsi = gsi;
199 info->u.pirq.vector = vector;
200 info->u.pirq.domid = domid;
201 info->u.pirq.flags = flags;
202}
203
204/*
205 * Accessors for packed IRQ information.
206 */
207static unsigned int evtchn_from_irq(unsigned irq)
208{
209 if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
210 return 0;
211
212 return info_for_irq(irq)->evtchn;
213}
214
215unsigned irq_from_evtchn(unsigned int evtchn)
216{
217 return evtchn_to_irq[evtchn];
218}
219EXPORT_SYMBOL_GPL(irq_from_evtchn);
220
221static enum ipi_vector ipi_from_irq(unsigned irq)
222{
223 struct irq_info *info = info_for_irq(irq);
224
225 BUG_ON(info == NULL);
226 BUG_ON(info->type != IRQT_IPI);
227
228 return info->u.ipi;
229}
230
231static unsigned virq_from_irq(unsigned irq)
232{
233 struct irq_info *info = info_for_irq(irq);
234
235 BUG_ON(info == NULL);
236 BUG_ON(info->type != IRQT_VIRQ);
237
238 return info->u.virq;
239}
240
241static unsigned pirq_from_irq(unsigned irq)
242{
243 struct irq_info *info = info_for_irq(irq);
244
245 BUG_ON(info == NULL);
246 BUG_ON(info->type != IRQT_PIRQ);
247
248 return info->u.pirq.pirq;
249}
250
251static enum xen_irq_type type_from_irq(unsigned irq)
252{
253 return info_for_irq(irq)->type;
254}
255
256static unsigned cpu_from_irq(unsigned irq)
257{
258 return info_for_irq(irq)->cpu;
259}
260
261static unsigned int cpu_from_evtchn(unsigned int evtchn)
262{
263 int irq = evtchn_to_irq[evtchn];
264 unsigned ret = 0;
265
266 if (irq != -1)
267 ret = cpu_from_irq(irq);
268
269 return ret;
270}
271
272static bool pirq_needs_eoi(unsigned irq)
273{
274 struct irq_info *info = info_for_irq(irq);
275
276 BUG_ON(info->type != IRQT_PIRQ);
277
278 return info->u.pirq.flags & PIRQ_NEEDS_EOI;
279}
280
281static inline unsigned long active_evtchns(unsigned int cpu,
282 struct shared_info *sh,
283 unsigned int idx)
284{
285 return (sh->evtchn_pending[idx] &
286 per_cpu(cpu_evtchn_mask, cpu)[idx] &
287 ~sh->evtchn_mask[idx]);
288}
289
290static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
291{
292 int irq = evtchn_to_irq[chn];
293
294 BUG_ON(irq == -1);
295#ifdef CONFIG_SMP
296 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
297#endif
298
299 clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq)));
300 set_bit(chn, per_cpu(cpu_evtchn_mask, cpu));
301
302 info_for_irq(irq)->cpu = cpu;
303}
304
305static void init_evtchn_cpu_bindings(void)
306{
307 int i;
308#ifdef CONFIG_SMP
309 struct irq_info *info;
310
311 /* By default all event channels notify CPU#0. */
312 list_for_each_entry(info, &xen_irq_list_head, list) {
313 struct irq_desc *desc = irq_to_desc(info->irq);
314 cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
315 }
316#endif
317
318 for_each_possible_cpu(i)
319 memset(per_cpu(cpu_evtchn_mask, i),
320 (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i)));
321}
322
323static inline void clear_evtchn(int port)
324{
325 struct shared_info *s = HYPERVISOR_shared_info;
326 sync_clear_bit(port, &s->evtchn_pending[0]);
327}
328
329static inline void set_evtchn(int port)
330{
331 struct shared_info *s = HYPERVISOR_shared_info;
332 sync_set_bit(port, &s->evtchn_pending[0]);
333}
334
335static inline int test_evtchn(int port)
336{
337 struct shared_info *s = HYPERVISOR_shared_info;
338 return sync_test_bit(port, &s->evtchn_pending[0]);
339}
340
341
/**
 * notify_remote_via_irq - send event to remote end of event channel via irq
 * @irq: irq of event channel to send event to
 *
 * Unlike notify_remote_via_evtchn(), this is safe to use across
 * save/restore. Notifications on a broken connection are silently
 * dropped.
 */
void notify_remote_via_irq(int irq)
{
	int port = evtchn_from_irq(irq);

	/* No valid channel behind this irq: nothing to notify. */
	if (!VALID_EVTCHN(port))
		return;

	notify_remote_via_evtchn(port);
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
358
359static void mask_evtchn(int port)
360{
361 struct shared_info *s = HYPERVISOR_shared_info;
362 sync_set_bit(port, &s->evtchn_mask[0]);
363}
364
/*
 * Unmask event channel @port. Must be called with interrupts disabled
 * (enforced by the BUG_ON below).
 *
 * If the port is bound to another cpu the unmask is delegated to the
 * hypervisor via EVTCHNOP_unmask. Otherwise the mask bit is cleared locally
 * and, if the event is already pending, the selector bit and the upcall
 * pending flag of this vcpu are set so that the event gets processed.
 */
static void unmask_evtchn(int port)
{
	struct shared_info *s = HYPERVISOR_shared_info;
	/* get_cpu()/put_cpu() bracket the use of the current cpu number */
	unsigned int cpu = get_cpu();

	BUG_ON(!irqs_disabled());

	/* Slow path (hypercall) if this is a non-local port. */
	if (unlikely(cpu != cpu_from_evtchn(port))) {
		struct evtchn_unmask unmask = { .port = port };
		/* the hypercall's return value is deliberately ignored */
		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
	} else {
		struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);

		sync_clear_bit(port, &s->evtchn_mask[0]);

		/*
		 * The following is basically the equivalent of
		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
		 * the interrupt edge' if the channel is masked.
		 */
		/*
		 * Only raise the upcall flag when the selector bit for the
		 * port's word was not already set.
		 */
		if (sync_test_bit(port, &s->evtchn_pending[0]) &&
		    !sync_test_and_set_bit(port / BITS_PER_LONG,
					   &vcpu_info->evtchn_pending_sel))
			vcpu_info->evtchn_upcall_pending = 1;
	}

	put_cpu();
}
394
395static void xen_irq_init(unsigned irq)
396{
397 struct irq_info *info;
398#ifdef CONFIG_SMP
399 struct irq_desc *desc = irq_to_desc(irq);
400
401 /* By default all event channels notify CPU#0. */
402 cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
403#endif
404
405 info = kzalloc(sizeof(*info), GFP_KERNEL);
406 if (info == NULL)
407 panic("Unable to allocate metadata for IRQ%d\n", irq);
408
409 info->type = IRQT_UNBOUND;
410
411 irq_set_handler_data(irq, info);
412
413 list_add_tail(&info->list, &xen_irq_list_head);
414}
415
416static int __must_check xen_allocate_irq_dynamic(void)
417{
418 int first = 0;
419 int irq;
420
421#ifdef CONFIG_X86_IO_APIC
422 /*
423 * For an HVM guest or domain 0 which see "real" (emulated or
424 * actual respectively) GSIs we allocate dynamic IRQs
425 * e.g. those corresponding to event channels or MSIs
426 * etc. from the range above those "real" GSIs to avoid
427 * collisions.
428 */
429 if (xen_initial_domain() || xen_hvm_domain())
430 first = get_nr_irqs_gsi();
431#endif
432
433 irq = irq_alloc_desc_from(first, -1);
434
435 xen_irq_init(irq);
436
437 return irq;
438}
439
440static int __must_check xen_allocate_irq_gsi(unsigned gsi)
441{
442 int irq;
443
444 /*
445 * A PV guest has no concept of a GSI (since it has no ACPI
446 * nor access to/knowledge of the physical APICs). Therefore
447 * all IRQs are dynamically allocated from the entire IRQ
448 * space.
449 */
450 if (xen_pv_domain() && !xen_initial_domain())
451 return xen_allocate_irq_dynamic();
452
453 /* Legacy IRQ descriptors are already allocated by the arch. */
454 if (gsi < NR_IRQS_LEGACY)
455 irq = gsi;
456 else
457 irq = irq_alloc_desc_at(gsi, -1);
458
459 xen_irq_init(irq);
460
461 return irq;
462}
463
464static void xen_free_irq(unsigned irq)
465{
466 struct irq_info *info = irq_get_handler_data(irq);
467
468 list_del(&info->list);
469
470 irq_set_handler_data(irq, NULL);
471
472 kfree(info);
473
474 /* Legacy IRQ descriptors are managed by the arch. */
475 if (irq < NR_IRQS_LEGACY)
476 return;
477
478 irq_free_desc(irq);
479}
480
481static void pirq_query_unmask(int irq)
482{
483 struct physdev_irq_status_query irq_status;
484 struct irq_info *info = info_for_irq(irq);
485
486 BUG_ON(info->type != IRQT_PIRQ);
487
488 irq_status.irq = pirq_from_irq(irq);
489 if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
490 irq_status.flags = 0;
491
492 info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
493 if (irq_status.flags & XENIRQSTAT_needs_eoi)
494 info->u.pirq.flags |= PIRQ_NEEDS_EOI;
495}
496
497static bool probing_irq(int irq)
498{
499 struct irq_desc *desc = irq_to_desc(irq);
500
501 return desc && desc->action == NULL;
502}
503
504static void eoi_pirq(struct irq_data *data)
505{
506 int evtchn = evtchn_from_irq(data->irq);
507 struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
508 int rc = 0;
509
510 irq_move_irq(data);
511
512 if (VALID_EVTCHN(evtchn))
513 clear_evtchn(evtchn);
514
515 if (pirq_needs_eoi(data->irq)) {
516 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
517 WARN_ON(rc);
518 }
519}
520
/* Mask-and-ack for a PIRQ: disable the irq first, then perform the EOI. */
static void mask_ack_pirq(struct irq_data *data)
{
	disable_dynirq(data);	/* mask */
	eoi_pirq(data);		/* ack */
}
526
527static unsigned int __startup_pirq(unsigned int irq)
528{
529 struct evtchn_bind_pirq bind_pirq;
530 struct irq_info *info = info_for_irq(irq);
531 int evtchn = evtchn_from_irq(irq);
532 int rc;
533
534 BUG_ON(info->type != IRQT_PIRQ);
535
536 if (VALID_EVTCHN(evtchn))
537 goto out;
538
539 bind_pirq.pirq = pirq_from_irq(irq);
540 /* NB. We are happy to share unless we are probing. */
541 bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
542 BIND_PIRQ__WILL_SHARE : 0;
543 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
544 if (rc != 0) {
545 if (!probing_irq(irq))
546 printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
547 irq);
548 return 0;
549 }
550 evtchn = bind_pirq.port;
551
552 pirq_query_unmask(irq);
553
554 evtchn_to_irq[evtchn] = irq;
555 bind_evtchn_to_cpu(evtchn, 0);
556 info->evtchn = evtchn;
557
558out:
559 unmask_evtchn(evtchn);
560 eoi_pirq(irq_get_irq_data(irq));
561
562 return 0;
563}
564
565static unsigned int startup_pirq(struct irq_data *data)
566{
567 return __startup_pirq(data->irq);
568}
569
570static void shutdown_pirq(struct irq_data *data)
571{
572 struct evtchn_close close;
573 unsigned int irq = data->irq;
574 struct irq_info *info = info_for_irq(irq);
575 int evtchn = evtchn_from_irq(irq);
576
577 BUG_ON(info->type != IRQT_PIRQ);
578
579 if (!VALID_EVTCHN(evtchn))
580 return;
581
582 mask_evtchn(evtchn);
583
584 close.port = evtchn;
585 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
586 BUG();
587
588 bind_evtchn_to_cpu(evtchn, 0);
589 evtchn_to_irq[evtchn] = -1;
590 info->evtchn = 0;
591}
592
/* Enabling a PIRQ is the same as starting it up; the result is ignored. */
static void enable_pirq(struct irq_data *data)
{
	(void)startup_pirq(data);
}
597
/* Disabling a PIRQ is delegated to the dynamic-irq disable callback. */
static void disable_pirq(struct irq_data *data)
{
	disable_dynirq(data);
}
602
603static int find_irq_by_gsi(unsigned gsi)
604{
605 struct irq_info *info;
606
607 list_for_each_entry(info, &xen_irq_list_head, list) {
608 if (info->type != IRQT_PIRQ)
609 continue;
610
611 if (info->u.pirq.gsi == gsi)
612 return info->irq;
613 }
614
615 return -1;
616}
617
618/*
619 * Do not make any assumptions regarding the relationship between the
620 * IRQ number returned here and the Xen pirq argument.
621 *
622 * Note: We don't assign an event channel until the irq actually started
623 * up. Return an existing irq if we've already got one for the gsi.
624 *
625 * Shareable implies level triggered, not shareable implies edge
626 * triggered here.
627 */
628int xen_bind_pirq_gsi_to_irq(unsigned gsi,
629 unsigned pirq, int shareable, char *name)
630{
631 int irq = -1;
632 struct physdev_irq irq_op;
633
634 mutex_lock(&irq_mapping_update_lock);
635
636 irq = find_irq_by_gsi(gsi);
637 if (irq != -1) {
638 printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
639 irq, gsi);
640 goto out; /* XXX need refcount? */
641 }
642
643 irq = xen_allocate_irq_gsi(gsi);
644 if (irq < 0)
645 goto out;
646
647 irq_op.irq = irq;
648 irq_op.vector = 0;
649
650 /* Only the privileged domain can do this. For non-priv, the pcifront
651 * driver provides a PCI bus that does the call to do exactly
652 * this in the priv domain. */
653 if (xen_initial_domain() &&
654 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
655 xen_free_irq(irq);
656 irq = -ENOSPC;
657 goto out;
658 }
659
660 xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF,
661 shareable ? PIRQ_SHAREABLE : 0);
662
663 pirq_query_unmask(irq);
664 /* We try to use the handler with the appropriate semantic for the
665 * type of interrupt: if the interrupt is an edge triggered
666 * interrupt we use handle_edge_irq.
667 *
668 * On the other hand if the interrupt is level triggered we use
669 * handle_fasteoi_irq like the native code does for this kind of
670 * interrupts.
671 *
672 * Depending on the Xen version, pirq_needs_eoi might return true
673 * not only for level triggered interrupts but for edge triggered
674 * interrupts too. In any case Xen always honors the eoi mechanism,
675 * not injecting any more pirqs of the same kind if the first one
676 * hasn't received an eoi yet. Therefore using the fasteoi handler
677 * is the right choice either way.
678 */
679 if (shareable)
680 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
681 handle_fasteoi_irq, name);
682 else
683 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
684 handle_edge_irq, name);
685
686out:
687 mutex_unlock(&irq_mapping_update_lock);
688
689 return irq;
690}
691
692#ifdef CONFIG_PCI_MSI
693int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
694{
695 int rc;
696 struct physdev_get_free_pirq op_get_free_pirq;
697
698 op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
699 rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
700
701 WARN_ONCE(rc == -ENOSYS,
702 "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
703
704 return rc ? -1 : op_get_free_pirq.pirq;
705}
706
707int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
708 int pirq, int vector, const char *name,
709 domid_t domid)
710{
711 int irq, ret;
712
713 mutex_lock(&irq_mapping_update_lock);
714
715 irq = xen_allocate_irq_dynamic();
716 if (irq == -1)
717 goto out;
718
719 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
720 name);
721
722 xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0);
723 ret = irq_set_msi_desc(irq, msidesc);
724 if (ret < 0)
725 goto error_irq;
726out:
727 mutex_unlock(&irq_mapping_update_lock);
728 return irq;
729error_irq:
730 mutex_unlock(&irq_mapping_update_lock);
731 xen_free_irq(irq);
732 return -1;
733}
734#endif
735
736int xen_destroy_irq(int irq)
737{
738 struct irq_desc *desc;
739 struct physdev_unmap_pirq unmap_irq;
740 struct irq_info *info = info_for_irq(irq);
741 int rc = -ENOENT;
742
743 mutex_lock(&irq_mapping_update_lock);
744
745 desc = irq_to_desc(irq);
746 if (!desc)
747 goto out;
748
749 if (xen_initial_domain()) {
750 unmap_irq.pirq = info->u.pirq.pirq;
751 unmap_irq.domid = info->u.pirq.domid;
752 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
753 /* If another domain quits without making the pci_disable_msix
754 * call, the Xen hypervisor takes care of freeing the PIRQs
755 * (free_domain_pirqs).
756 */
757 if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
758 printk(KERN_INFO "domain %d does not have %d anymore\n",
759 info->u.pirq.domid, info->u.pirq.pirq);
760 else if (rc) {
761 printk(KERN_WARNING "unmap irq failed %d\n", rc);
762 goto out;
763 }
764 }
765
766 xen_free_irq(irq);
767
768out:
769 mutex_unlock(&irq_mapping_update_lock);
770 return rc;
771}
772
773int xen_irq_from_pirq(unsigned pirq)
774{
775 int irq;
776
777 struct irq_info *info;
778
779 mutex_lock(&irq_mapping_update_lock);
780
781 list_for_each_entry(info, &xen_irq_list_head, list) {
782 if (info == NULL || info->type != IRQT_PIRQ)
783 continue;
784 irq = info->irq;
785 if (info->u.pirq.pirq == pirq)
786 goto out;
787 }
788 irq = -1;
789out:
790 mutex_unlock(&irq_mapping_update_lock);
791
792 return irq;
793}
794
795
/* Exported wrapper around pirq_from_irq(); BUGs for non-PIRQ irqs. */
int xen_pirq_from_irq(unsigned irq)
{
	unsigned pirq = pirq_from_irq(irq);

	return pirq;
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
801int bind_evtchn_to_irq(unsigned int evtchn)
802{
803 int irq;
804
805 mutex_lock(&irq_mapping_update_lock);
806
807 irq = evtchn_to_irq[evtchn];
808
809 if (irq == -1) {
810 irq = xen_allocate_irq_dynamic();
811 if (irq == -1)
812 goto out;
813
814 irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
815 handle_edge_irq, "event");
816
817 xen_irq_info_evtchn_init(irq, evtchn);
818 }
819
820out:
821 mutex_unlock(&irq_mapping_update_lock);
822
823 return irq;
824}
825EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
826
827static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
828{
829 struct evtchn_bind_ipi bind_ipi;
830 int evtchn, irq;
831
832 mutex_lock(&irq_mapping_update_lock);
833
834 irq = per_cpu(ipi_to_irq, cpu)[ipi];
835
836 if (irq == -1) {
837 irq = xen_allocate_irq_dynamic();
838 if (irq < 0)
839 goto out;
840
841 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
842 handle_percpu_irq, "ipi");
843
844 bind_ipi.vcpu = cpu;
845 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
846 &bind_ipi) != 0)
847 BUG();
848 evtchn = bind_ipi.port;
849
850 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
851
852 bind_evtchn_to_cpu(evtchn, cpu);
853 }
854
855 out:
856 mutex_unlock(&irq_mapping_update_lock);
857 return irq;
858}
859
860static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
861 unsigned int remote_port)
862{
863 struct evtchn_bind_interdomain bind_interdomain;
864 int err;
865
866 bind_interdomain.remote_dom = remote_domain;
867 bind_interdomain.remote_port = remote_port;
868
869 err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
870 &bind_interdomain);
871
872 return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
873}
874
875
876int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
877{
878 struct evtchn_bind_virq bind_virq;
879 int evtchn, irq;
880
881 mutex_lock(&irq_mapping_update_lock);
882
883 irq = per_cpu(virq_to_irq, cpu)[virq];
884
885 if (irq == -1) {
886 irq = xen_allocate_irq_dynamic();
887 if (irq == -1)
888 goto out;
889
890 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
891 handle_percpu_irq, "virq");
892
893 bind_virq.virq = virq;
894 bind_virq.vcpu = cpu;
895 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
896 &bind_virq) != 0)
897 BUG();
898 evtchn = bind_virq.port;
899
900 xen_irq_info_virq_init(cpu, irq, evtchn, virq);
901
902 bind_evtchn_to_cpu(evtchn, cpu);
903 }
904
905out:
906 mutex_unlock(&irq_mapping_update_lock);
907
908 return irq;
909}
910
911static void unbind_from_irq(unsigned int irq)
912{
913 struct evtchn_close close;
914 int evtchn = evtchn_from_irq(irq);
915
916 mutex_lock(&irq_mapping_update_lock);
917
918 if (VALID_EVTCHN(evtchn)) {
919 close.port = evtchn;
920 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
921 BUG();
922
923 switch (type_from_irq(irq)) {
924 case IRQT_VIRQ:
925 per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
926 [virq_from_irq(irq)] = -1;
927 break;
928 case IRQT_IPI:
929 per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
930 [ipi_from_irq(irq)] = -1;
931 break;
932 default:
933 break;
934 }
935
936 /* Closed ports are implicitly re-bound to VCPU0. */
937 bind_evtchn_to_cpu(evtchn, 0);
938
939 evtchn_to_irq[evtchn] = -1;
940 }
941
942 BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
943
944 xen_free_irq(irq);
945
946 mutex_unlock(&irq_mapping_update_lock);
947}
948
949int bind_evtchn_to_irqhandler(unsigned int evtchn,
950 irq_handler_t handler,
951 unsigned long irqflags,
952 const char *devname, void *dev_id)
953{
954 int irq, retval;
955
956 irq = bind_evtchn_to_irq(evtchn);
957 if (irq < 0)
958 return irq;
959 retval = request_irq(irq, handler, irqflags, devname, dev_id);
960 if (retval != 0) {
961 unbind_from_irq(irq);
962 return retval;
963 }
964
965 return irq;
966}
967EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
968
969int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
970 unsigned int remote_port,
971 irq_handler_t handler,
972 unsigned long irqflags,
973 const char *devname,
974 void *dev_id)
975{
976 int irq, retval;
977
978 irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
979 if (irq < 0)
980 return irq;
981
982 retval = request_irq(irq, handler, irqflags, devname, dev_id);
983 if (retval != 0) {
984 unbind_from_irq(irq);
985 return retval;
986 }
987
988 return irq;
989}
990EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
991
992int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
993 irq_handler_t handler,
994 unsigned long irqflags, const char *devname, void *dev_id)
995{
996 int irq, retval;
997
998 irq = bind_virq_to_irq(virq, cpu);
999 if (irq < 0)
1000 return irq;
1001 retval = request_irq(irq, handler, irqflags, devname, dev_id);
1002 if (retval != 0) {
1003 unbind_from_irq(irq);
1004 return retval;
1005 }
1006
1007 return irq;
1008}
1009EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
1010
1011int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1012 unsigned int cpu,
1013 irq_handler_t handler,
1014 unsigned long irqflags,
1015 const char *devname,
1016 void *dev_id)
1017{
1018 int irq, retval;
1019
1020 irq = bind_ipi_to_irq(ipi, cpu);
1021 if (irq < 0)
1022 return irq;
1023
1024 irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
1025 retval = request_irq(irq, handler, irqflags, devname, dev_id);
1026 if (retval != 0) {
1027 unbind_from_irq(irq);
1028 return retval;
1029 }
1030
1031 return irq;
1032}
1033
/*
 * Counterpart of the bind_*_to_irqhandler() helpers: remove the handler
 * registered with @dev_id, then release the irq and its event channel.
 */
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
	free_irq(irq, dev_id);		/* handler first ... */
	unbind_from_irq(irq);		/* ... then the binding itself */
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1040
1041void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1042{
1043 int irq = per_cpu(ipi_to_irq, cpu)[vector];
1044 BUG_ON(irq < 0);
1045 notify_remote_via_irq(irq);
1046}
1047
/*
 * Debug interrupt handler: dump the event channel state to the kernel log.
 *
 * Prints, in order: per-vcpu upcall mask/pending flags and selector word,
 * the global pending bitmap, the global mask, pending-and-unmasked events,
 * this cpu's local channel mask, the events deliverable on this cpu, and a
 * per-port list of all pending events with the reason (if any) why each is
 * not deliverable here. Output is serialised with a function-local spinlock.
 * @irq and @dev_id are not used. Always returns IRQ_HANDLED.
 */
irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
{
	struct shared_info *sh = HYPERVISOR_shared_info;
	int cpu = smp_processor_id();
	unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
	int i;
	unsigned long flags;
	static DEFINE_SPINLOCK(debug_lock);
	struct vcpu_info *v;

	spin_lock_irqsave(&debug_lock, flags);

	printk("\nvcpu %d\n ", cpu);

	for_each_online_cpu(i) {
		int pending;
		v = per_cpu(xen_vcpu, i);
		/*
		 * For the current cpu, when interrupt registers are
		 * available, report the masked state from those registers;
		 * otherwise use the vcpu's upcall mask.
		 */
		pending = (get_irq_regs() && i == cpu)
			? xen_irqs_disabled(get_irq_regs())
			: v->evtchn_upcall_mask;
		printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i,
		       pending, v->evtchn_upcall_pending,
		       (int)(sizeof(v->evtchn_pending_sel)*2),
		       v->evtchn_pending_sel);
	}
	/* The per-port list below refers to the current cpu's vcpu_info. */
	v = per_cpu(xen_vcpu, cpu);

	/* Bitmaps are printed from the highest word down, 8 words per line. */
	printk("\npending:\n ");
	for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
		printk("%0*lx%s", (int)sizeof(sh->evtchn_pending[0])*2,
		       sh->evtchn_pending[i],
		       i % 8 == 0 ? "\n " : " ");
	printk("\nglobal mask:\n ");
	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
		printk("%0*lx%s",
		       (int)(sizeof(sh->evtchn_mask[0])*2),
		       sh->evtchn_mask[i],
		       i % 8 == 0 ? "\n " : " ");

	printk("\nglobally unmasked:\n ");
	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
		printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
		       sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
		       i % 8 == 0 ? "\n " : " ");

	printk("\nlocal cpu%d mask:\n ", cpu);
	for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--)
		printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
		       cpu_evtchn[i],
		       i % 8 == 0 ? "\n " : " ");

	printk("\nlocally unmasked:\n ");
	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
		/* pending, not globally masked, and bound to this cpu */
		unsigned long pending = sh->evtchn_pending[i]
			& ~sh->evtchn_mask[i]
			& cpu_evtchn[i];
		printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
		       pending, i % 8 == 0 ? "\n " : " ");
	}

	printk("\npending list:\n");
	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
		if (sync_test_bit(i, sh->evtchn_pending)) {
			int word_idx = i / BITS_PER_LONG;
			/*
			 * Each suffix is printed only when the corresponding
			 * condition would keep the event from being seen on
			 * this cpu: selector bit clear, globally masked, or
			 * not in this cpu's channel mask.
			 */
			printk(" %d: event %d -> irq %d%s%s%s\n",
			       cpu_from_evtchn(i), i,
			       evtchn_to_irq[i],
			       sync_test_bit(word_idx, &v->evtchn_pending_sel)
					     ? "" : " l2-clear",
			       !sync_test_bit(i, sh->evtchn_mask)
					     ? "" : " globally-masked",
			       sync_test_bit(i, cpu_evtchn)
					     ? "" : " locally-masked");
		}
	}

	spin_unlock_irqrestore(&debug_lock, flags);

	return IRQ_HANDLED;
}
1128
/* Per-cpu counter used by __xen_evtchn_do_upcall() to detect nested calls. */
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
/*
 * Per-cpu position (word and bit) at which the next event scan of
 * __xen_evtchn_do_upcall() starts.
 */
static DEFINE_PER_CPU(unsigned int, current_word_idx);
static DEFINE_PER_CPU(unsigned int, current_bit_idx);
1132
/*
 * Mask out the i least significant bits of w.
 *
 * Both arguments are parenthesised so that expressions such as
 * "a | b" can be passed without operator-precedence surprises.
 * i must be smaller than the width of unsigned long.
 */
#define MASK_LSBS(w, i) ((w) & ((~0UL) << (i)))
1137
1138/*
1139 * Search the CPUs pending events bitmasks. For each one found, map
1140 * the event number to an irq, and feed it into do_IRQ() for
1141 * handling.
1142 *
1143 * Xen uses a two-level bitmap to speed searching. The first level is
1144 * a bitset of words which contain pending event bits. The second
1145 * level is a bitset of pending events themselves.
1146 */
1147static void __xen_evtchn_do_upcall(void)
1148{
1149 int start_word_idx, start_bit_idx;
1150 int word_idx, bit_idx;
1151 int i;
1152 int cpu = get_cpu();
1153 struct shared_info *s = HYPERVISOR_shared_info;
1154 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1155 unsigned count;
1156
1157 do {
1158 unsigned long pending_words;
1159
1160 vcpu_info->evtchn_upcall_pending = 0;
1161
1162 if (__this_cpu_inc_return(xed_nesting_count) - 1)
1163 goto out;
1164
1165#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
1166 /* Clear master flag /before/ clearing selector flag. */
1167 wmb();
1168#endif
1169 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
1170
1171 start_word_idx = __this_cpu_read(current_word_idx);
1172 start_bit_idx = __this_cpu_read(current_bit_idx);
1173
1174 word_idx = start_word_idx;
1175
1176 for (i = 0; pending_words != 0; i++) {
1177 unsigned long pending_bits;
1178 unsigned long words;
1179
1180 words = MASK_LSBS(pending_words, word_idx);
1181
1182 /*
1183 * If we masked out all events, wrap to beginning.
1184 */
1185 if (words == 0) {
1186 word_idx = 0;
1187 bit_idx = 0;
1188 continue;
1189 }
1190 word_idx = __ffs(words);
1191
1192 pending_bits = active_evtchns(cpu, s, word_idx);
1193 bit_idx = 0; /* usually scan entire word from start */
1194 if (word_idx == start_word_idx) {
1195 /* We scan the starting word in two parts */
1196 if (i == 0)
1197 /* 1st time: start in the middle */
1198 bit_idx = start_bit_idx;
1199 else
1200 /* 2nd time: mask bits done already */
1201 bit_idx &= (1UL << start_bit_idx) - 1;
1202 }
1203
1204 do {
1205 unsigned long bits;
1206 int port, irq;
1207 struct irq_desc *desc;
1208
1209 bits = MASK_LSBS(pending_bits, bit_idx);
1210
1211 /* If we masked out all events, move on. */
1212 if (bits == 0)
1213 break;
1214
1215 bit_idx = __ffs(bits);
1216
1217 /* Process port. */
1218 port = (word_idx * BITS_PER_LONG) + bit_idx;
1219 irq = evtchn_to_irq[port];
1220
1221 if (irq != -1) {
1222 desc = irq_to_desc(irq);
1223 if (desc)
1224 generic_handle_irq_desc(irq, desc);
1225 }
1226
1227 bit_idx = (bit_idx + 1) % BITS_PER_LONG;
1228
1229 /* Next caller starts at last processed + 1 */
1230 __this_cpu_write(current_word_idx,
1231 bit_idx ? word_idx :
1232 (word_idx+1) % BITS_PER_LONG);
1233 __this_cpu_write(current_bit_idx, bit_idx);
1234 } while (bit_idx != 0);
1235
1236 /* Scan start_l1i twice; all others once. */
1237 if ((word_idx != start_word_idx) || (i != 0))
1238 pending_words &= ~(1UL << word_idx);
1239
1240 word_idx = (word_idx + 1) % BITS_PER_LONG;
1241 }
1242
1243 BUG_ON(!irqs_disabled());
1244
1245 count = __this_cpu_read(xed_nesting_count);
1246 __this_cpu_write(xed_nesting_count, 0);
1247 } while (count != 1 || vcpu_info->evtchn_upcall_pending);
1248
1249out:
1250
1251 put_cpu();
1252}
1253
/*
 * Event-channel upcall entry point that takes a register frame.
 *
 * Installs @regs as the current irq regs, leaves idle, and runs the
 * pending-event scan between irq_enter() and irq_exit() before putting
 * the previously installed regs back.
 */
void xen_evtchn_do_upcall(struct pt_regs *regs)
{
	struct pt_regs *saved_regs;

	saved_regs = set_irq_regs(regs);

	exit_idle();
	irq_enter();

	__xen_evtchn_do_upcall();

	irq_exit();
	set_irq_regs(saved_regs);
}
1266
/*
 * Exported upcall entry point that runs the pending-event scan directly,
 * without the set_irq_regs()/exit_idle()/irq_enter() bracketing done by
 * xen_evtchn_do_upcall().
 */
void xen_hvm_evtchn_do_upcall(void)
{
	__xen_evtchn_do_upcall();
}
EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
1272
1273/* Rebind a new event channel to an existing irq. */
1274void rebind_evtchn_irq(int evtchn, int irq)
1275{
1276 struct irq_info *info = info_for_irq(irq);
1277
1278 /* Make sure the irq is masked, since the new event channel
1279 will also be masked. */
1280 disable_irq(irq);
1281
1282 mutex_lock(&irq_mapping_update_lock);
1283
1284 /* After resume the irq<->evtchn mappings are all cleared out */
1285 BUG_ON(evtchn_to_irq[evtchn] != -1);
1286 /* Expect irq to have been bound before,
1287 so there should be a proper type */
1288 BUG_ON(info->type == IRQT_UNBOUND);
1289
1290 xen_irq_info_evtchn_init(irq, evtchn);
1291
1292 mutex_unlock(&irq_mapping_update_lock);
1293
1294 /* new event channels are always bound to cpu 0 */
1295 irq_set_affinity(irq, cpumask_of(0));
1296
1297 /* Unmask the event channel. */
1298 enable_irq(irq);
1299}
1300
1301/* Rebind an evtchn so that it gets delivered to a specific cpu */
1302static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1303{
1304 struct evtchn_bind_vcpu bind_vcpu;
1305 int evtchn = evtchn_from_irq(irq);
1306
1307 if (!VALID_EVTCHN(evtchn))
1308 return -1;
1309
1310 /*
1311 * Events delivered via platform PCI interrupts are always
1312 * routed to vcpu 0 and hence cannot be rebound.
1313 */
1314 if (xen_hvm_domain() && !xen_have_vector_callback)
1315 return -1;
1316
1317 /* Send future instances of this interrupt to other vcpu. */
1318 bind_vcpu.port = evtchn;
1319 bind_vcpu.vcpu = tcpu;
1320
1321 /*
1322 * If this fails, it usually just indicates that we're dealing with a
1323 * virq or IPI channel, which don't actually need to be rebound. Ignore
1324 * it, but don't do the xenlinux-level rebind in that case.
1325 */
1326 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1327 bind_evtchn_to_cpu(evtchn, tcpu);
1328
1329 return 0;
1330}
1331
1332static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1333 bool force)
1334{
1335 unsigned tcpu = cpumask_first(dest);
1336
1337 return rebind_irq_to_cpu(data->irq, tcpu);
1338}
1339
1340int resend_irq_on_evtchn(unsigned int irq)
1341{
1342 int masked, evtchn = evtchn_from_irq(irq);
1343 struct shared_info *s = HYPERVISOR_shared_info;
1344
1345 if (!VALID_EVTCHN(evtchn))
1346 return 1;
1347
1348 masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
1349 sync_set_bit(evtchn, s->evtchn_pending);
1350 if (!masked)
1351 unmask_evtchn(evtchn);
1352
1353 return 1;
1354}
1355
1356static void enable_dynirq(struct irq_data *data)
1357{
1358 int evtchn = evtchn_from_irq(data->irq);
1359
1360 if (VALID_EVTCHN(evtchn))
1361 unmask_evtchn(evtchn);
1362}
1363
1364static void disable_dynirq(struct irq_data *data)
1365{
1366 int evtchn = evtchn_from_irq(data->irq);
1367
1368 if (VALID_EVTCHN(evtchn))
1369 mask_evtchn(evtchn);
1370}
1371
1372static void ack_dynirq(struct irq_data *data)
1373{
1374 int evtchn = evtchn_from_irq(data->irq);
1375
1376 irq_move_irq(data);
1377
1378 if (VALID_EVTCHN(evtchn))
1379 clear_evtchn(evtchn);
1380}
1381
/* Combined mask + ack: mask the event channel first, then acknowledge it. */
static void mask_ack_dynirq(struct irq_data *data)
{
	disable_dynirq(data);
	ack_dynirq(data);
}
1387
1388static int retrigger_dynirq(struct irq_data *data)
1389{
1390 int evtchn = evtchn_from_irq(data->irq);
1391 struct shared_info *sh = HYPERVISOR_shared_info;
1392 int ret = 0;
1393
1394 if (VALID_EVTCHN(evtchn)) {
1395 int masked;
1396
1397 masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
1398 sync_set_bit(evtchn, sh->evtchn_pending);
1399 if (!masked)
1400 unmask_evtchn(evtchn);
1401 ret = 1;
1402 }
1403
1404 return ret;
1405}
1406
1407static void restore_pirqs(void)
1408{
1409 int pirq, rc, irq, gsi;
1410 struct physdev_map_pirq map_irq;
1411 struct irq_info *info;
1412
1413 list_for_each_entry(info, &xen_irq_list_head, list) {
1414 if (info->type != IRQT_PIRQ)
1415 continue;
1416
1417 pirq = info->u.pirq.pirq;
1418 gsi = info->u.pirq.gsi;
1419 irq = info->irq;
1420
1421 /* save/restore of PT devices doesn't work, so at this point the
1422 * only devices present are GSI based emulated devices */
1423 if (!gsi)
1424 continue;
1425
1426 map_irq.domid = DOMID_SELF;
1427 map_irq.type = MAP_PIRQ_TYPE_GSI;
1428 map_irq.index = gsi;
1429 map_irq.pirq = pirq;
1430
1431 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1432 if (rc) {
1433 printk(KERN_WARNING "xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1434 gsi, irq, pirq, rc);
1435 xen_free_irq(irq);
1436 continue;
1437 }
1438
1439 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1440
1441 __startup_pirq(irq);
1442 }
1443}
1444
1445static void restore_cpu_virqs(unsigned int cpu)
1446{
1447 struct evtchn_bind_virq bind_virq;
1448 int virq, irq, evtchn;
1449
1450 for (virq = 0; virq < NR_VIRQS; virq++) {
1451 if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1452 continue;
1453
1454 BUG_ON(virq_from_irq(irq) != virq);
1455
1456 /* Get a new binding from Xen. */
1457 bind_virq.virq = virq;
1458 bind_virq.vcpu = cpu;
1459 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1460 &bind_virq) != 0)
1461 BUG();
1462 evtchn = bind_virq.port;
1463
1464 /* Record the new mapping. */
1465 xen_irq_info_virq_init(cpu, irq, evtchn, virq);
1466 bind_evtchn_to_cpu(evtchn, cpu);
1467 }
1468}
1469
1470static void restore_cpu_ipis(unsigned int cpu)
1471{
1472 struct evtchn_bind_ipi bind_ipi;
1473 int ipi, irq, evtchn;
1474
1475 for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
1476 if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
1477 continue;
1478
1479 BUG_ON(ipi_from_irq(irq) != ipi);
1480
1481 /* Get a new binding from Xen. */
1482 bind_ipi.vcpu = cpu;
1483 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1484 &bind_ipi) != 0)
1485 BUG();
1486 evtchn = bind_ipi.port;
1487
1488 /* Record the new mapping. */
1489 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
1490 bind_evtchn_to_cpu(evtchn, cpu);
1491 }
1492}
1493
/*
 * Clear an irq's pending state, in preparation for polling on it.
 * Does nothing when @irq has no valid event channel.
 */
void xen_clear_irq_pending(int irq)
{
	int port = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(port))
		return;

	clear_evtchn(port);
}
EXPORT_SYMBOL(xen_clear_irq_pending);
/*
 * Set the pending bit of the event channel behind @irq.
 * Does nothing when @irq has no valid event channel.
 */
void xen_set_irq_pending(int irq)
{
	int port = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(port))
		return;

	set_evtchn(port);
}
1510
1511bool xen_test_irq_pending(int irq)
1512{
1513 int evtchn = evtchn_from_irq(irq);
1514 bool ret = false;
1515
1516 if (VALID_EVTCHN(evtchn))
1517 ret = test_evtchn(evtchn);
1518
1519 return ret;
1520}
1521
1522/* Poll waiting for an irq to become pending with timeout. In the usual case,
1523 * the irq will be disabled so it won't deliver an interrupt. */
1524void xen_poll_irq_timeout(int irq, u64 timeout)
1525{
1526 evtchn_port_t evtchn = evtchn_from_irq(irq);
1527
1528 if (VALID_EVTCHN(evtchn)) {
1529 struct sched_poll poll;
1530
1531 poll.nr_ports = 1;
1532 poll.timeout = timeout;
1533 set_xen_guest_handle(poll.ports, &evtchn);
1534
1535 if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
1536 BUG();
1537 }
1538}
1539EXPORT_SYMBOL(xen_poll_irq_timeout);
1540/* Poll waiting for an irq to become pending. In the usual case, the
1541 * irq will be disabled so it won't deliver an interrupt. */
1542void xen_poll_irq(int irq)
1543{
1544 xen_poll_irq_timeout(irq, 0 /* no timeout */);
1545}
1546
1547/* Check whether the IRQ line is shared with other guests. */
1548int xen_test_irq_shared(int irq)
1549{
1550 struct irq_info *info = info_for_irq(irq);
1551 struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq };
1552
1553 if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
1554 return 0;
1555 return !(irq_status.flags & XENIRQSTAT_shared);
1556}
1557EXPORT_SYMBOL_GPL(xen_test_irq_shared);
1558
1559void xen_irq_resume(void)
1560{
1561 unsigned int cpu, evtchn;
1562 struct irq_info *info;
1563
1564 init_evtchn_cpu_bindings();
1565
1566 /* New event-channel space is not 'live' yet. */
1567 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
1568 mask_evtchn(evtchn);
1569
1570 /* No IRQ <-> event-channel mappings. */
1571 list_for_each_entry(info, &xen_irq_list_head, list)
1572 info->evtchn = 0; /* zap event-channel binding */
1573
1574 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
1575 evtchn_to_irq[evtchn] = -1;
1576
1577 for_each_possible_cpu(cpu) {
1578 restore_cpu_virqs(cpu);
1579 restore_cpu_ipis(cpu);
1580 }
1581
1582 restore_pirqs();
1583}
1584
1585static struct irq_chip xen_dynamic_chip __read_mostly = {
1586 .name = "xen-dyn",
1587
1588 .irq_disable = disable_dynirq,
1589 .irq_mask = disable_dynirq,
1590 .irq_unmask = enable_dynirq,
1591
1592 .irq_ack = ack_dynirq,
1593 .irq_mask_ack = mask_ack_dynirq,
1594
1595 .irq_set_affinity = set_affinity_irq,
1596 .irq_retrigger = retrigger_dynirq,
1597};
1598
1599static struct irq_chip xen_pirq_chip __read_mostly = {
1600 .name = "xen-pirq",
1601
1602 .irq_startup = startup_pirq,
1603 .irq_shutdown = shutdown_pirq,
1604 .irq_enable = enable_pirq,
1605 .irq_disable = disable_pirq,
1606
1607 .irq_mask = disable_dynirq,
1608 .irq_unmask = enable_dynirq,
1609
1610 .irq_ack = eoi_pirq,
1611 .irq_eoi = eoi_pirq,
1612 .irq_mask_ack = mask_ack_pirq,
1613
1614 .irq_set_affinity = set_affinity_irq,
1615
1616 .irq_retrigger = retrigger_dynirq,
1617};
1618
1619static struct irq_chip xen_percpu_chip __read_mostly = {
1620 .name = "xen-percpu",
1621
1622 .irq_disable = disable_dynirq,
1623 .irq_mask = disable_dynirq,
1624 .irq_unmask = enable_dynirq,
1625
1626 .irq_ack = ack_dynirq,
1627};
1628
1629int xen_set_callback_via(uint64_t via)
1630{
1631 struct xen_hvm_param a;
1632 a.domid = DOMID_SELF;
1633 a.index = HVM_PARAM_CALLBACK_IRQ;
1634 a.value = via;
1635 return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
1636}
1637EXPORT_SYMBOL_GPL(xen_set_callback_via);
1638
#ifdef CONFIG_XEN_PVHVM
/*
 * Vector callbacks are better than PCI interrupts to receive event
 * channel notifications because we can receive vector callbacks on any
 * vcpu and we don't need PCI support or APIC interactions.
 *
 * When xen_have_vector_callback is set, ask Xen to deliver events through
 * XEN_HVM_EVTCHN_CALLBACK.  If the request fails the flag is cleared;
 * otherwise the interrupt gate is installed unless the vector is already
 * marked in used_vectors.
 */
void xen_callback_vector(void)
{
	uint64_t callback_via;
	int rc;

	if (!xen_have_vector_callback)
		return;

	callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK);
	rc = xen_set_callback_via(callback_via);
	if (rc) {
		printk(KERN_ERR "Request for Xen HVM callback vector"
				" failed.\n");
		xen_have_vector_callback = 0;
		return;
	}

	printk(KERN_INFO "Xen HVM callback vector for event delivery is "
			"enabled\n");

	/* in the restore case the vector has already been allocated */
	if (test_bit(XEN_HVM_EVTCHN_CALLBACK, used_vectors))
		return;

	alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector);
}
#else
/* Without CONFIG_XEN_PVHVM there is no vector callback to set up. */
void xen_callback_vector(void) {}
#endif
1666
1667void __init xen_init_IRQ(void)
1668{
1669 int i;
1670
1671 evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
1672 GFP_KERNEL);
1673 for (i = 0; i < NR_EVENT_CHANNELS; i++)
1674 evtchn_to_irq[i] = -1;
1675
1676 init_evtchn_cpu_bindings();
1677
1678 /* No event channels are 'live' right now. */
1679 for (i = 0; i < NR_EVENT_CHANNELS; i++)
1680 mask_evtchn(i);
1681
1682 if (xen_hvm_domain()) {
1683 xen_callback_vector();
1684 native_init_IRQ();
1685 /* pci_xen_hvm_init must be called after native_init_IRQ so that
1686 * __acpi_register_gsi can point at the right function */
1687 pci_xen_hvm_init();
1688 } else {
1689 irq_ctx_init(smp_processor_id());
1690 if (xen_initial_domain())
1691 pci_xen_initial_domain();
1692 }
1693}
1/*
2 * Xen event channels
3 *
4 * Xen models interrupts with abstract event channels. Because each
5 * domain gets 1024 event channels, but NR_IRQ is not that large, we
6 * must dynamically map irqs<->event channels. The event channels
7 * interface with the rest of the kernel by defining a xen interrupt
8 * chip. When an event is received, it is mapped to an irq and sent
9 * through the normal interrupt processing path.
10 *
11 * There are four kinds of events which can be mapped to an event
12 * channel:
13 *
14 * 1. Inter-domain notifications. This includes all the virtual
15 * device events, since they're driven by front-ends in another domain
16 * (typically dom0).
17 * 2. VIRQs, typically used for timers. These are per-cpu events.
18 * 3. IPIs.
19 * 4. PIRQs - Hardware interrupts.
20 *
21 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
22 */
23
24#include <linux/linkage.h>
25#include <linux/interrupt.h>
26#include <linux/irq.h>
27#include <linux/module.h>
28#include <linux/string.h>
29#include <linux/bootmem.h>
30#include <linux/slab.h>
31#include <linux/irqnr.h>
32#include <linux/pci.h>
33
34#include <asm/desc.h>
35#include <asm/ptrace.h>
36#include <asm/irq.h>
37#include <asm/idle.h>
38#include <asm/io_apic.h>
39#include <asm/sync_bitops.h>
40#include <asm/xen/page.h>
41#include <asm/xen/pci.h>
42#include <asm/xen/hypercall.h>
43#include <asm/xen/hypervisor.h>
44
45#include <xen/xen.h>
46#include <xen/hvm.h>
47#include <xen/xen-ops.h>
48#include <xen/events.h>
49#include <xen/interface/xen.h>
50#include <xen/interface/event_channel.h>
51#include <xen/interface/hvm/hvm_op.h>
52#include <xen/interface/hvm/params.h>
53
54/*
55 * This lock protects updates to the following mapping and reference-count
56 * arrays. The lock does not need to be acquired to read the mapping tables.
57 */
58static DEFINE_MUTEX(irq_mapping_update_lock);
59
60static LIST_HEAD(xen_irq_list_head);
61
62/* IRQ <-> VIRQ mapping. */
63static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
64
65/* IRQ <-> IPI mapping */
66static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
67
/*
 * Interrupt types.  IRQT_UNBOUND is zero, so a zero-filled irq_info
 * starts out unbound.
 */
enum xen_irq_type {
	IRQT_UNBOUND	= 0,
	IRQT_PIRQ	= 1,
	IRQT_VIRQ	= 2,
	IRQT_IPI	= 3,
	IRQT_EVTCHN	= 4
};
76
77/*
78 * Packed IRQ information:
79 * type - enum xen_irq_type
80 * event channel - irq->event channel mapping
81 * cpu - cpu this event channel is bound to
82 * index - type-specific information:
83 * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
84 * guest, or GSI (real passthrough IRQ) of the device.
85 * VIRQ - virq number
86 * IPI - IPI vector
87 * EVTCHN -
88 */
89struct irq_info {
90 struct list_head list;
91 int refcnt;
92 enum xen_irq_type type; /* type */
93 unsigned irq;
94 unsigned short evtchn; /* event channel */
95 unsigned short cpu; /* cpu bound */
96
97 union {
98 unsigned short virq;
99 enum ipi_vector ipi;
100 struct {
101 unsigned short pirq;
102 unsigned short gsi;
103 unsigned char vector;
104 unsigned char flags;
105 uint16_t domid;
106 } pirq;
107 } u;
108};
109#define PIRQ_NEEDS_EOI (1 << 0)
110#define PIRQ_SHAREABLE (1 << 1)
111
112static int *evtchn_to_irq;
113static unsigned long *pirq_eoi_map;
114static bool (*pirq_needs_eoi)(unsigned irq);
115
116static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG],
117 cpu_evtchn_mask);
118
119/* Xen will never allocate port zero for any purpose. */
120#define VALID_EVTCHN(chn) ((chn) != 0)
121
122static struct irq_chip xen_dynamic_chip;
123static struct irq_chip xen_percpu_chip;
124static struct irq_chip xen_pirq_chip;
125static void enable_dynirq(struct irq_data *data);
126static void disable_dynirq(struct irq_data *data);
127
/* Get the irq_info stored as handler data of @irq. */
static struct irq_info *info_for_irq(unsigned irq)
{
	struct irq_info *info = irq_get_handler_data(irq);

	return info;
}
133
134/* Constructors for packed IRQ information. */
135static void xen_irq_info_common_init(struct irq_info *info,
136 unsigned irq,
137 enum xen_irq_type type,
138 unsigned short evtchn,
139 unsigned short cpu)
140{
141
142 BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
143
144 info->type = type;
145 info->irq = irq;
146 info->evtchn = evtchn;
147 info->cpu = cpu;
148
149 evtchn_to_irq[evtchn] = irq;
150}
151
152static void xen_irq_info_evtchn_init(unsigned irq,
153 unsigned short evtchn)
154{
155 struct irq_info *info = info_for_irq(irq);
156
157 xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0);
158}
159
160static void xen_irq_info_ipi_init(unsigned cpu,
161 unsigned irq,
162 unsigned short evtchn,
163 enum ipi_vector ipi)
164{
165 struct irq_info *info = info_for_irq(irq);
166
167 xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
168
169 info->u.ipi = ipi;
170
171 per_cpu(ipi_to_irq, cpu)[ipi] = irq;
172}
173
174static void xen_irq_info_virq_init(unsigned cpu,
175 unsigned irq,
176 unsigned short evtchn,
177 unsigned short virq)
178{
179 struct irq_info *info = info_for_irq(irq);
180
181 xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
182
183 info->u.virq = virq;
184
185 per_cpu(virq_to_irq, cpu)[virq] = irq;
186}
187
188static void xen_irq_info_pirq_init(unsigned irq,
189 unsigned short evtchn,
190 unsigned short pirq,
191 unsigned short gsi,
192 unsigned short vector,
193 uint16_t domid,
194 unsigned char flags)
195{
196 struct irq_info *info = info_for_irq(irq);
197
198 xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
199
200 info->u.pirq.pirq = pirq;
201 info->u.pirq.gsi = gsi;
202 info->u.pirq.vector = vector;
203 info->u.pirq.domid = domid;
204 info->u.pirq.flags = flags;
205}
206
207/*
208 * Accessors for packed IRQ information.
209 */
210static unsigned int evtchn_from_irq(unsigned irq)
211{
212 if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
213 return 0;
214
215 return info_for_irq(irq)->evtchn;
216}
217
218unsigned irq_from_evtchn(unsigned int evtchn)
219{
220 return evtchn_to_irq[evtchn];
221}
222EXPORT_SYMBOL_GPL(irq_from_evtchn);
223
224static enum ipi_vector ipi_from_irq(unsigned irq)
225{
226 struct irq_info *info = info_for_irq(irq);
227
228 BUG_ON(info == NULL);
229 BUG_ON(info->type != IRQT_IPI);
230
231 return info->u.ipi;
232}
233
234static unsigned virq_from_irq(unsigned irq)
235{
236 struct irq_info *info = info_for_irq(irq);
237
238 BUG_ON(info == NULL);
239 BUG_ON(info->type != IRQT_VIRQ);
240
241 return info->u.virq;
242}
243
244static unsigned pirq_from_irq(unsigned irq)
245{
246 struct irq_info *info = info_for_irq(irq);
247
248 BUG_ON(info == NULL);
249 BUG_ON(info->type != IRQT_PIRQ);
250
251 return info->u.pirq.pirq;
252}
253
254static enum xen_irq_type type_from_irq(unsigned irq)
255{
256 return info_for_irq(irq)->type;
257}
258
259static unsigned cpu_from_irq(unsigned irq)
260{
261 return info_for_irq(irq)->cpu;
262}
263
264static unsigned int cpu_from_evtchn(unsigned int evtchn)
265{
266 int irq = evtchn_to_irq[evtchn];
267 unsigned ret = 0;
268
269 if (irq != -1)
270 ret = cpu_from_irq(irq);
271
272 return ret;
273}
274
275static bool pirq_check_eoi_map(unsigned irq)
276{
277 return test_bit(pirq_from_irq(irq), pirq_eoi_map);
278}
279
280static bool pirq_needs_eoi_flag(unsigned irq)
281{
282 struct irq_info *info = info_for_irq(irq);
283 BUG_ON(info->type != IRQT_PIRQ);
284
285 return info->u.pirq.flags & PIRQ_NEEDS_EOI;
286}
287
288static inline unsigned long active_evtchns(unsigned int cpu,
289 struct shared_info *sh,
290 unsigned int idx)
291{
292 return sh->evtchn_pending[idx] &
293 per_cpu(cpu_evtchn_mask, cpu)[idx] &
294 ~sh->evtchn_mask[idx];
295}
296
297static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
298{
299 int irq = evtchn_to_irq[chn];
300
301 BUG_ON(irq == -1);
302#ifdef CONFIG_SMP
303 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
304#endif
305
306 clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq)));
307 set_bit(chn, per_cpu(cpu_evtchn_mask, cpu));
308
309 info_for_irq(irq)->cpu = cpu;
310}
311
312static void init_evtchn_cpu_bindings(void)
313{
314 int i;
315#ifdef CONFIG_SMP
316 struct irq_info *info;
317
318 /* By default all event channels notify CPU#0. */
319 list_for_each_entry(info, &xen_irq_list_head, list) {
320 struct irq_desc *desc = irq_to_desc(info->irq);
321 cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
322 }
323#endif
324
325 for_each_possible_cpu(i)
326 memset(per_cpu(cpu_evtchn_mask, i),
327 (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i)));
328}
329
330static inline void clear_evtchn(int port)
331{
332 struct shared_info *s = HYPERVISOR_shared_info;
333 sync_clear_bit(port, &s->evtchn_pending[0]);
334}
335
336static inline void set_evtchn(int port)
337{
338 struct shared_info *s = HYPERVISOR_shared_info;
339 sync_set_bit(port, &s->evtchn_pending[0]);
340}
341
342static inline int test_evtchn(int port)
343{
344 struct shared_info *s = HYPERVISOR_shared_info;
345 return sync_test_bit(port, &s->evtchn_pending[0]);
346}
347
348
/**
 * notify_remote_via_irq - send event to remote end of event channel via irq
 * @irq: irq of event channel to send event to
 *
 * Unlike notify_remote_via_evtchn(), this is safe to use across
 * save/restore. Notifications on a broken connection are silently
 * dropped: nothing is sent when @irq has no valid event channel.
 */
void notify_remote_via_irq(int irq)
{
	int port = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(port))
		return;

	notify_remote_via_evtchn(port);
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
365
366static void mask_evtchn(int port)
367{
368 struct shared_info *s = HYPERVISOR_shared_info;
369 sync_set_bit(port, &s->evtchn_mask[0]);
370}
371
372static void unmask_evtchn(int port)
373{
374 struct shared_info *s = HYPERVISOR_shared_info;
375 unsigned int cpu = get_cpu();
376
377 BUG_ON(!irqs_disabled());
378
379 /* Slow path (hypercall) if this is a non-local port. */
380 if (unlikely(cpu != cpu_from_evtchn(port))) {
381 struct evtchn_unmask unmask = { .port = port };
382 (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
383 } else {
384 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
385
386 sync_clear_bit(port, &s->evtchn_mask[0]);
387
388 /*
389 * The following is basically the equivalent of
390 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
391 * the interrupt edge' if the channel is masked.
392 */
393 if (sync_test_bit(port, &s->evtchn_pending[0]) &&
394 !sync_test_and_set_bit(port / BITS_PER_LONG,
395 &vcpu_info->evtchn_pending_sel))
396 vcpu_info->evtchn_upcall_pending = 1;
397 }
398
399 put_cpu();
400}
401
402static void xen_irq_init(unsigned irq)
403{
404 struct irq_info *info;
405#ifdef CONFIG_SMP
406 struct irq_desc *desc = irq_to_desc(irq);
407
408 /* By default all event channels notify CPU#0. */
409 cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
410#endif
411
412 info = kzalloc(sizeof(*info), GFP_KERNEL);
413 if (info == NULL)
414 panic("Unable to allocate metadata for IRQ%d\n", irq);
415
416 info->type = IRQT_UNBOUND;
417 info->refcnt = -1;
418
419 irq_set_handler_data(irq, info);
420
421 list_add_tail(&info->list, &xen_irq_list_head);
422}
423
424static int __must_check xen_allocate_irq_dynamic(void)
425{
426 int first = 0;
427 int irq;
428
429#ifdef CONFIG_X86_IO_APIC
430 /*
431 * For an HVM guest or domain 0 which see "real" (emulated or
432 * actual respectively) GSIs we allocate dynamic IRQs
433 * e.g. those corresponding to event channels or MSIs
434 * etc. from the range above those "real" GSIs to avoid
435 * collisions.
436 */
437 if (xen_initial_domain() || xen_hvm_domain())
438 first = get_nr_irqs_gsi();
439#endif
440
441 irq = irq_alloc_desc_from(first, -1);
442
443 if (irq >= 0)
444 xen_irq_init(irq);
445
446 return irq;
447}
448
449static int __must_check xen_allocate_irq_gsi(unsigned gsi)
450{
451 int irq;
452
453 /*
454 * A PV guest has no concept of a GSI (since it has no ACPI
455 * nor access to/knowledge of the physical APICs). Therefore
456 * all IRQs are dynamically allocated from the entire IRQ
457 * space.
458 */
459 if (xen_pv_domain() && !xen_initial_domain())
460 return xen_allocate_irq_dynamic();
461
462 /* Legacy IRQ descriptors are already allocated by the arch. */
463 if (gsi < NR_IRQS_LEGACY)
464 irq = gsi;
465 else
466 irq = irq_alloc_desc_at(gsi, -1);
467
468 xen_irq_init(irq);
469
470 return irq;
471}
472
473static void xen_free_irq(unsigned irq)
474{
475 struct irq_info *info = irq_get_handler_data(irq);
476
477 list_del(&info->list);
478
479 irq_set_handler_data(irq, NULL);
480
481 WARN_ON(info->refcnt > 0);
482
483 kfree(info);
484
485 /* Legacy IRQ descriptors are managed by the arch. */
486 if (irq < NR_IRQS_LEGACY)
487 return;
488
489 irq_free_desc(irq);
490}
491
492static void pirq_query_unmask(int irq)
493{
494 struct physdev_irq_status_query irq_status;
495 struct irq_info *info = info_for_irq(irq);
496
497 BUG_ON(info->type != IRQT_PIRQ);
498
499 irq_status.irq = pirq_from_irq(irq);
500 if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
501 irq_status.flags = 0;
502
503 info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
504 if (irq_status.flags & XENIRQSTAT_needs_eoi)
505 info->u.pirq.flags |= PIRQ_NEEDS_EOI;
506}
507
508static bool probing_irq(int irq)
509{
510 struct irq_desc *desc = irq_to_desc(irq);
511
512 return desc && desc->action == NULL;
513}
514
515static void eoi_pirq(struct irq_data *data)
516{
517 int evtchn = evtchn_from_irq(data->irq);
518 struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
519 int rc = 0;
520
521 irq_move_irq(data);
522
523 if (VALID_EVTCHN(evtchn))
524 clear_evtchn(evtchn);
525
526 if (pirq_needs_eoi(data->irq)) {
527 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
528 WARN_ON(rc);
529 }
530}
531
/* Combined mask + ack for PIRQs: mask the event channel, then EOI it. */
static void mask_ack_pirq(struct irq_data *data)
{
	disable_dynirq(data);
	eoi_pirq(data);
}
537
538static unsigned int __startup_pirq(unsigned int irq)
539{
540 struct evtchn_bind_pirq bind_pirq;
541 struct irq_info *info = info_for_irq(irq);
542 int evtchn = evtchn_from_irq(irq);
543 int rc;
544
545 BUG_ON(info->type != IRQT_PIRQ);
546
547 if (VALID_EVTCHN(evtchn))
548 goto out;
549
550 bind_pirq.pirq = pirq_from_irq(irq);
551 /* NB. We are happy to share unless we are probing. */
552 bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
553 BIND_PIRQ__WILL_SHARE : 0;
554 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
555 if (rc != 0) {
556 if (!probing_irq(irq))
557 printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
558 irq);
559 return 0;
560 }
561 evtchn = bind_pirq.port;
562
563 pirq_query_unmask(irq);
564
565 evtchn_to_irq[evtchn] = irq;
566 bind_evtchn_to_cpu(evtchn, 0);
567 info->evtchn = evtchn;
568
569out:
570 unmask_evtchn(evtchn);
571 eoi_pirq(irq_get_irq_data(irq));
572
573 return 0;
574}
575
576static unsigned int startup_pirq(struct irq_data *data)
577{
578 return __startup_pirq(data->irq);
579}
580
581static void shutdown_pirq(struct irq_data *data)
582{
583 struct evtchn_close close;
584 unsigned int irq = data->irq;
585 struct irq_info *info = info_for_irq(irq);
586 int evtchn = evtchn_from_irq(irq);
587
588 BUG_ON(info->type != IRQT_PIRQ);
589
590 if (!VALID_EVTCHN(evtchn))
591 return;
592
593 mask_evtchn(evtchn);
594
595 close.port = evtchn;
596 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
597 BUG();
598
599 bind_evtchn_to_cpu(evtchn, 0);
600 evtchn_to_irq[evtchn] = -1;
601 info->evtchn = 0;
602}
603
/* irq_chip enable hook: enabling a PIRQ runs the startup path; the
 * startup return value is ignored. */
static void enable_pirq(struct irq_data *data)
{
	(void)startup_pirq(data);
}
608
/* irq_chip disable hook: disabling a PIRQ masks its event channel. */
static void disable_pirq(struct irq_data *data)
{
	disable_dynirq(data);
}
613
614int xen_irq_from_gsi(unsigned gsi)
615{
616 struct irq_info *info;
617
618 list_for_each_entry(info, &xen_irq_list_head, list) {
619 if (info->type != IRQT_PIRQ)
620 continue;
621
622 if (info->u.pirq.gsi == gsi)
623 return info->irq;
624 }
625
626 return -1;
627}
628EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
629
630/*
631 * Do not make any assumptions regarding the relationship between the
632 * IRQ number returned here and the Xen pirq argument.
633 *
634 * Note: We don't assign an event channel until the irq actually started
635 * up. Return an existing irq if we've already got one for the gsi.
636 *
637 * Shareable implies level triggered, not shareable implies edge
638 * triggered here.
639 */
640int xen_bind_pirq_gsi_to_irq(unsigned gsi,
641 unsigned pirq, int shareable, char *name)
642{
643 int irq = -1;
644 struct physdev_irq irq_op;
645
646 mutex_lock(&irq_mapping_update_lock);
647
648 irq = xen_irq_from_gsi(gsi);
649 if (irq != -1) {
650 printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
651 irq, gsi);
652 goto out;
653 }
654
655 irq = xen_allocate_irq_gsi(gsi);
656 if (irq < 0)
657 goto out;
658
659 irq_op.irq = irq;
660 irq_op.vector = 0;
661
662 /* Only the privileged domain can do this. For non-priv, the pcifront
663 * driver provides a PCI bus that does the call to do exactly
664 * this in the priv domain. */
665 if (xen_initial_domain() &&
666 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
667 xen_free_irq(irq);
668 irq = -ENOSPC;
669 goto out;
670 }
671
672 xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF,
673 shareable ? PIRQ_SHAREABLE : 0);
674
675 pirq_query_unmask(irq);
676 /* We try to use the handler with the appropriate semantic for the
677 * type of interrupt: if the interrupt is an edge triggered
678 * interrupt we use handle_edge_irq.
679 *
680 * On the other hand if the interrupt is level triggered we use
681 * handle_fasteoi_irq like the native code does for this kind of
682 * interrupts.
683 *
684 * Depending on the Xen version, pirq_needs_eoi might return true
685 * not only for level triggered interrupts but for edge triggered
686 * interrupts too. In any case Xen always honors the eoi mechanism,
687 * not injecting any more pirqs of the same kind if the first one
688 * hasn't received an eoi yet. Therefore using the fasteoi handler
689 * is the right choice either way.
690 */
691 if (shareable)
692 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
693 handle_fasteoi_irq, name);
694 else
695 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
696 handle_edge_irq, name);
697
698out:
699 mutex_unlock(&irq_mapping_update_lock);
700
701 return irq;
702}
703
704#ifdef CONFIG_PCI_MSI
705int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
706{
707 int rc;
708 struct physdev_get_free_pirq op_get_free_pirq;
709
710 op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
711 rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
712
713 WARN_ONCE(rc == -ENOSYS,
714 "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
715
716 return rc ? -1 : op_get_free_pirq.pirq;
717}
718
719int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
720 int pirq, int vector, const char *name,
721 domid_t domid)
722{
723 int irq, ret;
724
725 mutex_lock(&irq_mapping_update_lock);
726
727 irq = xen_allocate_irq_dynamic();
728 if (irq < 0)
729 goto out;
730
731 irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
732 name);
733
734 xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0);
735 ret = irq_set_msi_desc(irq, msidesc);
736 if (ret < 0)
737 goto error_irq;
738out:
739 mutex_unlock(&irq_mapping_update_lock);
740 return irq;
741error_irq:
742 mutex_unlock(&irq_mapping_update_lock);
743 xen_free_irq(irq);
744 return ret;
745}
746#endif
747
748int xen_destroy_irq(int irq)
749{
750 struct irq_desc *desc;
751 struct physdev_unmap_pirq unmap_irq;
752 struct irq_info *info = info_for_irq(irq);
753 int rc = -ENOENT;
754
755 mutex_lock(&irq_mapping_update_lock);
756
757 desc = irq_to_desc(irq);
758 if (!desc)
759 goto out;
760
761 if (xen_initial_domain()) {
762 unmap_irq.pirq = info->u.pirq.pirq;
763 unmap_irq.domid = info->u.pirq.domid;
764 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
765 /* If another domain quits without making the pci_disable_msix
766 * call, the Xen hypervisor takes care of freeing the PIRQs
767 * (free_domain_pirqs).
768 */
769 if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
770 printk(KERN_INFO "domain %d does not have %d anymore\n",
771 info->u.pirq.domid, info->u.pirq.pirq);
772 else if (rc) {
773 printk(KERN_WARNING "unmap irq failed %d\n", rc);
774 goto out;
775 }
776 }
777
778 xen_free_irq(irq);
779
780out:
781 mutex_unlock(&irq_mapping_update_lock);
782 return rc;
783}
784
785int xen_irq_from_pirq(unsigned pirq)
786{
787 int irq;
788
789 struct irq_info *info;
790
791 mutex_lock(&irq_mapping_update_lock);
792
793 list_for_each_entry(info, &xen_irq_list_head, list) {
794 if (info->type != IRQT_PIRQ)
795 continue;
796 irq = info->irq;
797 if (info->u.pirq.pirq == pirq)
798 goto out;
799 }
800 irq = -1;
801out:
802 mutex_unlock(&irq_mapping_update_lock);
803
804 return irq;
805}
806
807
/* Exported wrapper around pirq_from_irq(): return the pirq for @irq. */
int xen_pirq_from_irq(unsigned irq)
{
	int pirq = pirq_from_irq(irq);

	return pirq;
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
813int bind_evtchn_to_irq(unsigned int evtchn)
814{
815 int irq;
816
817 mutex_lock(&irq_mapping_update_lock);
818
819 irq = evtchn_to_irq[evtchn];
820
821 if (irq == -1) {
822 irq = xen_allocate_irq_dynamic();
823 if (irq == -1)
824 goto out;
825
826 irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
827 handle_edge_irq, "event");
828
829 xen_irq_info_evtchn_init(irq, evtchn);
830 } else {
831 struct irq_info *info = info_for_irq(irq);
832 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
833 }
834
835out:
836 mutex_unlock(&irq_mapping_update_lock);
837
838 return irq;
839}
840EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
841
842static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
843{
844 struct evtchn_bind_ipi bind_ipi;
845 int evtchn, irq;
846
847 mutex_lock(&irq_mapping_update_lock);
848
849 irq = per_cpu(ipi_to_irq, cpu)[ipi];
850
851 if (irq == -1) {
852 irq = xen_allocate_irq_dynamic();
853 if (irq < 0)
854 goto out;
855
856 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
857 handle_percpu_irq, "ipi");
858
859 bind_ipi.vcpu = cpu;
860 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
861 &bind_ipi) != 0)
862 BUG();
863 evtchn = bind_ipi.port;
864
865 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
866
867 bind_evtchn_to_cpu(evtchn, cpu);
868 } else {
869 struct irq_info *info = info_for_irq(irq);
870 WARN_ON(info == NULL || info->type != IRQT_IPI);
871 }
872
873 out:
874 mutex_unlock(&irq_mapping_update_lock);
875 return irq;
876}
877
878static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
879 unsigned int remote_port)
880{
881 struct evtchn_bind_interdomain bind_interdomain;
882 int err;
883
884 bind_interdomain.remote_dom = remote_domain;
885 bind_interdomain.remote_port = remote_port;
886
887 err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
888 &bind_interdomain);
889
890 return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
891}
892
893static int find_virq(unsigned int virq, unsigned int cpu)
894{
895 struct evtchn_status status;
896 int port, rc = -ENOENT;
897
898 memset(&status, 0, sizeof(status));
899 for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
900 status.dom = DOMID_SELF;
901 status.port = port;
902 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
903 if (rc < 0)
904 continue;
905 if (status.status != EVTCHNSTAT_virq)
906 continue;
907 if (status.u.virq == virq && status.vcpu == cpu) {
908 rc = port;
909 break;
910 }
911 }
912 return rc;
913}
914
915int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
916{
917 struct evtchn_bind_virq bind_virq;
918 int evtchn, irq, ret;
919
920 mutex_lock(&irq_mapping_update_lock);
921
922 irq = per_cpu(virq_to_irq, cpu)[virq];
923
924 if (irq == -1) {
925 irq = xen_allocate_irq_dynamic();
926 if (irq == -1)
927 goto out;
928
929 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
930 handle_percpu_irq, "virq");
931
932 bind_virq.virq = virq;
933 bind_virq.vcpu = cpu;
934 ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
935 &bind_virq);
936 if (ret == 0)
937 evtchn = bind_virq.port;
938 else {
939 if (ret == -EEXIST)
940 ret = find_virq(virq, cpu);
941 BUG_ON(ret < 0);
942 evtchn = ret;
943 }
944
945 xen_irq_info_virq_init(cpu, irq, evtchn, virq);
946
947 bind_evtchn_to_cpu(evtchn, cpu);
948 } else {
949 struct irq_info *info = info_for_irq(irq);
950 WARN_ON(info == NULL || info->type != IRQT_VIRQ);
951 }
952
953out:
954 mutex_unlock(&irq_mapping_update_lock);
955
956 return irq;
957}
958
959static void unbind_from_irq(unsigned int irq)
960{
961 struct evtchn_close close;
962 int evtchn = evtchn_from_irq(irq);
963 struct irq_info *info = irq_get_handler_data(irq);
964
965 mutex_lock(&irq_mapping_update_lock);
966
967 if (info->refcnt > 0) {
968 info->refcnt--;
969 if (info->refcnt != 0)
970 goto done;
971 }
972
973 if (VALID_EVTCHN(evtchn)) {
974 close.port = evtchn;
975 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
976 BUG();
977
978 switch (type_from_irq(irq)) {
979 case IRQT_VIRQ:
980 per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
981 [virq_from_irq(irq)] = -1;
982 break;
983 case IRQT_IPI:
984 per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
985 [ipi_from_irq(irq)] = -1;
986 break;
987 default:
988 break;
989 }
990
991 /* Closed ports are implicitly re-bound to VCPU0. */
992 bind_evtchn_to_cpu(evtchn, 0);
993
994 evtchn_to_irq[evtchn] = -1;
995 }
996
997 BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
998
999 xen_free_irq(irq);
1000
1001 done:
1002 mutex_unlock(&irq_mapping_update_lock);
1003}
1004
1005int bind_evtchn_to_irqhandler(unsigned int evtchn,
1006 irq_handler_t handler,
1007 unsigned long irqflags,
1008 const char *devname, void *dev_id)
1009{
1010 int irq, retval;
1011
1012 irq = bind_evtchn_to_irq(evtchn);
1013 if (irq < 0)
1014 return irq;
1015 retval = request_irq(irq, handler, irqflags, devname, dev_id);
1016 if (retval != 0) {
1017 unbind_from_irq(irq);
1018 return retval;
1019 }
1020
1021 return irq;
1022}
1023EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
1024
1025int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
1026 unsigned int remote_port,
1027 irq_handler_t handler,
1028 unsigned long irqflags,
1029 const char *devname,
1030 void *dev_id)
1031{
1032 int irq, retval;
1033
1034 irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
1035 if (irq < 0)
1036 return irq;
1037
1038 retval = request_irq(irq, handler, irqflags, devname, dev_id);
1039 if (retval != 0) {
1040 unbind_from_irq(irq);
1041 return retval;
1042 }
1043
1044 return irq;
1045}
1046EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
1047
1048int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1049 irq_handler_t handler,
1050 unsigned long irqflags, const char *devname, void *dev_id)
1051{
1052 int irq, retval;
1053
1054 irq = bind_virq_to_irq(virq, cpu);
1055 if (irq < 0)
1056 return irq;
1057 retval = request_irq(irq, handler, irqflags, devname, dev_id);
1058 if (retval != 0) {
1059 unbind_from_irq(irq);
1060 return retval;
1061 }
1062
1063 return irq;
1064}
1065EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
1066
1067int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1068 unsigned int cpu,
1069 irq_handler_t handler,
1070 unsigned long irqflags,
1071 const char *devname,
1072 void *dev_id)
1073{
1074 int irq, retval;
1075
1076 irq = bind_ipi_to_irq(ipi, cpu);
1077 if (irq < 0)
1078 return irq;
1079
1080 irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
1081 retval = request_irq(irq, handler, irqflags, devname, dev_id);
1082 if (retval != 0) {
1083 unbind_from_irq(irq);
1084 return retval;
1085 }
1086
1087 return irq;
1088}
1089
/*
 * Counterpart of the bind_*_to_irqhandler() helpers: remove the
 * handler registered with @dev_id from @irq, then drop the binding.
 */
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
	/* The handler has to go first, the binding second. */
	free_irq(irq, dev_id);

	unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1096
1097int evtchn_make_refcounted(unsigned int evtchn)
1098{
1099 int irq = evtchn_to_irq[evtchn];
1100 struct irq_info *info;
1101
1102 if (irq == -1)
1103 return -ENOENT;
1104
1105 info = irq_get_handler_data(irq);
1106
1107 if (!info)
1108 return -ENOENT;
1109
1110 WARN_ON(info->refcnt != -1);
1111
1112 info->refcnt = 1;
1113
1114 return 0;
1115}
1116EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1117
1118int evtchn_get(unsigned int evtchn)
1119{
1120 int irq;
1121 struct irq_info *info;
1122 int err = -ENOENT;
1123
1124 if (evtchn >= NR_EVENT_CHANNELS)
1125 return -EINVAL;
1126
1127 mutex_lock(&irq_mapping_update_lock);
1128
1129 irq = evtchn_to_irq[evtchn];
1130 if (irq == -1)
1131 goto done;
1132
1133 info = irq_get_handler_data(irq);
1134
1135 if (!info)
1136 goto done;
1137
1138 err = -EINVAL;
1139 if (info->refcnt <= 0)
1140 goto done;
1141
1142 info->refcnt++;
1143 err = 0;
1144 done:
1145 mutex_unlock(&irq_mapping_update_lock);
1146
1147 return err;
1148}
1149EXPORT_SYMBOL_GPL(evtchn_get);
1150
1151void evtchn_put(unsigned int evtchn)
1152{
1153 int irq = evtchn_to_irq[evtchn];
1154 if (WARN_ON(irq == -1))
1155 return;
1156 unbind_from_irq(irq);
1157}
1158EXPORT_SYMBOL_GPL(evtchn_put);
1159
1160void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1161{
1162 int irq = per_cpu(ipi_to_irq, cpu)[vector];
1163 BUG_ON(irq < 0);
1164 notify_remote_via_irq(irq);
1165}
1166
1167irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
1168{
1169 struct shared_info *sh = HYPERVISOR_shared_info;
1170 int cpu = smp_processor_id();
1171 unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
1172 int i;
1173 unsigned long flags;
1174 static DEFINE_SPINLOCK(debug_lock);
1175 struct vcpu_info *v;
1176
1177 spin_lock_irqsave(&debug_lock, flags);
1178
1179 printk("\nvcpu %d\n ", cpu);
1180
1181 for_each_online_cpu(i) {
1182 int pending;
1183 v = per_cpu(xen_vcpu, i);
1184 pending = (get_irq_regs() && i == cpu)
1185 ? xen_irqs_disabled(get_irq_regs())
1186 : v->evtchn_upcall_mask;
1187 printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i,
1188 pending, v->evtchn_upcall_pending,
1189 (int)(sizeof(v->evtchn_pending_sel)*2),
1190 v->evtchn_pending_sel);
1191 }
1192 v = per_cpu(xen_vcpu, cpu);
1193
1194 printk("\npending:\n ");
1195 for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
1196 printk("%0*lx%s", (int)sizeof(sh->evtchn_pending[0])*2,
1197 sh->evtchn_pending[i],
1198 i % 8 == 0 ? "\n " : " ");
1199 printk("\nglobal mask:\n ");
1200 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
1201 printk("%0*lx%s",
1202 (int)(sizeof(sh->evtchn_mask[0])*2),
1203 sh->evtchn_mask[i],
1204 i % 8 == 0 ? "\n " : " ");
1205
1206 printk("\nglobally unmasked:\n ");
1207 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
1208 printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
1209 sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
1210 i % 8 == 0 ? "\n " : " ");
1211
1212 printk("\nlocal cpu%d mask:\n ", cpu);
1213 for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--)
1214 printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
1215 cpu_evtchn[i],
1216 i % 8 == 0 ? "\n " : " ");
1217
1218 printk("\nlocally unmasked:\n ");
1219 for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
1220 unsigned long pending = sh->evtchn_pending[i]
1221 & ~sh->evtchn_mask[i]
1222 & cpu_evtchn[i];
1223 printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
1224 pending, i % 8 == 0 ? "\n " : " ");
1225 }
1226
1227 printk("\npending list:\n");
1228 for (i = 0; i < NR_EVENT_CHANNELS; i++) {
1229 if (sync_test_bit(i, sh->evtchn_pending)) {
1230 int word_idx = i / BITS_PER_LONG;
1231 printk(" %d: event %d -> irq %d%s%s%s\n",
1232 cpu_from_evtchn(i), i,
1233 evtchn_to_irq[i],
1234 sync_test_bit(word_idx, &v->evtchn_pending_sel)
1235 ? "" : " l2-clear",
1236 !sync_test_bit(i, sh->evtchn_mask)
1237 ? "" : " globally-masked",
1238 sync_test_bit(i, cpu_evtchn)
1239 ? "" : " locally-masked");
1240 }
1241 }
1242
1243 spin_unlock_irqrestore(&debug_lock, flags);
1244
1245 return IRQ_HANDLED;
1246}
1247
1248static DEFINE_PER_CPU(unsigned, xed_nesting_count);
1249static DEFINE_PER_CPU(unsigned int, current_word_idx);
1250static DEFINE_PER_CPU(unsigned int, current_bit_idx);
1251
/*
 * Mask out the i least significant bits of w.
 *
 * Both arguments are parenthesised so that expressions such as
 * "a | b" can be passed without operator-precedence surprises.
 * i must be smaller than the width of unsigned long.
 */
#define MASK_LSBS(w, i) ((w) & ((~0UL) << (i)))
1256
1257/*
1258 * Search the CPUs pending events bitmasks. For each one found, map
1259 * the event number to an irq, and feed it into do_IRQ() for
1260 * handling.
1261 *
1262 * Xen uses a two-level bitmap to speed searching. The first level is
1263 * a bitset of words which contain pending event bits. The second
1264 * level is a bitset of pending events themselves.
1265 */
1266static void __xen_evtchn_do_upcall(void)
1267{
1268 int start_word_idx, start_bit_idx;
1269 int word_idx, bit_idx;
1270 int i;
1271 int cpu = get_cpu();
1272 struct shared_info *s = HYPERVISOR_shared_info;
1273 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1274 unsigned count;
1275
1276 do {
1277 unsigned long pending_words;
1278
1279 vcpu_info->evtchn_upcall_pending = 0;
1280
1281 if (__this_cpu_inc_return(xed_nesting_count) - 1)
1282 goto out;
1283
1284#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
1285 /* Clear master flag /before/ clearing selector flag. */
1286 wmb();
1287#endif
1288 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
1289
1290 start_word_idx = __this_cpu_read(current_word_idx);
1291 start_bit_idx = __this_cpu_read(current_bit_idx);
1292
1293 word_idx = start_word_idx;
1294
1295 for (i = 0; pending_words != 0; i++) {
1296 unsigned long pending_bits;
1297 unsigned long words;
1298
1299 words = MASK_LSBS(pending_words, word_idx);
1300
1301 /*
1302 * If we masked out all events, wrap to beginning.
1303 */
1304 if (words == 0) {
1305 word_idx = 0;
1306 bit_idx = 0;
1307 continue;
1308 }
1309 word_idx = __ffs(words);
1310
1311 pending_bits = active_evtchns(cpu, s, word_idx);
1312 bit_idx = 0; /* usually scan entire word from start */
1313 if (word_idx == start_word_idx) {
1314 /* We scan the starting word in two parts */
1315 if (i == 0)
1316 /* 1st time: start in the middle */
1317 bit_idx = start_bit_idx;
1318 else
1319 /* 2nd time: mask bits done already */
1320 bit_idx &= (1UL << start_bit_idx) - 1;
1321 }
1322
1323 do {
1324 unsigned long bits;
1325 int port, irq;
1326 struct irq_desc *desc;
1327
1328 bits = MASK_LSBS(pending_bits, bit_idx);
1329
1330 /* If we masked out all events, move on. */
1331 if (bits == 0)
1332 break;
1333
1334 bit_idx = __ffs(bits);
1335
1336 /* Process port. */
1337 port = (word_idx * BITS_PER_LONG) + bit_idx;
1338 irq = evtchn_to_irq[port];
1339
1340 if (irq != -1) {
1341 desc = irq_to_desc(irq);
1342 if (desc)
1343 generic_handle_irq_desc(irq, desc);
1344 }
1345
1346 bit_idx = (bit_idx + 1) % BITS_PER_LONG;
1347
1348 /* Next caller starts at last processed + 1 */
1349 __this_cpu_write(current_word_idx,
1350 bit_idx ? word_idx :
1351 (word_idx+1) % BITS_PER_LONG);
1352 __this_cpu_write(current_bit_idx, bit_idx);
1353 } while (bit_idx != 0);
1354
1355 /* Scan start_l1i twice; all others once. */
1356 if ((word_idx != start_word_idx) || (i != 0))
1357 pending_words &= ~(1UL << word_idx);
1358
1359 word_idx = (word_idx + 1) % BITS_PER_LONG;
1360 }
1361
1362 BUG_ON(!irqs_disabled());
1363
1364 count = __this_cpu_read(xed_nesting_count);
1365 __this_cpu_write(xed_nesting_count, 0);
1366 } while (count != 1 || vcpu_info->evtchn_upcall_pending);
1367
1368out:
1369
1370 put_cpu();
1371}
1372
/*
 * Event channel upcall entry point taking the interrupted register
 * state: installs @regs as the current irq regs, runs the event scan
 * between irq_enter() and irq_exit(), then restores the previous irq
 * regs.
 */
void xen_evtchn_do_upcall(struct pt_regs *regs)
{
	struct pt_regs *saved_regs;

	saved_regs = set_irq_regs(regs);

	exit_idle();
	irq_enter();

	__xen_evtchn_do_upcall();

	irq_exit();

	set_irq_regs(saved_regs);
}
1385
/*
 * Exported upcall entry point: runs the event scan directly, without
 * the irq regs and irq_enter()/irq_exit() handling done by
 * xen_evtchn_do_upcall().
 */
void xen_hvm_evtchn_do_upcall(void)
{
	__xen_evtchn_do_upcall();
}
EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
1391
1392/* Rebind a new event channel to an existing irq. */
1393void rebind_evtchn_irq(int evtchn, int irq)
1394{
1395 struct irq_info *info = info_for_irq(irq);
1396
1397 /* Make sure the irq is masked, since the new event channel
1398 will also be masked. */
1399 disable_irq(irq);
1400
1401 mutex_lock(&irq_mapping_update_lock);
1402
1403 /* After resume the irq<->evtchn mappings are all cleared out */
1404 BUG_ON(evtchn_to_irq[evtchn] != -1);
1405 /* Expect irq to have been bound before,
1406 so there should be a proper type */
1407 BUG_ON(info->type == IRQT_UNBOUND);
1408
1409 xen_irq_info_evtchn_init(irq, evtchn);
1410
1411 mutex_unlock(&irq_mapping_update_lock);
1412
1413 /* new event channels are always bound to cpu 0 */
1414 irq_set_affinity(irq, cpumask_of(0));
1415
1416 /* Unmask the event channel. */
1417 enable_irq(irq);
1418}
1419
1420/* Rebind an evtchn so that it gets delivered to a specific cpu */
1421static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1422{
1423 struct evtchn_bind_vcpu bind_vcpu;
1424 int evtchn = evtchn_from_irq(irq);
1425
1426 if (!VALID_EVTCHN(evtchn))
1427 return -1;
1428
1429 /*
1430 * Events delivered via platform PCI interrupts are always
1431 * routed to vcpu 0 and hence cannot be rebound.
1432 */
1433 if (xen_hvm_domain() && !xen_have_vector_callback)
1434 return -1;
1435
1436 /* Send future instances of this interrupt to other vcpu. */
1437 bind_vcpu.port = evtchn;
1438 bind_vcpu.vcpu = tcpu;
1439
1440 /*
1441 * If this fails, it usually just indicates that we're dealing with a
1442 * virq or IPI channel, which don't actually need to be rebound. Ignore
1443 * it, but don't do the xenlinux-level rebind in that case.
1444 */
1445 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1446 bind_evtchn_to_cpu(evtchn, tcpu);
1447
1448 return 0;
1449}
1450
1451static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1452 bool force)
1453{
1454 unsigned tcpu = cpumask_first(dest);
1455
1456 return rebind_irq_to_cpu(data->irq, tcpu);
1457}
1458
1459int resend_irq_on_evtchn(unsigned int irq)
1460{
1461 int masked, evtchn = evtchn_from_irq(irq);
1462 struct shared_info *s = HYPERVISOR_shared_info;
1463
1464 if (!VALID_EVTCHN(evtchn))
1465 return 1;
1466
1467 masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
1468 sync_set_bit(evtchn, s->evtchn_pending);
1469 if (!masked)
1470 unmask_evtchn(evtchn);
1471
1472 return 1;
1473}
1474
1475static void enable_dynirq(struct irq_data *data)
1476{
1477 int evtchn = evtchn_from_irq(data->irq);
1478
1479 if (VALID_EVTCHN(evtchn))
1480 unmask_evtchn(evtchn);
1481}
1482
1483static void disable_dynirq(struct irq_data *data)
1484{
1485 int evtchn = evtchn_from_irq(data->irq);
1486
1487 if (VALID_EVTCHN(evtchn))
1488 mask_evtchn(evtchn);
1489}
1490
1491static void ack_dynirq(struct irq_data *data)
1492{
1493 int evtchn = evtchn_from_irq(data->irq);
1494
1495 irq_move_irq(data);
1496
1497 if (VALID_EVTCHN(evtchn))
1498 clear_evtchn(evtchn);
1499}
1500
/* Combined mask + acknowledge: mask the channel first, then ack it. */
static void mask_ack_dynirq(struct irq_data *data)
{
	disable_dynirq(data);

	ack_dynirq(data);
}
1506
1507static int retrigger_dynirq(struct irq_data *data)
1508{
1509 int evtchn = evtchn_from_irq(data->irq);
1510 struct shared_info *sh = HYPERVISOR_shared_info;
1511 int ret = 0;
1512
1513 if (VALID_EVTCHN(evtchn)) {
1514 int masked;
1515
1516 masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
1517 sync_set_bit(evtchn, sh->evtchn_pending);
1518 if (!masked)
1519 unmask_evtchn(evtchn);
1520 ret = 1;
1521 }
1522
1523 return ret;
1524}
1525
1526static void restore_pirqs(void)
1527{
1528 int pirq, rc, irq, gsi;
1529 struct physdev_map_pirq map_irq;
1530 struct irq_info *info;
1531
1532 list_for_each_entry(info, &xen_irq_list_head, list) {
1533 if (info->type != IRQT_PIRQ)
1534 continue;
1535
1536 pirq = info->u.pirq.pirq;
1537 gsi = info->u.pirq.gsi;
1538 irq = info->irq;
1539
1540 /* save/restore of PT devices doesn't work, so at this point the
1541 * only devices present are GSI based emulated devices */
1542 if (!gsi)
1543 continue;
1544
1545 map_irq.domid = DOMID_SELF;
1546 map_irq.type = MAP_PIRQ_TYPE_GSI;
1547 map_irq.index = gsi;
1548 map_irq.pirq = pirq;
1549
1550 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1551 if (rc) {
1552 printk(KERN_WARNING "xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1553 gsi, irq, pirq, rc);
1554 xen_free_irq(irq);
1555 continue;
1556 }
1557
1558 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1559
1560 __startup_pirq(irq);
1561 }
1562}
1563
1564static void restore_cpu_virqs(unsigned int cpu)
1565{
1566 struct evtchn_bind_virq bind_virq;
1567 int virq, irq, evtchn;
1568
1569 for (virq = 0; virq < NR_VIRQS; virq++) {
1570 if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1571 continue;
1572
1573 BUG_ON(virq_from_irq(irq) != virq);
1574
1575 /* Get a new binding from Xen. */
1576 bind_virq.virq = virq;
1577 bind_virq.vcpu = cpu;
1578 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1579 &bind_virq) != 0)
1580 BUG();
1581 evtchn = bind_virq.port;
1582
1583 /* Record the new mapping. */
1584 xen_irq_info_virq_init(cpu, irq, evtchn, virq);
1585 bind_evtchn_to_cpu(evtchn, cpu);
1586 }
1587}
1588
1589static void restore_cpu_ipis(unsigned int cpu)
1590{
1591 struct evtchn_bind_ipi bind_ipi;
1592 int ipi, irq, evtchn;
1593
1594 for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
1595 if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
1596 continue;
1597
1598 BUG_ON(ipi_from_irq(irq) != ipi);
1599
1600 /* Get a new binding from Xen. */
1601 bind_ipi.vcpu = cpu;
1602 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1603 &bind_ipi) != 0)
1604 BUG();
1605 evtchn = bind_ipi.port;
1606
1607 /* Record the new mapping. */
1608 xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
1609 bind_evtchn_to_cpu(evtchn, cpu);
1610 }
1611}
1612
/*
 * Clear an irq's pending state, in preparation for polling on it.
 * Does nothing if the irq has no valid event channel.
 */
void xen_clear_irq_pending(int irq)
{
	int port = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(port))
		return;

	clear_evtchn(port);
}
EXPORT_SYMBOL(xen_clear_irq_pending);
/*
 * Mark the event channel of @irq pending.  Does nothing if the irq
 * has no valid event channel.
 */
void xen_set_irq_pending(int irq)
{
	int port = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(port))
		return;

	set_evtchn(port);
}
1629
1630bool xen_test_irq_pending(int irq)
1631{
1632 int evtchn = evtchn_from_irq(irq);
1633 bool ret = false;
1634
1635 if (VALID_EVTCHN(evtchn))
1636 ret = test_evtchn(evtchn);
1637
1638 return ret;
1639}
1640
1641/* Poll waiting for an irq to become pending with timeout. In the usual case,
1642 * the irq will be disabled so it won't deliver an interrupt. */
1643void xen_poll_irq_timeout(int irq, u64 timeout)
1644{
1645 evtchn_port_t evtchn = evtchn_from_irq(irq);
1646
1647 if (VALID_EVTCHN(evtchn)) {
1648 struct sched_poll poll;
1649
1650 poll.nr_ports = 1;
1651 poll.timeout = timeout;
1652 set_xen_guest_handle(poll.ports, &evtchn);
1653
1654 if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
1655 BUG();
1656 }
1657}
1658EXPORT_SYMBOL(xen_poll_irq_timeout);
1659/* Poll waiting for an irq to become pending. In the usual case, the
1660 * irq will be disabled so it won't deliver an interrupt. */
1661void xen_poll_irq(int irq)
1662{
1663 xen_poll_irq_timeout(irq, 0 /* no timeout */);
1664}
1665
1666/* Check whether the IRQ line is shared with other guests. */
1667int xen_test_irq_shared(int irq)
1668{
1669 struct irq_info *info = info_for_irq(irq);
1670 struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq };
1671
1672 if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
1673 return 0;
1674 return !(irq_status.flags & XENIRQSTAT_shared);
1675}
1676EXPORT_SYMBOL_GPL(xen_test_irq_shared);
1677
1678void xen_irq_resume(void)
1679{
1680 unsigned int cpu, evtchn;
1681 struct irq_info *info;
1682
1683 init_evtchn_cpu_bindings();
1684
1685 /* New event-channel space is not 'live' yet. */
1686 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
1687 mask_evtchn(evtchn);
1688
1689 /* No IRQ <-> event-channel mappings. */
1690 list_for_each_entry(info, &xen_irq_list_head, list)
1691 info->evtchn = 0; /* zap event-channel binding */
1692
1693 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
1694 evtchn_to_irq[evtchn] = -1;
1695
1696 for_each_possible_cpu(cpu) {
1697 restore_cpu_virqs(cpu);
1698 restore_cpu_ipis(cpu);
1699 }
1700
1701 restore_pirqs();
1702}
1703
1704static struct irq_chip xen_dynamic_chip __read_mostly = {
1705 .name = "xen-dyn",
1706
1707 .irq_disable = disable_dynirq,
1708 .irq_mask = disable_dynirq,
1709 .irq_unmask = enable_dynirq,
1710
1711 .irq_ack = ack_dynirq,
1712 .irq_mask_ack = mask_ack_dynirq,
1713
1714 .irq_set_affinity = set_affinity_irq,
1715 .irq_retrigger = retrigger_dynirq,
1716};
1717
1718static struct irq_chip xen_pirq_chip __read_mostly = {
1719 .name = "xen-pirq",
1720
1721 .irq_startup = startup_pirq,
1722 .irq_shutdown = shutdown_pirq,
1723 .irq_enable = enable_pirq,
1724 .irq_disable = disable_pirq,
1725
1726 .irq_mask = disable_dynirq,
1727 .irq_unmask = enable_dynirq,
1728
1729 .irq_ack = eoi_pirq,
1730 .irq_eoi = eoi_pirq,
1731 .irq_mask_ack = mask_ack_pirq,
1732
1733 .irq_set_affinity = set_affinity_irq,
1734
1735 .irq_retrigger = retrigger_dynirq,
1736};
1737
1738static struct irq_chip xen_percpu_chip __read_mostly = {
1739 .name = "xen-percpu",
1740
1741 .irq_disable = disable_dynirq,
1742 .irq_mask = disable_dynirq,
1743 .irq_unmask = enable_dynirq,
1744
1745 .irq_ack = ack_dynirq,
1746};
1747
1748int xen_set_callback_via(uint64_t via)
1749{
1750 struct xen_hvm_param a;
1751 a.domid = DOMID_SELF;
1752 a.index = HVM_PARAM_CALLBACK_IRQ;
1753 a.value = via;
1754 return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
1755}
1756EXPORT_SYMBOL_GPL(xen_set_callback_via);
1757
#ifdef CONFIG_XEN_PVHVM
/*
 * Vector callbacks are better than PCI interrupts to receive event
 * channel notifications because we can receive vector callbacks on any
 * vcpu and we don't need PCI support or APIC interactions.
 *
 * Does nothing unless xen_have_vector_callback is set.  If the
 * hypervisor rejects the request, xen_have_vector_callback is cleared.
 */
void xen_callback_vector(void)
{
	uint64_t via;
	int err;

	if (!xen_have_vector_callback)
		return;

	via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK);
	err = xen_set_callback_via(via);
	if (err) {
		printk(KERN_ERR "Request for Xen HVM callback vector"
				" failed.\n");
		xen_have_vector_callback = 0;
		return;
	}

	printk(KERN_INFO "Xen HVM callback vector for event delivery is "
			"enabled\n");

	/* in the restore case the vector has already been allocated */
	if (!test_bit(XEN_HVM_EVTCHN_CALLBACK, used_vectors))
		alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector);
}
#else
void xen_callback_vector(void) {}
#endif
1785
1786void __init xen_init_IRQ(void)
1787{
1788 int i, rc;
1789
1790 evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
1791 GFP_KERNEL);
1792 BUG_ON(!evtchn_to_irq);
1793 for (i = 0; i < NR_EVENT_CHANNELS; i++)
1794 evtchn_to_irq[i] = -1;
1795
1796 init_evtchn_cpu_bindings();
1797
1798 /* No event channels are 'live' right now. */
1799 for (i = 0; i < NR_EVENT_CHANNELS; i++)
1800 mask_evtchn(i);
1801
1802 pirq_needs_eoi = pirq_needs_eoi_flag;
1803
1804 if (xen_hvm_domain()) {
1805 xen_callback_vector();
1806 native_init_IRQ();
1807 /* pci_xen_hvm_init must be called after native_init_IRQ so that
1808 * __acpi_register_gsi can point at the right function */
1809 pci_xen_hvm_init();
1810 } else {
1811 struct physdev_pirq_eoi_gmfn eoi_gmfn;
1812
1813 irq_ctx_init(smp_processor_id());
1814 if (xen_initial_domain())
1815 pci_xen_initial_domain();
1816
1817 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
1818 eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
1819 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
1820 if (rc != 0) {
1821 free_page((unsigned long) pirq_eoi_map);
1822 pirq_eoi_map = NULL;
1823 } else
1824 pirq_needs_eoi = pirq_check_eoi_map;
1825 }
1826}