drivers/xen/events/events_base.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Xen event channels
4  *
5  * Xen models interrupts with abstract event channels.  Because each
6  * domain gets 1024 event channels, but NR_IRQS is not that large, we
7  * must dynamically map irqs<->event channels.  The event channels
8  * interface with the rest of the kernel by defining a xen interrupt
9  * chip.  When an event is received, it is mapped to an irq and sent
10  * through the normal interrupt processing path.
11  *
12  * There are four kinds of events which can be mapped to an event
13  * channel:
14  *
15  * 1. Inter-domain notifications.  This includes all the virtual
16  *    device events, since they're driven by front-ends in another domain
17  *    (typically dom0).
18  * 2. VIRQs, typically used for timers.  These are per-cpu events.
19  * 3. IPIs.
20  * 4. PIRQs - Hardware interrupts.
21  *
22  * Jeremy Fitzhardinge <[email protected]>, XenSource Inc, 2007
23  */
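/*
 * Illustrative sketch: the four event types above are reached through the
 * binding helpers defined below.  A hypothetical driver might use them
 * roughly like this; my_handler, my_dev, gsi and pirq are placeholders, and
 * XEN_RESCHEDULE_VECTOR is the x86 IPI name:
 *
 *	irq = bind_evtchn_to_irqhandler(evtchn, my_handler, 0, "mydev", my_dev);
 *	irq = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, my_handler, 0, "dbg", NULL);
 *	irq = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu, my_handler,
 *				     IRQF_PERCPU, "resched", NULL);
 *	irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, 1, "ioapic-level");
 *
 * Each helper returns a Linux IRQ number which is torn down again with
 * unbind_from_irqhandler().
 */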
24
25 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
26
27 #include <linux/linkage.h>
28 #include <linux/interrupt.h>
29 #include <linux/irq.h>
30 #include <linux/moduleparam.h>
31 #include <linux/string.h>
32 #include <linux/memblock.h>
33 #include <linux/slab.h>
34 #include <linux/irqnr.h>
35 #include <linux/pci.h>
36 #include <linux/spinlock.h>
37 #include <linux/cpuhotplug.h>
38 #include <linux/atomic.h>
39 #include <linux/ktime.h>
40
41 #ifdef CONFIG_X86
42 #include <asm/desc.h>
43 #include <asm/ptrace.h>
44 #include <asm/idtentry.h>
45 #include <asm/irq.h>
46 #include <asm/io_apic.h>
47 #include <asm/i8259.h>
48 #include <asm/xen/cpuid.h>
49 #include <asm/xen/pci.h>
50 #endif
51 #include <asm/sync_bitops.h>
52 #include <asm/xen/hypercall.h>
53 #include <asm/xen/hypervisor.h>
54 #include <xen/page.h>
55
56 #include <xen/xen.h>
57 #include <xen/hvm.h>
58 #include <xen/xen-ops.h>
59 #include <xen/events.h>
60 #include <xen/interface/xen.h>
61 #include <xen/interface/event_channel.h>
62 #include <xen/interface/hvm/hvm_op.h>
63 #include <xen/interface/hvm/params.h>
64 #include <xen/interface/physdev.h>
65 #include <xen/interface/sched.h>
66 #include <xen/interface/vcpu.h>
67 #include <xen/xenbus.h>
68 #include <asm/hw_irq.h>
69
70 #include "events_internal.h"
71
72 #undef MODULE_PARAM_PREFIX
73 #define MODULE_PARAM_PREFIX "xen."
74
75 /* Interrupt types. */
76 enum xen_irq_type {
77         IRQT_UNBOUND = 0,
78         IRQT_PIRQ,
79         IRQT_VIRQ,
80         IRQT_IPI,
81         IRQT_EVTCHN
82 };
83
84 /*
85  * Packed IRQ information:
86  * type - enum xen_irq_type
87  * event channel - irq->event channel mapping
88  * cpu - cpu this event channel is bound to
89  * index - type-specific information:
90  *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
91  *           guest, or GSI (real passthrough IRQ) of the device.
92  *    VIRQ - virq number
93  *    IPI - IPI vector
94  *    EVTCHN - xenbus device that owns the channel, if any (u.interdomain)
95  */
96 struct irq_info {
97         struct list_head list;
98         struct list_head eoi_list;
99         short refcnt;
100         u8 spurious_cnt;
101         u8 is_accounted;
102         short type;             /* type: IRQT_* */
103         u8 mask_reason;         /* Why is event channel masked */
104 #define EVT_MASK_REASON_EXPLICIT        0x01
105 #define EVT_MASK_REASON_TEMPORARY       0x02
106 #define EVT_MASK_REASON_EOI_PENDING     0x04
107         u8 is_active;           /* Is event just being handled? */
108         unsigned irq;
109         evtchn_port_t evtchn;   /* event channel */
110         unsigned short cpu;     /* cpu bound */
111         unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
112         unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
113         u64 eoi_time;           /* Time in jiffies when to EOI. */
114         raw_spinlock_t lock;
115
116         union {
117                 unsigned short virq;
118                 enum ipi_vector ipi;
119                 struct {
120                         unsigned short pirq;
121                         unsigned short gsi;
122                         unsigned char vector;
123                         unsigned char flags;
124                         uint16_t domid;
125                 } pirq;
126                 struct xenbus_device *interdomain;
127         } u;
128 };
129
130 #define PIRQ_NEEDS_EOI  (1 << 0)
131 #define PIRQ_SHAREABLE  (1 << 1)
132 #define PIRQ_MSI_GROUP  (1 << 2)
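/*
 * Note on the union above: the active member is selected by the type field,
 * i.e. u.virq for IRQT_VIRQ, u.ipi for IRQT_IPI, u.pirq for IRQT_PIRQ and
 * u.interdomain for IRQT_EVTCHN.  The accessors further down
 * (virq_from_irq(), ipi_from_irq(), pirq_from_irq()) BUG_ON() a type
 * mismatch before reading it.
 */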
133
134 static uint __read_mostly event_loop_timeout = 2;
135 module_param(event_loop_timeout, uint, 0644);
136
137 static uint __read_mostly event_eoi_delay = 10;
138 module_param(event_eoi_delay, uint, 0644);
139
140 const struct evtchn_ops *evtchn_ops;
141
142 /*
143  * This lock protects updates to the following mapping and reference-count
144  * arrays. The lock does not need to be acquired to read the mapping tables.
145  */
146 static DEFINE_MUTEX(irq_mapping_update_lock);
147
148 /*
149  * Lock protecting event handling loop against removing event channels.
150  * Adding event channels is not an issue, as the associated IRQ becomes active
151  * only after everything is set up (before request_[threaded_]irq() the handler
152  * can't be entered for an event, as the event channel will be unmasked only
153  * then).
154  */
155 static DEFINE_RWLOCK(evtchn_rwlock);
156
157 /*
158  * Lock hierarchy:
159  *
160  * irq_mapping_update_lock
161  *   evtchn_rwlock
162  *     IRQ-desc lock
163  *       percpu eoi_list_lock
164  *         irq_info->lock
165  */
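/*
 * For example, the lateeoi path follows this order: xen_irq_lateeoi() takes
 * evtchn_rwlock for reading, lateeoi_list_add() then takes the per-cpu
 * eoi_list_lock, and do_mask()/do_unmask() take irq_info->lock.  Whenever
 * several of these locks are needed they must be acquired in the order
 * listed above.
 */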
166
167 static LIST_HEAD(xen_irq_list_head);
168
169 /* IRQ <-> VIRQ mapping. */
170 static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
171
172 /* IRQ <-> IPI mapping */
173 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
174
175 /* Event channel distribution data */
176 static atomic_t channels_on_cpu[NR_CPUS];
177
178 static int **evtchn_to_irq;
179 #ifdef CONFIG_X86
180 static unsigned long *pirq_eoi_map;
181 #endif
182 static bool (*pirq_needs_eoi)(unsigned irq);
183
184 #define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
185 #define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
186 #define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
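/*
 * A worked example, assuming 4 KiB pages and 4-byte ints: EVTCHN_PER_ROW is
 * 4096 / 4 = 1024, so event channel 2500 maps to row 2, column 452 of the
 * two-level evtchn_to_irq table.  Rows are allocated on demand in
 * set_evtchn_to_irq() below.
 */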
187
188 /* Xen will never allocate port zero for any purpose. */
189 #define VALID_EVTCHN(chn)       ((chn) != 0)
190
191 static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
192
193 static struct irq_chip xen_dynamic_chip;
194 static struct irq_chip xen_lateeoi_chip;
195 static struct irq_chip xen_percpu_chip;
196 static struct irq_chip xen_pirq_chip;
197 static void enable_dynirq(struct irq_data *data);
198 static void disable_dynirq(struct irq_data *data);
199
200 static DEFINE_PER_CPU(unsigned int, irq_epoch);
201
202 static void clear_evtchn_to_irq_row(int *evtchn_row)
203 {
204         unsigned col;
205
206         for (col = 0; col < EVTCHN_PER_ROW; col++)
207                 WRITE_ONCE(evtchn_row[col], -1);
208 }
209
210 static void clear_evtchn_to_irq_all(void)
211 {
212         unsigned row;
213
214         for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
215                 if (evtchn_to_irq[row] == NULL)
216                         continue;
217                 clear_evtchn_to_irq_row(evtchn_to_irq[row]);
218         }
219 }
220
221 static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
222 {
223         unsigned row;
224         unsigned col;
225         int *evtchn_row;
226
227         if (evtchn >= xen_evtchn_max_channels())
228                 return -EINVAL;
229
230         row = EVTCHN_ROW(evtchn);
231         col = EVTCHN_COL(evtchn);
232
233         if (evtchn_to_irq[row] == NULL) {
234                 /* Unallocated irq entries return -1 anyway */
235                 if (irq == -1)
236                         return 0;
237
238                 evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
239                 if (evtchn_row == NULL)
240                         return -ENOMEM;
241
242                 clear_evtchn_to_irq_row(evtchn_row);
243
244                 /*
245                  * We've prepared an empty row for the mapping. If a different
246                  * thread was faster inserting it, we can drop ours.
247                  */
248                 if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
249                         free_page((unsigned long) evtchn_row);
250         }
251
252         WRITE_ONCE(evtchn_to_irq[row][col], irq);
253         return 0;
254 }
255
256 int get_evtchn_to_irq(evtchn_port_t evtchn)
257 {
258         if (evtchn >= xen_evtchn_max_channels())
259                 return -1;
260         if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
261                 return -1;
262         return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
263 }
264
265 /* Get info for IRQ */
266 static struct irq_info *info_for_irq(unsigned irq)
267 {
268         if (irq < nr_legacy_irqs())
269                 return legacy_info_ptrs[irq];
270         else
271                 return irq_get_chip_data(irq);
272 }
273
274 static void set_info_for_irq(unsigned int irq, struct irq_info *info)
275 {
276         if (irq < nr_legacy_irqs())
277                 legacy_info_ptrs[irq] = info;
278         else
279                 irq_set_chip_data(irq, info);
280 }
281
282 /* Per CPU channel accounting */
283 static void channels_on_cpu_dec(struct irq_info *info)
284 {
285         if (!info->is_accounted)
286                 return;
287
288         info->is_accounted = 0;
289
290         if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
291                 return;
292
293         WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
294 }
295
296 static void channels_on_cpu_inc(struct irq_info *info)
297 {
298         if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
299                 return;
300
301         if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
302                                             INT_MAX)))
303                 return;
304
305         info->is_accounted = 1;
306 }
307
308 /* Constructors for packed IRQ information. */
309 static int xen_irq_info_common_setup(struct irq_info *info,
310                                      unsigned irq,
311                                      enum xen_irq_type type,
312                                      evtchn_port_t evtchn,
313                                      unsigned short cpu)
314 {
315         int ret;
316
317         BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
318
319         info->type = type;
320         info->irq = irq;
321         info->evtchn = evtchn;
322         info->cpu = cpu;
323         info->mask_reason = EVT_MASK_REASON_EXPLICIT;
324         raw_spin_lock_init(&info->lock);
325
326         ret = set_evtchn_to_irq(evtchn, irq);
327         if (ret < 0)
328                 return ret;
329
330         irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
331
332         return xen_evtchn_port_setup(evtchn);
333 }
334
335 static int xen_irq_info_evtchn_setup(unsigned irq,
336                                      evtchn_port_t evtchn,
337                                      struct xenbus_device *dev)
338 {
339         struct irq_info *info = info_for_irq(irq);
340         int ret;
341
342         ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
343         info->u.interdomain = dev;
344         if (dev)
345                 atomic_inc(&dev->event_channels);
346
347         return ret;
348 }
349
350 static int xen_irq_info_ipi_setup(unsigned cpu,
351                                   unsigned irq,
352                                   evtchn_port_t evtchn,
353                                   enum ipi_vector ipi)
354 {
355         struct irq_info *info = info_for_irq(irq);
356
357         info->u.ipi = ipi;
358
359         per_cpu(ipi_to_irq, cpu)[ipi] = irq;
360
361         return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
362 }
363
364 static int xen_irq_info_virq_setup(unsigned cpu,
365                                    unsigned irq,
366                                    evtchn_port_t evtchn,
367                                    unsigned virq)
368 {
369         struct irq_info *info = info_for_irq(irq);
370
371         info->u.virq = virq;
372
373         per_cpu(virq_to_irq, cpu)[virq] = irq;
374
375         return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
376 }
377
378 static int xen_irq_info_pirq_setup(unsigned irq,
379                                    evtchn_port_t evtchn,
380                                    unsigned pirq,
381                                    unsigned gsi,
382                                    uint16_t domid,
383                                    unsigned char flags)
384 {
385         struct irq_info *info = info_for_irq(irq);
386
387         info->u.pirq.pirq = pirq;
388         info->u.pirq.gsi = gsi;
389         info->u.pirq.domid = domid;
390         info->u.pirq.flags = flags;
391
392         return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
393 }
394
395 static void xen_irq_info_cleanup(struct irq_info *info)
396 {
397         set_evtchn_to_irq(info->evtchn, -1);
398         xen_evtchn_port_remove(info->evtchn, info->cpu);
399         info->evtchn = 0;
400         channels_on_cpu_dec(info);
401 }
402
403 /*
404  * Accessors for packed IRQ information.
405  */
406 evtchn_port_t evtchn_from_irq(unsigned irq)
407 {
408         const struct irq_info *info = NULL;
409
410         if (likely(irq < nr_irqs))
411                 info = info_for_irq(irq);
412         if (!info)
413                 return 0;
414
415         return info->evtchn;
416 }
417
418 unsigned int irq_from_evtchn(evtchn_port_t evtchn)
419 {
420         return get_evtchn_to_irq(evtchn);
421 }
422 EXPORT_SYMBOL_GPL(irq_from_evtchn);
423
424 int irq_from_virq(unsigned int cpu, unsigned int virq)
425 {
426         return per_cpu(virq_to_irq, cpu)[virq];
427 }
428
429 static enum ipi_vector ipi_from_irq(unsigned irq)
430 {
431         struct irq_info *info = info_for_irq(irq);
432
433         BUG_ON(info == NULL);
434         BUG_ON(info->type != IRQT_IPI);
435
436         return info->u.ipi;
437 }
438
439 static unsigned virq_from_irq(unsigned irq)
440 {
441         struct irq_info *info = info_for_irq(irq);
442
443         BUG_ON(info == NULL);
444         BUG_ON(info->type != IRQT_VIRQ);
445
446         return info->u.virq;
447 }
448
449 static unsigned pirq_from_irq(unsigned irq)
450 {
451         struct irq_info *info = info_for_irq(irq);
452
453         BUG_ON(info == NULL);
454         BUG_ON(info->type != IRQT_PIRQ);
455
456         return info->u.pirq.pirq;
457 }
458
459 static enum xen_irq_type type_from_irq(unsigned irq)
460 {
461         return info_for_irq(irq)->type;
462 }
463
464 static unsigned cpu_from_irq(unsigned irq)
465 {
466         return info_for_irq(irq)->cpu;
467 }
468
469 unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
470 {
471         int irq = get_evtchn_to_irq(evtchn);
472         unsigned ret = 0;
473
474         if (irq != -1)
475                 ret = cpu_from_irq(irq);
476
477         return ret;
478 }
479
480 static void do_mask(struct irq_info *info, u8 reason)
481 {
482         unsigned long flags;
483
484         raw_spin_lock_irqsave(&info->lock, flags);
485
486         if (!info->mask_reason)
487                 mask_evtchn(info->evtchn);
488
489         info->mask_reason |= reason;
490
491         raw_spin_unlock_irqrestore(&info->lock, flags);
492 }
493
494 static void do_unmask(struct irq_info *info, u8 reason)
495 {
496         unsigned long flags;
497
498         raw_spin_lock_irqsave(&info->lock, flags);
499
500         info->mask_reason &= ~reason;
501
502         if (!info->mask_reason)
503                 unmask_evtchn(info->evtchn);
504
505         raw_spin_unlock_irqrestore(&info->lock, flags);
506 }
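/*
 * Note: do_mask()/do_unmask() keep the event channel masked for as long as
 * at least one EVT_MASK_REASON_* bit is set, so e.g. an explicit mask
 * requested by the irq chip and a pending lateeoi EOI can coexist without
 * either side unmasking the channel prematurely.
 */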
507
508 #ifdef CONFIG_X86
509 static bool pirq_check_eoi_map(unsigned irq)
510 {
511         return test_bit(pirq_from_irq(irq), pirq_eoi_map);
512 }
513 #endif
514
515 static bool pirq_needs_eoi_flag(unsigned irq)
516 {
517         struct irq_info *info = info_for_irq(irq);
518         BUG_ON(info->type != IRQT_PIRQ);
519
520         return info->u.pirq.flags & PIRQ_NEEDS_EOI;
521 }
522
523 static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
524                                bool force_affinity)
525 {
526         int irq = get_evtchn_to_irq(evtchn);
527         struct irq_info *info = info_for_irq(irq);
528
529         BUG_ON(irq == -1);
530
531         if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
532                 struct irq_data *data = irq_get_irq_data(irq);
533
534                 irq_data_update_affinity(data, cpumask_of(cpu));
535                 irq_data_update_effective_affinity(data, cpumask_of(cpu));
536         }
537
538         xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
539
540         channels_on_cpu_dec(info);
541         info->cpu = cpu;
542         channels_on_cpu_inc(info);
543 }
544
545 /**
546  * notify_remote_via_irq - send event to remote end of event channel via irq
547  * @irq: irq of event channel to send event to
548  *
549  * Unlike notify_remote_via_evtchn(), this is safe to use across
550  * save/restore. Notifications on a broken connection are silently
551  * dropped.
552  */
553 void notify_remote_via_irq(int irq)
554 {
555         evtchn_port_t evtchn = evtchn_from_irq(irq);
556
557         if (VALID_EVTCHN(evtchn))
558                 notify_remote_via_evtchn(evtchn);
559 }
560 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
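/*
 * Illustrative use: a frontend that has queued requests on a shared ring
 * kicks its backend with the irq obtained from one of the
 * bind_*_to_irqhandler() helpers.  The ring and notify variables come from
 * the standard macros in xen/interface/io/ring.h; the snippet is a sketch
 * only:
 *
 *	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ring, notify);
 *	if (notify)
 *		notify_remote_via_irq(irq);
 */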
561
562 struct lateeoi_work {
563         struct delayed_work delayed;
564         spinlock_t eoi_list_lock;
565         struct list_head eoi_list;
566 };
567
568 static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
569
570 static void lateeoi_list_del(struct irq_info *info)
571 {
572         struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
573         unsigned long flags;
574
575         spin_lock_irqsave(&eoi->eoi_list_lock, flags);
576         list_del_init(&info->eoi_list);
577         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
578 }
579
580 static void lateeoi_list_add(struct irq_info *info)
581 {
582         struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
583         struct irq_info *elem;
584         u64 now = get_jiffies_64();
585         unsigned long delay;
586         unsigned long flags;
587
588         if (now < info->eoi_time)
589                 delay = info->eoi_time - now;
590         else
591                 delay = 1;
592
593         spin_lock_irqsave(&eoi->eoi_list_lock, flags);
594
595         if (list_empty(&eoi->eoi_list)) {
596                 list_add(&info->eoi_list, &eoi->eoi_list);
597                 mod_delayed_work_on(info->eoi_cpu, system_wq,
598                                     &eoi->delayed, delay);
599         } else {
600                 list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
601                         if (elem->eoi_time <= info->eoi_time)
602                                 break;
603                 }
604                 list_add(&info->eoi_list, &elem->eoi_list);
605         }
606
607         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
608 }
609
610 static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
611 {
612         evtchn_port_t evtchn;
613         unsigned int cpu;
614         unsigned int delay = 0;
615
616         evtchn = info->evtchn;
617         if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
618                 return;
619
620         if (spurious) {
621                 struct xenbus_device *dev = info->u.interdomain;
622                 unsigned int threshold = 1;
623
624                 if (dev && dev->spurious_threshold)
625                         threshold = dev->spurious_threshold;
626
627                 if ((1 << info->spurious_cnt) < (HZ << 2)) {
628                         if (info->spurious_cnt != 0xFF)
629                                 info->spurious_cnt++;
630                 }
631                 if (info->spurious_cnt > threshold) {
632                         delay = 1 << (info->spurious_cnt - 1 - threshold);
633                         if (delay > HZ)
634                                 delay = HZ;
635                         if (!info->eoi_time)
636                                 info->eoi_cpu = smp_processor_id();
637                         info->eoi_time = get_jiffies_64() + delay;
638                         if (dev)
639                                 atomic_add(delay, &dev->jiffies_eoi_delayed);
640                 }
641                 if (dev)
642                         atomic_inc(&dev->spurious_events);
643         } else {
644                 info->spurious_cnt = 0;
645         }
646
647         cpu = info->eoi_cpu;
648         if (info->eoi_time &&
649             (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
650                 lateeoi_list_add(info);
651                 return;
652         }
653
654         info->eoi_time = 0;
655
656         /* is_active hasn't been reset yet, do it now. */
657         smp_store_release(&info->is_active, 0);
658         do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
659 }
660
661 static void xen_irq_lateeoi_worker(struct work_struct *work)
662 {
663         struct lateeoi_work *eoi;
664         struct irq_info *info;
665         u64 now = get_jiffies_64();
666         unsigned long flags;
667
668         eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
669
670         read_lock_irqsave(&evtchn_rwlock, flags);
671
672         while (true) {
673                 spin_lock(&eoi->eoi_list_lock);
674
675                 info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
676                                                 eoi_list);
677
678                 if (info == NULL || now < info->eoi_time) {
679                         spin_unlock(&eoi->eoi_list_lock);
680                         break;
681                 }
682
683                 list_del_init(&info->eoi_list);
684
685                 spin_unlock(&eoi->eoi_list_lock);
686
687                 info->eoi_time = 0;
688
689                 xen_irq_lateeoi_locked(info, false);
690         }
691
692         if (info)
693                 mod_delayed_work_on(info->eoi_cpu, system_wq,
694                                     &eoi->delayed, info->eoi_time - now);
695
696         read_unlock_irqrestore(&evtchn_rwlock, flags);
697 }
698
699 static void xen_cpu_init_eoi(unsigned int cpu)
700 {
701         struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
702
703         INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
704         spin_lock_init(&eoi->eoi_list_lock);
705         INIT_LIST_HEAD(&eoi->eoi_list);
706 }
707
708 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
709 {
710         struct irq_info *info;
711         unsigned long flags;
712
713         read_lock_irqsave(&evtchn_rwlock, flags);
714
715         info = info_for_irq(irq);
716
717         if (info)
718                 xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
719
720         read_unlock_irqrestore(&evtchn_rwlock, flags);
721 }
722 EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
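/*
 * Usage sketch: a handler bound through one of the *_lateeoi helpers must
 * signal completion explicitly, otherwise the event channel stays masked.
 * my_backend_irq(), my_work_pending() and my_do_work() are hypothetical;
 * only xen_irq_lateeoi() and XEN_EOI_FLAG_SPURIOUS are real:
 *
 *	static irqreturn_t my_backend_irq(int irq, void *dev_id)
 *	{
 *		if (!my_work_pending(dev_id)) {
 *			xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
 *		} else {
 *			my_do_work(dev_id);
 *			xen_irq_lateeoi(irq, 0);
 *		}
 *		return IRQ_HANDLED;
 *	}
 */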
723
724 static void xen_irq_init(unsigned irq)
725 {
726         struct irq_info *info;
727
728         info = kzalloc(sizeof(*info), GFP_KERNEL);
729         if (info == NULL)
730                 panic("Unable to allocate metadata for IRQ%d\n", irq);
731
732         info->type = IRQT_UNBOUND;
733         info->refcnt = -1;
734
735         set_info_for_irq(irq, info);
736         /*
737          * Interrupt affinity setting can be immediate. No point
738          * in delaying it until an interrupt is handled.
739          */
740         irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
741
742         INIT_LIST_HEAD(&info->eoi_list);
743         list_add_tail(&info->list, &xen_irq_list_head);
744 }
745
746 static int __must_check xen_allocate_irqs_dynamic(int nvec)
747 {
748         int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
749
750         if (irq >= 0) {
751                 for (i = 0; i < nvec; i++)
752                         xen_irq_init(irq + i);
753         }
754
755         return irq;
756 }
757
758 static inline int __must_check xen_allocate_irq_dynamic(void)
759 {
760
761         return xen_allocate_irqs_dynamic(1);
762 }
763
764 static int __must_check xen_allocate_irq_gsi(unsigned gsi)
765 {
766         int irq;
767
768         /*
769          * A PV guest has no concept of a GSI (since it has neither ACPI
770          * nor access to/knowledge of the physical APICs). Therefore
771          * all IRQs are dynamically allocated from the entire IRQ
772          * space.
773          */
774         if (xen_pv_domain() && !xen_initial_domain())
775                 return xen_allocate_irq_dynamic();
776
777         /* Legacy IRQ descriptors are already allocated by the arch. */
778         if (gsi < nr_legacy_irqs())
779                 irq = gsi;
780         else
781                 irq = irq_alloc_desc_at(gsi, -1);
782
783         xen_irq_init(irq);
784
785         return irq;
786 }
787
788 static void xen_free_irq(unsigned irq)
789 {
790         struct irq_info *info = info_for_irq(irq);
791         unsigned long flags;
792
793         if (WARN_ON(!info))
794                 return;
795
796         write_lock_irqsave(&evtchn_rwlock, flags);
797
798         if (!list_empty(&info->eoi_list))
799                 lateeoi_list_del(info);
800
801         list_del(&info->list);
802
803         set_info_for_irq(irq, NULL);
804
805         WARN_ON(info->refcnt > 0);
806
807         write_unlock_irqrestore(&evtchn_rwlock, flags);
808
809         kfree(info);
810
811         /* Legacy IRQ descriptors are managed by the arch. */
812         if (irq < nr_legacy_irqs())
813                 return;
814
815         irq_free_desc(irq);
816 }
817
818 static void xen_evtchn_close(evtchn_port_t port)
819 {
820         struct evtchn_close close;
821
822         close.port = port;
823         if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
824                 BUG();
825 }
826
827 /* Not called for lateeoi events. */
828 static void event_handler_exit(struct irq_info *info)
829 {
830         smp_store_release(&info->is_active, 0);
831         clear_evtchn(info->evtchn);
832 }
833
834 static void pirq_query_unmask(int irq)
835 {
836         struct physdev_irq_status_query irq_status;
837         struct irq_info *info = info_for_irq(irq);
838
839         BUG_ON(info->type != IRQT_PIRQ);
840
841         irq_status.irq = pirq_from_irq(irq);
842         if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
843                 irq_status.flags = 0;
844
845         info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
846         if (irq_status.flags & XENIRQSTAT_needs_eoi)
847                 info->u.pirq.flags |= PIRQ_NEEDS_EOI;
848 }
849
850 static void eoi_pirq(struct irq_data *data)
851 {
852         struct irq_info *info = info_for_irq(data->irq);
853         evtchn_port_t evtchn = info ? info->evtchn : 0;
854         struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
855         int rc = 0;
856
857         if (!VALID_EVTCHN(evtchn))
858                 return;
859
860         event_handler_exit(info);
861
862         if (pirq_needs_eoi(data->irq)) {
863                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
864                 WARN_ON(rc);
865         }
866 }
867
868 static void mask_ack_pirq(struct irq_data *data)
869 {
870         disable_dynirq(data);
871         eoi_pirq(data);
872 }
873
874 static unsigned int __startup_pirq(unsigned int irq)
875 {
876         struct evtchn_bind_pirq bind_pirq;
877         struct irq_info *info = info_for_irq(irq);
878         evtchn_port_t evtchn = evtchn_from_irq(irq);
879         int rc;
880
881         BUG_ON(info->type != IRQT_PIRQ);
882
883         if (VALID_EVTCHN(evtchn))
884                 goto out;
885
886         bind_pirq.pirq = pirq_from_irq(irq);
887         /* NB. We are happy to share unless we are probing. */
888         bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
889                                         BIND_PIRQ__WILL_SHARE : 0;
890         rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
891         if (rc != 0) {
892                 pr_warn("Failed to obtain physical IRQ %d\n", irq);
893                 return 0;
894         }
895         evtchn = bind_pirq.port;
896
897         pirq_query_unmask(irq);
898
899         rc = set_evtchn_to_irq(evtchn, irq);
900         if (rc)
901                 goto err;
902
903         info->evtchn = evtchn;
904         bind_evtchn_to_cpu(evtchn, 0, false);
905
906         rc = xen_evtchn_port_setup(evtchn);
907         if (rc)
908                 goto err;
909
910 out:
911         do_unmask(info, EVT_MASK_REASON_EXPLICIT);
912
913         eoi_pirq(irq_get_irq_data(irq));
914
915         return 0;
916
917 err:
918         pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
919         xen_evtchn_close(evtchn);
920         return 0;
921 }
922
923 static unsigned int startup_pirq(struct irq_data *data)
924 {
925         return __startup_pirq(data->irq);
926 }
927
928 static void shutdown_pirq(struct irq_data *data)
929 {
930         unsigned int irq = data->irq;
931         struct irq_info *info = info_for_irq(irq);
932         evtchn_port_t evtchn = evtchn_from_irq(irq);
933
934         BUG_ON(info->type != IRQT_PIRQ);
935
936         if (!VALID_EVTCHN(evtchn))
937                 return;
938
939         do_mask(info, EVT_MASK_REASON_EXPLICIT);
940         xen_evtchn_close(evtchn);
941         xen_irq_info_cleanup(info);
942 }
943
944 static void enable_pirq(struct irq_data *data)
945 {
946         enable_dynirq(data);
947 }
948
949 static void disable_pirq(struct irq_data *data)
950 {
951         disable_dynirq(data);
952 }
953
954 int xen_irq_from_gsi(unsigned gsi)
955 {
956         struct irq_info *info;
957
958         list_for_each_entry(info, &xen_irq_list_head, list) {
959                 if (info->type != IRQT_PIRQ)
960                         continue;
961
962                 if (info->u.pirq.gsi == gsi)
963                         return info->irq;
964         }
965
966         return -1;
967 }
968 EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
969
970 static void __unbind_from_irq(unsigned int irq)
971 {
972         evtchn_port_t evtchn = evtchn_from_irq(irq);
973         struct irq_info *info = info_for_irq(irq);
974
975         if (info->refcnt > 0) {
976                 info->refcnt--;
977                 if (info->refcnt != 0)
978                         return;
979         }
980
981         if (VALID_EVTCHN(evtchn)) {
982                 unsigned int cpu = cpu_from_irq(irq);
983                 struct xenbus_device *dev;
984
985                 xen_evtchn_close(evtchn);
986
987                 switch (type_from_irq(irq)) {
988                 case IRQT_VIRQ:
989                         per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
990                         break;
991                 case IRQT_IPI:
992                         per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
993                         break;
994                 case IRQT_EVTCHN:
995                         dev = info->u.interdomain;
996                         if (dev)
997                                 atomic_dec(&dev->event_channels);
998                         break;
999                 default:
1000                         break;
1001                 }
1002
1003                 xen_irq_info_cleanup(info);
1004         }
1005
1006         xen_free_irq(irq);
1007 }
1008
1009 /*
1010  * Do not make any assumptions regarding the relationship between the
1011  * IRQ number returned here and the Xen pirq argument.
1012  *
1013  * Note: We don't assign an event channel until the irq is actually started
1014  * up.  Return an existing irq if we've already got one for the gsi.
1015  *
1016  * Shareable implies level triggered, not shareable implies edge
1017  * triggered here.
1018  */
1019 int xen_bind_pirq_gsi_to_irq(unsigned gsi,
1020                              unsigned pirq, int shareable, char *name)
1021 {
1022         int irq;
1023         struct physdev_irq irq_op;
1024         int ret;
1025
1026         mutex_lock(&irq_mapping_update_lock);
1027
1028         irq = xen_irq_from_gsi(gsi);
1029         if (irq != -1) {
1030                 pr_info("%s: returning irq %d for gsi %u\n",
1031                         __func__, irq, gsi);
1032                 goto out;
1033         }
1034
1035         irq = xen_allocate_irq_gsi(gsi);
1036         if (irq < 0)
1037                 goto out;
1038
1039         irq_op.irq = irq;
1040         irq_op.vector = 0;
1041
1042         /* Only the privileged domain can do this. For non-priv, the pcifront
1043          * driver provides a PCI bus that does the call to do exactly
1044          * this in the priv domain. */
1045         if (xen_initial_domain() &&
1046             HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1047                 xen_free_irq(irq);
1048                 irq = -ENOSPC;
1049                 goto out;
1050         }
1051
1052         ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
1053                                shareable ? PIRQ_SHAREABLE : 0);
1054         if (ret < 0) {
1055                 __unbind_from_irq(irq);
1056                 irq = ret;
1057                 goto out;
1058         }
1059
1060         pirq_query_unmask(irq);
1061         /* We try to use the handler with the appropriate semantic for the
1062          * type of interrupt: if the interrupt is an edge triggered
1063          * interrupt we use handle_edge_irq.
1064          *
1065          * On the other hand if the interrupt is level triggered we use
1066          * handle_fasteoi_irq like the native code does for this kind of
1067          * interrupt.
1068          *
1069          * Depending on the Xen version, pirq_needs_eoi might return true
1070          * not only for level triggered interrupts but for edge triggered
1071          * interrupts too. In any case Xen always honors the eoi mechanism,
1072          * not injecting any more pirqs of the same kind if the first one
1073          * hasn't received an eoi yet. Therefore using the fasteoi handler
1074          * is the right choice either way.
1075          */
1076         if (shareable)
1077                 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1078                                 handle_fasteoi_irq, name);
1079         else
1080                 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1081                                 handle_edge_irq, name);
1082
1083 out:
1084         mutex_unlock(&irq_mapping_update_lock);
1085
1086         return irq;
1087 }
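/*
 * A sketch of a typical dom0-side call (values and names are illustrative
 * and platform-dependent; the pirq would have been obtained beforehand via
 * PHYSDEVOP_map_pirq):
 *
 *	irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable,
 *				       shareable ? "ioapic-level" : "ioapic-edge");
 */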
1088
1089 #ifdef CONFIG_PCI_MSI
1090 int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
1091 {
1092         int rc;
1093         struct physdev_get_free_pirq op_get_free_pirq;
1094
1095         op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
1096         rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
1097
1098         WARN_ONCE(rc == -ENOSYS,
1099                   "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
1100
1101         return rc ? -1 : op_get_free_pirq.pirq;
1102 }
1103
1104 int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
1105                              int pirq, int nvec, const char *name, domid_t domid)
1106 {
1107         int i, irq, ret;
1108
1109         mutex_lock(&irq_mapping_update_lock);
1110
1111         irq = xen_allocate_irqs_dynamic(nvec);
1112         if (irq < 0)
1113                 goto out;
1114
1115         for (i = 0; i < nvec; i++) {
1116                 irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
1117
1118                 ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
1119                                               i == 0 ? 0 : PIRQ_MSI_GROUP);
1120                 if (ret < 0)
1121                         goto error_irq;
1122         }
1123
1124         ret = irq_set_msi_desc(irq, msidesc);
1125         if (ret < 0)
1126                 goto error_irq;
1127 out:
1128         mutex_unlock(&irq_mapping_update_lock);
1129         return irq;
1130 error_irq:
1131         while (nvec--)
1132                 __unbind_from_irq(irq + nvec);
1133         mutex_unlock(&irq_mapping_update_lock);
1134         return ret;
1135 }
1136 #endif
1137
1138 int xen_destroy_irq(int irq)
1139 {
1140         struct physdev_unmap_pirq unmap_irq;
1141         struct irq_info *info = info_for_irq(irq);
1142         int rc = -ENOENT;
1143
1144         mutex_lock(&irq_mapping_update_lock);
1145
1146         /*
1147          * If trying to remove a vector that is part of an MSI group, skip
1148          * the PIRQ unmap unless this vector is the first one of the group
1149          * (only the first vector has PIRQ_MSI_GROUP clear).
1150          */
1151         if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
1152                 unmap_irq.pirq = info->u.pirq.pirq;
1153                 unmap_irq.domid = info->u.pirq.domid;
1154                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
1155                 /* If another domain quits without making the pci_disable_msix
1156                  * call, the Xen hypervisor takes care of freeing the PIRQs
1157                  * (free_domain_pirqs).
1158                  */
1159                 if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
1160                         pr_info("domain %d does not have %d anymore\n",
1161                                 info->u.pirq.domid, info->u.pirq.pirq);
1162                 else if (rc) {
1163                         pr_warn("unmap irq failed %d\n", rc);
1164                         goto out;
1165                 }
1166         }
1167
1168         xen_free_irq(irq);
1169
1170 out:
1171         mutex_unlock(&irq_mapping_update_lock);
1172         return rc;
1173 }
1174
1175 int xen_irq_from_pirq(unsigned pirq)
1176 {
1177         int irq;
1178
1179         struct irq_info *info;
1180
1181         mutex_lock(&irq_mapping_update_lock);
1182
1183         list_for_each_entry(info, &xen_irq_list_head, list) {
1184                 if (info->type != IRQT_PIRQ)
1185                         continue;
1186                 irq = info->irq;
1187                 if (info->u.pirq.pirq == pirq)
1188                         goto out;
1189         }
1190         irq = -1;
1191 out:
1192         mutex_unlock(&irq_mapping_update_lock);
1193
1194         return irq;
1195 }
1196
1197
1198 int xen_pirq_from_irq(unsigned irq)
1199 {
1200         return pirq_from_irq(irq);
1201 }
1202 EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
1203
1204 static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
1205                                    struct xenbus_device *dev)
1206 {
1207         int irq;
1208         int ret;
1209
1210         if (evtchn >= xen_evtchn_max_channels())
1211                 return -ENOMEM;
1212
1213         mutex_lock(&irq_mapping_update_lock);
1214
1215         irq = get_evtchn_to_irq(evtchn);
1216
1217         if (irq == -1) {
1218                 irq = xen_allocate_irq_dynamic();
1219                 if (irq < 0)
1220                         goto out;
1221
1222                 irq_set_chip_and_handler_name(irq, chip,
1223                                               handle_edge_irq, "event");
1224
1225                 ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
1226                 if (ret < 0) {
1227                         __unbind_from_irq(irq);
1228                         irq = ret;
1229                         goto out;
1230                 }
1231                 /*
1232                  * New interdomain events are initially bound to vCPU0. This
1233                  * is required to set up the event channel in the first
1234                  * place and also important for UP guests because the
1235                  * affinity setting is not invoked on them so nothing would
1236                  * bind the channel.
1237                  */
1238                 bind_evtchn_to_cpu(evtchn, 0, false);
1239         } else {
1240                 struct irq_info *info = info_for_irq(irq);
1241                 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
1242         }
1243
1244 out:
1245         mutex_unlock(&irq_mapping_update_lock);
1246
1247         return irq;
1248 }
1249
1250 int bind_evtchn_to_irq(evtchn_port_t evtchn)
1251 {
1252         return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
1253 }
1254 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
1255
1256 int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
1257 {
1258         return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
1259 }
1260 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
1261
1262 static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
1263 {
1264         struct evtchn_bind_ipi bind_ipi;
1265         evtchn_port_t evtchn;
1266         int ret, irq;
1267
1268         mutex_lock(&irq_mapping_update_lock);
1269
1270         irq = per_cpu(ipi_to_irq, cpu)[ipi];
1271
1272         if (irq == -1) {
1273                 irq = xen_allocate_irq_dynamic();
1274                 if (irq < 0)
1275                         goto out;
1276
1277                 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1278                                               handle_percpu_irq, "ipi");
1279
1280                 bind_ipi.vcpu = xen_vcpu_nr(cpu);
1281                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1282                                                 &bind_ipi) != 0)
1283                         BUG();
1284                 evtchn = bind_ipi.port;
1285
1286                 ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1287                 if (ret < 0) {
1288                         __unbind_from_irq(irq);
1289                         irq = ret;
1290                         goto out;
1291                 }
1292                 /*
1293                  * Force the affinity mask to the target CPU so proc shows
1294                  * the correct target.
1295                  */
1296                 bind_evtchn_to_cpu(evtchn, cpu, true);
1297         } else {
1298                 struct irq_info *info = info_for_irq(irq);
1299                 WARN_ON(info == NULL || info->type != IRQT_IPI);
1300         }
1301
1302  out:
1303         mutex_unlock(&irq_mapping_update_lock);
1304         return irq;
1305 }
1306
1307 static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
1308                                                evtchn_port_t remote_port,
1309                                                struct irq_chip *chip)
1310 {
1311         struct evtchn_bind_interdomain bind_interdomain;
1312         int err;
1313
1314         bind_interdomain.remote_dom  = dev->otherend_id;
1315         bind_interdomain.remote_port = remote_port;
1316
1317         err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
1318                                           &bind_interdomain);
1319
1320         return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
1321                                                chip, dev);
1322 }
1323
1324 int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
1325                                            evtchn_port_t remote_port)
1326 {
1327         return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
1328                                                    &xen_lateeoi_chip);
1329 }
1330 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
1331
1332 static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
1333 {
1334         struct evtchn_status status;
1335         evtchn_port_t port;
1336         int rc = -ENOENT;
1337
1338         memset(&status, 0, sizeof(status));
1339         for (port = 0; port < xen_evtchn_max_channels(); port++) {
1340                 status.dom = DOMID_SELF;
1341                 status.port = port;
1342                 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
1343                 if (rc < 0)
1344                         continue;
1345                 if (status.status != EVTCHNSTAT_virq)
1346                         continue;
1347                 if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
1348                         *evtchn = port;
1349                         break;
1350                 }
1351         }
1352         return rc;
1353 }
1354
1355 /**
1356  * xen_evtchn_nr_channels - number of usable event channel ports
1357  *
1358  * This may be less than the maximum supported by the current
1359  * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
1360  * supported.
1361  */
1362 unsigned xen_evtchn_nr_channels(void)
1363 {
1364         return evtchn_ops->nr_channels();
1365 }
1366 EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
1367
1368 int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
1369 {
1370         struct evtchn_bind_virq bind_virq;
1371         evtchn_port_t evtchn = 0;
1372         int irq, ret;
1373
1374         mutex_lock(&irq_mapping_update_lock);
1375
1376         irq = per_cpu(virq_to_irq, cpu)[virq];
1377
1378         if (irq == -1) {
1379                 irq = xen_allocate_irq_dynamic();
1380                 if (irq < 0)
1381                         goto out;
1382
1383                 if (percpu)
1384                         irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1385                                                       handle_percpu_irq, "virq");
1386                 else
1387                         irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
1388                                                       handle_edge_irq, "virq");
1389
1390                 bind_virq.virq = virq;
1391                 bind_virq.vcpu = xen_vcpu_nr(cpu);
1392                 ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1393                                                 &bind_virq);
1394                 if (ret == 0)
1395                         evtchn = bind_virq.port;
1396                 else {
1397                         if (ret == -EEXIST)
1398                                 ret = find_virq(virq, cpu, &evtchn);
1399                         BUG_ON(ret < 0);
1400                 }
1401
1402                 ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1403                 if (ret < 0) {
1404                         __unbind_from_irq(irq);
1405                         irq = ret;
1406                         goto out;
1407                 }
1408
1409                 /*
1410                  * Force the affinity mask for percpu interrupts so proc
1411                  * shows the correct target.
1412                  */
1413                 bind_evtchn_to_cpu(evtchn, cpu, percpu);
1414         } else {
1415                 struct irq_info *info = info_for_irq(irq);
1416                 WARN_ON(info == NULL || info->type != IRQT_VIRQ);
1417         }
1418
1419 out:
1420         mutex_unlock(&irq_mapping_update_lock);
1421
1422         return irq;
1423 }
1424
1425 static void unbind_from_irq(unsigned int irq)
1426 {
1427         mutex_lock(&irq_mapping_update_lock);
1428         __unbind_from_irq(irq);
1429         mutex_unlock(&irq_mapping_update_lock);
1430 }
1431
1432 static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
1433                                           irq_handler_t handler,
1434                                           unsigned long irqflags,
1435                                           const char *devname, void *dev_id,
1436                                           struct irq_chip *chip)
1437 {
1438         int irq, retval;
1439
1440         irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
1441         if (irq < 0)
1442                 return irq;
1443         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1444         if (retval != 0) {
1445                 unbind_from_irq(irq);
1446                 return retval;
1447         }
1448
1449         return irq;
1450 }
1451
1452 int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
1453                               irq_handler_t handler,
1454                               unsigned long irqflags,
1455                               const char *devname, void *dev_id)
1456 {
1457         return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1458                                               devname, dev_id,
1459                                               &xen_dynamic_chip);
1460 }
1461 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
1462
1463 int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
1464                                       irq_handler_t handler,
1465                                       unsigned long irqflags,
1466                                       const char *devname, void *dev_id)
1467 {
1468         return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1469                                               devname, dev_id,
1470                                               &xen_lateeoi_chip);
1471 }
1472 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
1473
1474 static int bind_interdomain_evtchn_to_irqhandler_chip(
1475                 struct xenbus_device *dev, evtchn_port_t remote_port,
1476                 irq_handler_t handler, unsigned long irqflags,
1477                 const char *devname, void *dev_id, struct irq_chip *chip)
1478 {
1479         int irq, retval;
1480
1481         irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
1482         if (irq < 0)
1483                 return irq;
1484
1485         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1486         if (retval != 0) {
1487                 unbind_from_irq(irq);
1488                 return retval;
1489         }
1490
1491         return irq;
1492 }
1493
1494 int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
1495                                                   evtchn_port_t remote_port,
1496                                                   irq_handler_t handler,
1497                                                   unsigned long irqflags,
1498                                                   const char *devname,
1499                                                   void *dev_id)
1500 {
1501         return bind_interdomain_evtchn_to_irqhandler_chip(dev,
1502                                 remote_port, handler, irqflags, devname,
1503                                 dev_id, &xen_lateeoi_chip);
1504 }
1505 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
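/*
 * Sketch of a backend binding the event channel offered by its frontend;
 * remote_evtchn, my_backend_irq() and be are hypothetical driver state:
 *
 *	err = bind_interdomain_evtchn_to_irqhandler_lateeoi(dev, remote_evtchn,
 *							    my_backend_irq, 0,
 *							    "my-backend", be);
 *	if (err < 0)
 *		return err;
 *	be->irq = err;
 */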
1506
1507 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1508                             irq_handler_t handler,
1509                             unsigned long irqflags, const char *devname, void *dev_id)
1510 {
1511         int irq, retval;
1512
1513         irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
1514         if (irq < 0)
1515                 return irq;
1516         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1517         if (retval != 0) {
1518                 unbind_from_irq(irq);
1519                 return retval;
1520         }
1521
1522         return irq;
1523 }
1524 EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
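/*
 * Illustrative binding of a per-cpu VIRQ, roughly as the Xen timer code
 * does it (my_timer_interrupt is a placeholder handler):
 *
 *	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, my_timer_interrupt,
 *				      IRQF_PERCPU | IRQF_TIMER, "timer", NULL);
 */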
1525
1526 int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1527                            unsigned int cpu,
1528                            irq_handler_t handler,
1529                            unsigned long irqflags,
1530                            const char *devname,
1531                            void *dev_id)
1532 {
1533         int irq, retval;
1534
1535         irq = bind_ipi_to_irq(ipi, cpu);
1536         if (irq < 0)
1537                 return irq;
1538
1539         irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
1540         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1541         if (retval != 0) {
1542                 unbind_from_irq(irq);
1543                 return retval;
1544         }
1545
1546         return irq;
1547 }
1548
1549 void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1550 {
1551         struct irq_info *info = info_for_irq(irq);
1552
1553         if (WARN_ON(!info))
1554                 return;
1555         free_irq(irq, dev_id);
1556         unbind_from_irq(irq);
1557 }
1558 EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1559
1560 /**
1561  * xen_set_irq_priority() - set an event channel priority.
1562  * @irq:irq bound to an event channel.
1563  * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1564  */
1565 int xen_set_irq_priority(unsigned irq, unsigned priority)
1566 {
1567         struct evtchn_set_priority set_priority;
1568
1569         set_priority.port = evtchn_from_irq(irq);
1570         set_priority.priority = priority;
1571
1572         return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1573                                            &set_priority);
1574 }
1575 EXPORT_SYMBOL_GPL(xen_set_irq_priority);
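/*
 * Example: raise an event channel to the highest priority.  Priorities are
 * only meaningful with the FIFO event channel ABI; XEN_IRQ_PRIORITY_MAX,
 * _DEFAULT and _MIN are defined in include/xen/events.h:
 *
 *	ret = xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
 */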
1576
1577 int evtchn_make_refcounted(evtchn_port_t evtchn)
1578 {
1579         int irq = get_evtchn_to_irq(evtchn);
1580         struct irq_info *info;
1581
1582         if (irq == -1)
1583                 return -ENOENT;
1584
1585         info = info_for_irq(irq);
1586
1587         if (!info)
1588                 return -ENOENT;
1589
1590         WARN_ON(info->refcnt != -1);
1591
1592         info->refcnt = 1;
1593
1594         return 0;
1595 }
1596 EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1597
1598 int evtchn_get(evtchn_port_t evtchn)
1599 {
1600         int irq;
1601         struct irq_info *info;
1602         int err = -ENOENT;
1603
1604         if (evtchn >= xen_evtchn_max_channels())
1605                 return -EINVAL;
1606
1607         mutex_lock(&irq_mapping_update_lock);
1608
1609         irq = get_evtchn_to_irq(evtchn);
1610         if (irq == -1)
1611                 goto done;
1612
1613         info = info_for_irq(irq);
1614
1615         if (!info)
1616                 goto done;
1617
1618         err = -EINVAL;
1619         if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
1620                 goto done;
1621
1622         info->refcnt++;
1623         err = 0;
1624  done:
1625         mutex_unlock(&irq_mapping_update_lock);
1626
1627         return err;
1628 }
1629 EXPORT_SYMBOL_GPL(evtchn_get);
1630
1631 void evtchn_put(evtchn_port_t evtchn)
1632 {
1633         int irq = get_evtchn_to_irq(evtchn);
1634         if (WARN_ON(irq == -1))
1635                 return;
1636         unbind_from_irq(irq);
1637 }
1638 EXPORT_SYMBOL_GPL(evtchn_put);
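/*
 * Sketch of the refcounting scheme (as used by the /dev/xen/evtchn driver);
 * my_handler and priv are hypothetical:
 *
 *	irq = bind_evtchn_to_irqhandler_lateeoi(evtchn, my_handler, 0,
 *						"user", priv);
 *	evtchn_make_refcounted(evtchn);		// hand ownership to get/put
 *	...
 *	if (!evtchn_get(evtchn)) {		// take a temporary reference
 *		...
 *		evtchn_put(evtchn);		// may unbind once the count drops
 *	}
 */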
1639
1640 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1641 {
1642         int irq;
1643
1644 #ifdef CONFIG_X86
1645         if (unlikely(vector == XEN_NMI_VECTOR)) {
1646                 int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
1647                                              NULL);
1648                 if (rc < 0)
1649                         printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1650                 return;
1651         }
1652 #endif
1653         irq = per_cpu(ipi_to_irq, cpu)[vector];
1654         BUG_ON(irq < 0);
1655         notify_remote_via_irq(irq);
1656 }
1657
1658 struct evtchn_loop_ctrl {
1659         ktime_t timeout;
1660         unsigned count;
1661         bool defer_eoi;
1662 };
1663
1664 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
1665 {
1666         int irq;
1667         struct irq_info *info;
1668         struct xenbus_device *dev;
1669
1670         irq = get_evtchn_to_irq(port);
1671         if (irq == -1)
1672                 return;
1673
1674         /*
1675          * Check for a timeout every 256 events.
1676          * The timeout value is only set after the first 256 events in
1677          * order not to hurt the common case of few loop iterations. The
1678          * value of 256 is essentially arbitrary.
1679          *
1680          * If the timeout is hit, all further EOIs need to be deferred in
1681          * order to make sure the event handling loop is left sooner
1682          * rather than later.
1683          */
1684         if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
1685                 ktime_t kt = ktime_get();
1686
1687                 if (!ctrl->timeout) {
1688                         kt = ktime_add_ms(kt,
1689                                           jiffies_to_msecs(event_loop_timeout));
1690                         ctrl->timeout = kt;
1691                 } else if (kt > ctrl->timeout) {
1692                         ctrl->defer_eoi = true;
1693                 }
1694         }
1695
1696         info = info_for_irq(irq);
1697         if (xchg_acquire(&info->is_active, 1))
1698                 return;
1699
1700         dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
1701         if (dev)
1702                 atomic_inc(&dev->events);
1703
1704         if (ctrl->defer_eoi) {
1705                 info->eoi_cpu = smp_processor_id();
1706                 info->irq_epoch = __this_cpu_read(irq_epoch);
1707                 info->eoi_time = get_jiffies_64() + event_eoi_delay;
1708         }
1709
1710         generic_handle_irq(irq);
1711 }
1712
1713 static int __xen_evtchn_do_upcall(void)
1714 {
1715         struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1716         int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE;
1717         int cpu = smp_processor_id();
1718         struct evtchn_loop_ctrl ctrl = { 0 };
1719
1720         read_lock(&evtchn_rwlock);
1721
1722         do {
1723                 vcpu_info->evtchn_upcall_pending = 0;
1724
1725                 xen_evtchn_handle_events(cpu, &ctrl);
1726
1727                 BUG_ON(!irqs_disabled());
1728
1729                 virt_rmb(); /* Hypervisor can set upcall pending. */
1730
1731         } while (vcpu_info->evtchn_upcall_pending);
1732
1733         read_unlock(&evtchn_rwlock);
1734
1735         /*
1736          * Increment irq_epoch only now to defer EOIs only for
1737          * xen_irq_lateeoi() invocations occurring from inside the loop
1738          * above.
1739          */
1740         __this_cpu_inc(irq_epoch);
1741
1742         return ret;
1743 }
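
/*
 * Consumer-side sketch (illustrative, process_ring() is hypothetical):
 * backends wanting the delayed-EOI protection bind through the lateeoi
 * variant and signal the end of their event processing explicitly,
 * flagging events that carried no work as spurious so the delayed-EOI
 * logic in this file can throttle them.
 *
 *	irq = bind_evtchn_to_irqhandler_lateeoi(evtchn, be_handler, 0,
 *						"my-backend", be);
 *
 *	static irqreturn_t be_handler(int irq, void *dev_id)
 *	{
 *		bool did_work = process_ring(dev_id);
 *
 *		xen_irq_lateeoi(irq, did_work ? 0 : XEN_EOI_FLAG_SPURIOUS);
 *		return IRQ_HANDLED;
 *	}
 */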
1744
1745 void xen_evtchn_do_upcall(struct pt_regs *regs)
1746 {
1747         struct pt_regs *old_regs = set_irq_regs(regs);
1748
1749         irq_enter();
1750
1751         __xen_evtchn_do_upcall();
1752
1753         irq_exit();
1754         set_irq_regs(old_regs);
1755 }
1756
1757 int xen_hvm_evtchn_do_upcall(void)
1758 {
1759         return __xen_evtchn_do_upcall();
1760 }
1761 EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
1762
1763 /* Rebind a new event channel to an existing irq. */
1764 void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
1765 {
1766         struct irq_info *info = info_for_irq(irq);
1767
1768         if (WARN_ON(!info))
1769                 return;
1770
1771         /* Make sure the irq is masked, since the new event channel
1772            will also be masked. */
1773         disable_irq(irq);
1774
1775         mutex_lock(&irq_mapping_update_lock);
1776
1777         /* After resume the irq<->evtchn mappings are all cleared out */
1778         BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1779         /* Expect irq to have been bound before,
1780            so there should be a proper type */
1781         BUG_ON(info->type == IRQT_UNBOUND);
1782
1783         (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
1784
1785         mutex_unlock(&irq_mapping_update_lock);
1786
1787         bind_evtchn_to_cpu(evtchn, info->cpu, false);
1788
1789         /* Unmask the event channel. */
1790         enable_irq(irq);
1791 }
1792
1793 /* Rebind an evtchn so that it gets delivered to a specific cpu */
1794 static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
1795 {
1796         struct evtchn_bind_vcpu bind_vcpu;
1797         evtchn_port_t evtchn = info ? info->evtchn : 0;
1798
1799         if (!VALID_EVTCHN(evtchn))
1800                 return -1;
1801
1802         if (!xen_support_evtchn_rebind())
1803                 return -1;
1804
1805         /* Send future instances of this interrupt to the target vcpu. */
1806         bind_vcpu.port = evtchn;
1807         bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
1808
1809         /*
1810          * Mask the event while changing the VCPU binding to prevent
1811          * it being delivered on an unexpected VCPU.
1812          */
1813         do_mask(info, EVT_MASK_REASON_TEMPORARY);
1814
1815         /*
1816          * If this fails, it usually just indicates that we're dealing with a
1817          * virq or IPI channel, which doesn't actually need to be rebound.
1818          * Ignore the error, but skip the Linux-level rebind in that case.
1819          */
1820         if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1821                 bind_evtchn_to_cpu(evtchn, tcpu, false);
1822
1823         do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1824
1825         return 0;
1826 }
1827
1828 /*
1829  * Find the CPU within @dest mask which has the least number of channels
1830  * assigned. This is not precise as the per cpu counts can be modified
1831  * concurrently.
1832  */
1833 static unsigned int select_target_cpu(const struct cpumask *dest)
1834 {
1835         unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
1836
1837         for_each_cpu_and(cpu, dest, cpu_online_mask) {
1838                 unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
1839
1840                 if (curch < minch) {
1841                         minch = curch;
1842                         best_cpu = cpu;
1843                 }
1844         }
1845
1846         /*
1847          * Catch the unlikely case that dest contains no online CPUs. The
1848          * recursive fallback cannot loop, as cpu_online_mask is never empty.
1849          */
1850         if (best_cpu == UINT_MAX)
1851                 return select_target_cpu(cpu_online_mask);
1852
1853         return best_cpu;
1854 }
1855
1856 static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1857                             bool force)
1858 {
1859         unsigned int tcpu = select_target_cpu(dest);
1860         int ret;
1861
1862         ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
1863         if (!ret)
1864                 irq_data_update_effective_affinity(data, cpumask_of(tcpu));
1865
1866         return ret;
1867 }
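
/*
 * Usage sketch (illustrative): drivers do not call this directly; affinity
 * requests go through the generic irq layer (or through irqbalance via
 * /proc/irq/<nr>/smp_affinity) and end up here for any interrupt using one
 * of the xen irq_chips below. The actual target is the least loaded online
 * CPU from the requested mask, as picked by select_target_cpu().
 *
 *	irq_set_affinity(irq, cpumask_of(target_cpu));
 */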
1868
1869 static void enable_dynirq(struct irq_data *data)
1870 {
1871         struct irq_info *info = info_for_irq(data->irq);
1872         evtchn_port_t evtchn = info ? info->evtchn : 0;
1873
1874         if (VALID_EVTCHN(evtchn))
1875                 do_unmask(info, EVT_MASK_REASON_EXPLICIT);
1876 }
1877
1878 static void disable_dynirq(struct irq_data *data)
1879 {
1880         struct irq_info *info = info_for_irq(data->irq);
1881         evtchn_port_t evtchn = info ? info->evtchn : 0;
1882
1883         if (VALID_EVTCHN(evtchn))
1884                 do_mask(info, EVT_MASK_REASON_EXPLICIT);
1885 }
1886
1887 static void ack_dynirq(struct irq_data *data)
1888 {
1889         struct irq_info *info = info_for_irq(data->irq);
1890         evtchn_port_t evtchn = info ? info->evtchn : 0;
1891
1892         if (VALID_EVTCHN(evtchn))
1893                 event_handler_exit(info);
1894 }
1895
1896 static void mask_ack_dynirq(struct irq_data *data)
1897 {
1898         disable_dynirq(data);
1899         ack_dynirq(data);
1900 }
1901
1902 static void lateeoi_ack_dynirq(struct irq_data *data)
1903 {
1904         struct irq_info *info = info_for_irq(data->irq);
1905         evtchn_port_t evtchn = info ? info->evtchn : 0;
1906
1907         if (VALID_EVTCHN(evtchn)) {
1908                 do_mask(info, EVT_MASK_REASON_EOI_PENDING);
1909                 /*
1910                  * Don't call event_handler_exit().
1911                  * Need to keep is_active non-zero in order to ignore re-raised
1912                  * events after cpu affinity changes while a lateeoi is pending.
1913                  */
1914                 clear_evtchn(evtchn);
1915         }
1916 }
1917
1918 static void lateeoi_mask_ack_dynirq(struct irq_data *data)
1919 {
1920         struct irq_info *info = info_for_irq(data->irq);
1921         evtchn_port_t evtchn = info ? info->evtchn : 0;
1922
1923         if (VALID_EVTCHN(evtchn)) {
1924                 do_mask(info, EVT_MASK_REASON_EXPLICIT);
1925                 event_handler_exit(info);
1926         }
1927 }
1928
1929 static int retrigger_dynirq(struct irq_data *data)
1930 {
1931         struct irq_info *info = info_for_irq(data->irq);
1932         evtchn_port_t evtchn = info ? info->evtchn : 0;
1933
1934         if (!VALID_EVTCHN(evtchn))
1935                 return 0;
1936
1937         do_mask(info, EVT_MASK_REASON_TEMPORARY);
1938         set_evtchn(evtchn);
1939         do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1940
1941         return 1;
1942 }
1943
1944 static void restore_pirqs(void)
1945 {
1946         int pirq, rc, irq, gsi;
1947         struct physdev_map_pirq map_irq;
1948         struct irq_info *info;
1949
1950         list_for_each_entry(info, &xen_irq_list_head, list) {
1951                 if (info->type != IRQT_PIRQ)
1952                         continue;
1953
1954                 pirq = info->u.pirq.pirq;
1955                 gsi = info->u.pirq.gsi;
1956                 irq = info->irq;
1957
1958                 /* Save/restore of PT devices doesn't work, so at this point
1959                  * the only devices present are GSI-based emulated devices. */
1960                 if (!gsi)
1961                         continue;
1962
1963                 map_irq.domid = DOMID_SELF;
1964                 map_irq.type = MAP_PIRQ_TYPE_GSI;
1965                 map_irq.index = gsi;
1966                 map_irq.pirq = pirq;
1967
1968                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1969                 if (rc) {
1970                         pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1971                                 gsi, irq, pirq, rc);
1972                         xen_free_irq(irq);
1973                         continue;
1974                 }
1975
1976                 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1977
1978                 __startup_pirq(irq);
1979         }
1980 }
1981
1982 static void restore_cpu_virqs(unsigned int cpu)
1983 {
1984         struct evtchn_bind_virq bind_virq;
1985         evtchn_port_t evtchn;
1986         int virq, irq;
1987
1988         for (virq = 0; virq < NR_VIRQS; virq++) {
1989                 if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1990                         continue;
1991
1992                 BUG_ON(virq_from_irq(irq) != virq);
1993
1994                 /* Get a new binding from Xen. */
1995                 bind_virq.virq = virq;
1996                 bind_virq.vcpu = xen_vcpu_nr(cpu);
1997                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1998                                                 &bind_virq) != 0)
1999                         BUG();
2000                 evtchn = bind_virq.port;
2001
2002                 /* Record the new mapping. */
2003                 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
2004                 /* The affinity mask is still valid */
2005                 bind_evtchn_to_cpu(evtchn, cpu, false);
2006         }
2007 }
2008
2009 static void restore_cpu_ipis(unsigned int cpu)
2010 {
2011         struct evtchn_bind_ipi bind_ipi;
2012         evtchn_port_t evtchn;
2013         int ipi, irq;
2014
2015         for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
2016                 if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
2017                         continue;
2018
2019                 BUG_ON(ipi_from_irq(irq) != ipi);
2020
2021                 /* Get a new binding from Xen. */
2022                 bind_ipi.vcpu = xen_vcpu_nr(cpu);
2023                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
2024                                                 &bind_ipi) != 0)
2025                         BUG();
2026                 evtchn = bind_ipi.port;
2027
2028                 /* Record the new mapping. */
2029                 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
2030                 /* The affinity mask is still valid */
2031                 bind_evtchn_to_cpu(evtchn, cpu, false);
2032         }
2033 }
2034
2035 /* Clear an irq's pending state, in preparation for polling on it */
2036 void xen_clear_irq_pending(int irq)
2037 {
2038         struct irq_info *info = info_for_irq(irq);
2039         evtchn_port_t evtchn = info ? info->evtchn : 0;
2040
2041         if (VALID_EVTCHN(evtchn))
2042                 event_handler_exit(info);
2043 }
2044 EXPORT_SYMBOL(xen_clear_irq_pending);
2045 void xen_set_irq_pending(int irq)
2046 {
2047         evtchn_port_t evtchn = evtchn_from_irq(irq);
2048
2049         if (VALID_EVTCHN(evtchn))
2050                 set_evtchn(evtchn);
2051 }
2052
2053 bool xen_test_irq_pending(int irq)
2054 {
2055         evtchn_port_t evtchn = evtchn_from_irq(irq);
2056         bool ret = false;
2057
2058         if (VALID_EVTCHN(evtchn))
2059                 ret = test_evtchn(evtchn);
2060
2061         return ret;
2062 }
2063
2064 /* Poll waiting for an irq to become pending with timeout.  In the usual case,
2065  * the irq will be disabled so it won't deliver an interrupt. */
2066 void xen_poll_irq_timeout(int irq, u64 timeout)
2067 {
2068         evtchn_port_t evtchn = evtchn_from_irq(irq);
2069
2070         if (VALID_EVTCHN(evtchn)) {
2071                 struct sched_poll poll;
2072
2073                 poll.nr_ports = 1;
2074                 poll.timeout = timeout;
2075                 set_xen_guest_handle(poll.ports, &evtchn);
2076
2077                 if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
2078                         BUG();
2079         }
2080 }
2081 EXPORT_SYMBOL(xen_poll_irq_timeout);
2082 /* Poll waiting for an irq to become pending.  In the usual case, the
2083  * irq will be disabled so it won't deliver an interrupt. */
2084 void xen_poll_irq(int irq)
2085 {
2086         xen_poll_irq_timeout(irq, 0 /* no timeout */);
2087 }
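
/*
 * Polling sketch (illustrative, condition_is_ready() is hypothetical): the
 * canonical pattern, as used by the PV spinlock code, clears the pending
 * state first, re-checks the wake-up condition to close the race and only
 * then blocks in the hypervisor until the event is re-raised.
 *
 *	xen_clear_irq_pending(irq);
 *	if (!condition_is_ready())
 *		xen_poll_irq(irq);
 */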
2088
2089 /* Check whether the IRQ line is shared with other guests. */
2090 int xen_test_irq_shared(int irq)
2091 {
2092         struct irq_info *info = info_for_irq(irq);
2093         struct physdev_irq_status_query irq_status;
2094
2095         if (WARN_ON(!info))
2096                 return -ENOENT;
2097
2098         irq_status.irq = info->u.pirq.pirq;
2099
2100         if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
2101                 return 0;
2102         return !(irq_status.flags & XENIRQSTAT_shared);
2103 }
2104 EXPORT_SYMBOL_GPL(xen_test_irq_shared);
2105
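/*
 * Re-establish event channel bindings after a suspend/resume cycle: the
 * hypervisor has dropped all bindings, so per-cpu VIRQs and IPIs are
 * re-bound and GSI-based PIRQs are re-mapped here, while interdomain
 * channels are typically re-established by their owners during xenbus
 * resume (e.g. via rebind_evtchn_irq() for the xenstore channel).
 */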
2106 void xen_irq_resume(void)
2107 {
2108         unsigned int cpu;
2109         struct irq_info *info;
2110
2111         /* New event-channel space is not 'live' yet. */
2112         xen_evtchn_resume();
2113
2114         /* No IRQ <-> event-channel mappings. */
2115         list_for_each_entry(info, &xen_irq_list_head, list) {
2116                 /* Zap event-channel binding */
2117                 info->evtchn = 0;
2118                 /* Adjust accounting */
2119                 channels_on_cpu_dec(info);
2120         }
2121
2122         clear_evtchn_to_irq_all();
2123
2124         for_each_possible_cpu(cpu) {
2125                 restore_cpu_virqs(cpu);
2126                 restore_cpu_ipis(cpu);
2127         }
2128
2129         restore_pirqs();
2130 }
2131
2132 static struct irq_chip xen_dynamic_chip __read_mostly = {
2133         .name                   = "xen-dyn",
2134
2135         .irq_disable            = disable_dynirq,
2136         .irq_mask               = disable_dynirq,
2137         .irq_unmask             = enable_dynirq,
2138
2139         .irq_ack                = ack_dynirq,
2140         .irq_mask_ack           = mask_ack_dynirq,
2141
2142         .irq_set_affinity       = set_affinity_irq,
2143         .irq_retrigger          = retrigger_dynirq,
2144 };
2145
2146 static struct irq_chip xen_lateeoi_chip __read_mostly = {
2147         /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
2148         .name                   = "xen-dyn-lateeoi",
2149
2150         .irq_disable            = disable_dynirq,
2151         .irq_mask               = disable_dynirq,
2152         .irq_unmask             = enable_dynirq,
2153
2154         .irq_ack                = lateeoi_ack_dynirq,
2155         .irq_mask_ack           = lateeoi_mask_ack_dynirq,
2156
2157         .irq_set_affinity       = set_affinity_irq,
2158         .irq_retrigger          = retrigger_dynirq,
2159 };
2160
2161 static struct irq_chip xen_pirq_chip __read_mostly = {
2162         .name                   = "xen-pirq",
2163
2164         .irq_startup            = startup_pirq,
2165         .irq_shutdown           = shutdown_pirq,
2166         .irq_enable             = enable_pirq,
2167         .irq_disable            = disable_pirq,
2168
2169         .irq_mask               = disable_dynirq,
2170         .irq_unmask             = enable_dynirq,
2171
2172         .irq_ack                = eoi_pirq,
2173         .irq_eoi                = eoi_pirq,
2174         .irq_mask_ack           = mask_ack_pirq,
2175
2176         .irq_set_affinity       = set_affinity_irq,
2177
2178         .irq_retrigger          = retrigger_dynirq,
2179 };
2180
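/*
 * Note that the per-cpu chip has no .irq_set_affinity callback: per-cpu
 * VIRQs and IPIs stay bound to the vcpu they were allocated on for their
 * whole lifetime.
 */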
2181 static struct irq_chip xen_percpu_chip __read_mostly = {
2182         .name                   = "xen-percpu",
2183
2184         .irq_disable            = disable_dynirq,
2185         .irq_mask               = disable_dynirq,
2186         .irq_unmask             = enable_dynirq,
2187
2188         .irq_ack                = ack_dynirq,
2189 };
2190
2191 #ifdef CONFIG_X86
2192 #ifdef CONFIG_XEN_PVHVM
2193 /* Vector callbacks are better than PCI interrupts for receiving event
2194  * channel notifications because we can receive vector callbacks on any
2195  * vcpu and we don't need PCI support or APIC interactions. */
2196 void xen_setup_callback_vector(void)
2197 {
2198         uint64_t callback_via;
2199
2200         if (xen_have_vector_callback) {
2201                 callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
2202                 if (xen_set_callback_via(callback_via)) {
2203                         pr_err("Request for Xen HVM callback vector failed\n");
2204                         xen_have_vector_callback = false;
2205                 }
2206         }
2207 }
2208
2209 /*
2210  * Set up per-vCPU vector-type callbacks. If this setup is unavailable,
2211  * fall back to the global vector-type callback.
2212  */
2213 static __init void xen_init_setup_upcall_vector(void)
2214 {
2215         if (!xen_have_vector_callback)
2216                 return;
2217
2218         if ((cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_UPCALL_VECTOR) &&
2219             !xen_set_upcall_vector(0))
2220                 xen_percpu_upcall = true;
2221         else if (xen_feature(XENFEAT_hvm_callback_vector))
2222                 xen_setup_callback_vector();
2223         else
2224                 xen_have_vector_callback = false;
2225 }
2226
2227 int xen_set_upcall_vector(unsigned int cpu)
2228 {
2229         int rc;
2230         xen_hvm_evtchn_upcall_vector_t op = {
2231                 .vector = HYPERVISOR_CALLBACK_VECTOR,
2232                 .vcpu = per_cpu(xen_vcpu_id, cpu),
2233         };
2234
2235         rc = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &op);
2236         if (rc)
2237                 return rc;
2238
2239         /* Trick the toolstack into thinking we are enlightened. */
2240         if (!cpu)
2241                 rc = xen_set_callback_via(1);
2242
2243         return rc;
2244 }
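
/*
 * Registration sketch (illustrative): the boot CPU is handled by
 * xen_init_setup_upcall_vector() above; when per-vCPU callbacks are in
 * use, the CPU bring-up path is expected to repeat the registration for
 * each secondary vCPU, roughly:
 *
 *	if (xen_percpu_upcall)
 *		rc = xen_set_upcall_vector(cpu);
 */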
2245
2246 static __init void xen_alloc_callback_vector(void)
2247 {
2248         if (!xen_have_vector_callback)
2249                 return;
2250
2251         pr_info("Xen HVM callback vector for event delivery is enabled\n");
2252         alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
2253 }
2254 #else
2255 void xen_setup_callback_vector(void) {}
2256 static inline void xen_init_setup_upcall_vector(void) {}
2257 int xen_set_upcall_vector(unsigned int cpu) { return 0; }
2258 static inline void xen_alloc_callback_vector(void) {}
2259 #endif /* CONFIG_XEN_PVHVM */
2260 #endif /* CONFIG_X86 */
2261
2262 bool xen_fifo_events = true;
2263 module_param_named(fifo_events, xen_fifo_events, bool, 0);
2264
2265 static int xen_evtchn_cpu_prepare(unsigned int cpu)
2266 {
2267         int ret = 0;
2268
2269         xen_cpu_init_eoi(cpu);
2270
2271         if (evtchn_ops->percpu_init)
2272                 ret = evtchn_ops->percpu_init(cpu);
2273
2274         return ret;
2275 }
2276
2277 static int xen_evtchn_cpu_dead(unsigned int cpu)
2278 {
2279         int ret = 0;
2280
2281         if (evtchn_ops->percpu_deinit)
2282                 ret = evtchn_ops->percpu_deinit(cpu);
2283
2284         return ret;
2285 }
2286
2287 void __init xen_init_IRQ(void)
2288 {
2289         int ret = -EINVAL;
2290         evtchn_port_t evtchn;
2291
2292         if (xen_fifo_events)
2293                 ret = xen_evtchn_fifo_init();
2294         if (ret < 0) {
2295                 xen_evtchn_2l_init();
2296                 xen_fifo_events = false;
2297         }
2298
2299         xen_cpu_init_eoi(smp_processor_id());
2300
2301         cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
2302                                   "xen/evtchn:prepare",
2303                                   xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
2304
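        /*
         * evtchn_to_irq is a two-level table: only the array of row
         * pointers is allocated here, the rows themselves are allocated on
         * demand when an event channel in the corresponding range is first
         * mapped to an irq.
         */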
2305         evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
2306                                 sizeof(*evtchn_to_irq), GFP_KERNEL);
2307         BUG_ON(!evtchn_to_irq);
2308
2309         /* No event channels are 'live' right now. */
2310         for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
2311                 mask_evtchn(evtchn);
2312
2313         pirq_needs_eoi = pirq_needs_eoi_flag;
2314
2315 #ifdef CONFIG_X86
2316         if (xen_pv_domain()) {
2317                 if (xen_initial_domain())
2318                         pci_xen_initial_domain();
2319         }
2320         xen_init_setup_upcall_vector();
2321         xen_alloc_callback_vector();
2322
2323
2324         if (xen_hvm_domain()) {
2325                 native_init_IRQ();
2326                 /* pci_xen_hvm_init must be called after native_init_IRQ so that
2327                  * __acpi_register_gsi can point at the right function */
2328                 pci_xen_hvm_init();
2329         } else {
2330                 int rc;
2331                 struct physdev_pirq_eoi_gmfn eoi_gmfn;
2332
2333                 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
2334                 eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
2335                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
2336                 if (rc != 0) {
2337                         free_page((unsigned long) pirq_eoi_map);
2338                         pirq_eoi_map = NULL;
2339                 } else
2340                         pirq_needs_eoi = pirq_check_eoi_map;
2341         }
2342 #endif
2343 }