 * P2M_PER_PAGE depends on the architecture, as an mfn is always
 * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to
* 512 and 1024 entries respectively.
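 * (Spelled out, and assuming the usual 4KB x86 page size, that is
 * P2M_PER_PAGE = PAGE_SIZE / sizeof(unsigned long), i.e. 4096 / 8 = 512
 * entries per page on 64-bit and 4096 / 4 = 1024 entries on 32-bit.)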
+++ *
+++ * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
+++ *
+++ * However, not all entries are filled with MFNs. If a leaf entry is void,
+++ * or the covering top or middle entry is void, we treat that PFN as
+++ * "missing". So (for example) pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
+++ *
+++ * We also have the possibility of setting 1-1 mappings on certain regions, so
+++ * that:
+++ * pfn_to_mfn(0xc0000)=0xc0000
+++ *
+++ * The benefit of this is that for non-RAM regions (think PCI BARs, or
+++ * ACPI spaces) we can create mappings easily, because the PFN value
+++ * matches the MFN.
+++ *
+++ * For this to work efficiently we have one new page p2m_identity and
+++ * allocate (via reserve_brk) any other pages we need to cover the sides
+++ * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
+++ * INVALID_P2M_ENTRY (the Xen toolstack only recognizes that value and real
+++ * MFNs, no other fancy values).
+++ *
+++ * On lookup we spot that the entry points to p2m_identity and return the
+++ * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
+++ * If the entry points to an allocated page, we just proceed as before and
+++ * return the stored value. If that value has the IDENTITY_FRAME_BIT set we
+++ * unmask it in the appropriate functions (pfn_to_mfn).
+++ *
+++ * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
+++ * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
+++ * non-identity pfn. To protect ourselves against that, we elect to set (and
+++ * get) the IDENTITY_FRAME_BIT on all identity-mapped PFNs.
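+++ * (For illustration only, since the actual definitions are not shown in
+++ * this file, the encoding amounts to roughly:
+++ *     IDENTITY_FRAME_BIT  = 1UL << (BITS_PER_LONG - 2)
+++ *     IDENTITY_FRAME(pfn) = (pfn) | IDENTITY_FRAME_BIT
+++ * so an identity-mapped PFN can always be told apart from a plain PFN or
+++ * MFN of the same numeric value.)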
+++ *
+++ * This simplistic diagram is used to explain the more subtle piece of code.
+++ * There is also a diagram of the P2M at the end that can help.
+++ * Imagine your E820 looking like this:
+++ *
+++ * 1GB 2GB
+++ * /-------------------+---------\/----\ /----------\ /---+-----\
+++ * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
+++ * \-------------------+---------/\----/ \----------/ \---+-----/
+++ * ^- 1029MB ^- 2001MB
+++ *
+++ * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
+++ * 2048MB = 524288 (0x80000)]
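+++ * (The conversion is simply MB * 256, since each MB holds 256 4KB pages:
+++ * 1029 * 256 = 263424, 2001 * 256 = 512256, and 2048 * 256 = 524288.)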
+++ *
+++ * And dom0_mem=max:3GB,1GB is passed to the guest, meaning memory past 1GB
+++ * is actually not present (we would have to kick the balloon driver to put
+++ * it in).
+++ *
+++ * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
+++ * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
+++ * PFN and the end PFN (263424 and 512256 respectively). The first step is to
+++ * reserve_brk a top leaf page if p2m[1] is missing. The top leaf page covers
+++ * 512^2 of page estate (1GB), and in case the start or end PFN is not aligned
+++ * on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from the start pfn to
+++ * the end pfn. We reserve_brk top leaf pages if they are missing (meaning they
+++ * point to p2m_mid_missing).
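+++ * (In numbers, on 64-bit: a top leaf page holds 512 entries, each pointing
+++ * at a leaf page of 512 PFN entries, so it spans 512 * 512 * 4KB = 1GB;
+++ * a single leaf page spans 512 * 4KB = 2MB, or 1024 * 4KB = 4MB on 32-bit,
+++ * which is where the "4MB (or 2MB)" boundary below comes from.)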
+++ *
+++ * With the E820 example above, 263424 is not 1GB aligned so we allocate a
+++ * reserve_brk page which will cover the PFN estate from 0x40000 to 0x80000.
+++ * Each entry in the allocated page is "missing" (points to p2m_missing).
+++ *
+++ * The next stage is to determine if we need to do a more granular boundary
+++ * check on the 4MB (or 2MB, depending on architecture) boundaries of the
+++ * start and end PFNs. We check if the start pfn and end pfn violate that
+++ * boundary check, and if so reserve_brk a middle (p2m[x][y]) leaf page. This
+++ * way we have a much finer granularity of setting which PFNs are missing and
+++ * which ones are identity. In our example 263424 and 512256 both fail the
+++ * check, so we reserve_brk two pages. We populate them with INVALID_P2M_ENTRY
+++ * (so they both have "missing" values) and assign them to p2m[1][2] and
+++ * p2m[1][488] respectively.
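+++ * (For reference, with 512 entries per level the indices decompose as
+++ * 263424 = 1*512*512 + 2*512 + 256, hence p2m[1][2], and
+++ * 512256 = 1*512*512 + 488*512 + 256, hence p2m[1][488].)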
+++ *
+++ * At this point we would at minimum reserve_brk one page, but it could be up
+++ * to three. Each call to set_phys_range_identity has at maximum a three page
+++ * cost. If we were to query the P2M at this stage, all those entries from
+++ * the start PFN through the end PFN (so 1029MB -> 2001MB) would return
+++ * INVALID_P2M_ENTRY ("missing").
+++ *
+++ * The next step is to walk from the start pfn to the end pfn setting
+++ * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
+++ * If we find that the middle leaf is pointing to p2m_missing we can swap it
+++ * over to p2m_identity - this way covering 4MB (or 2MB) of PFN space. At this
+++ * point we do not need to worry about boundary alignment (so no need to
+++ * reserve_brk a middle page, or to figure out which PFNs are "missing" and
+++ * which ones are identity), as that has been done earlier. If we find that the
+++ * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
+++ * that page (which covers 512 PFNs) and set the appropriate PFNs with the
+++ * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
+++ * set the entries p2m[1][2][256->511] and p2m[1][488][0->255] to values with
+++ * the IDENTITY_FRAME_BIT set.
+++ *
+++ * All other regions that are void (or not filled) either point to p2m_missing
+++ * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
+++ * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][256->511]
+++ * contain the INVALID_P2M_ENTRY value and are considered "missing".
+++ *
+++ * This is what the p2m ends up looking like (for the E820 above), as shown
+++ * in this fabulous drawing:
+++ *
+++ * p2m /--------------\
+++ * /-----\ | &mfn_list[0],| /-----------------\
+++ * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
+++ * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
+++ * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
+++ * |-----| \ | [p2m_identity]+\\ | .... |
+++ * | 2 |--\ \-------------------->| ... | \\ \----------------/
+++ * |-----| \ \---------------/ \\
+++ * | 3 |\ \ \\ p2m_identity
+++ * |-----| \ \-------------------->/---------------\ /-----------------\
+++ * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
+++ * \-----/ / | [p2m_identity]+-->| ..., ~0 |
+++ * / /---------------\ | .... | \-----------------/
+++ * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
+++ * / | IDENTITY[@256]|<----/ \---------------/
+++ * / | ~0, ~0, .... |
+++ * | \---------------/
+++ * |
+++ * p2m_missing p2m_missing
+++ * /------------------\ /------------\
+++ * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
+++ * | [p2m_mid_missing]+---->| ..., ~0 |
+++ * \------------------/ \------------/
+++ *
+++ * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_FRAME_BIT).
*/
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
+++#include <linux/seq_file.h>
#include <asm/cache.h>
#include <asm/setup.h>
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
+++static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
+++
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+++/* We might hit two boundary violations, one at the start and one at the
+++ * end; each boundary violation will require at most three middle nodes. */
+++RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+++
static inline unsigned p2m_top_index(unsigned long pfn)
{
BUG_ON(pfn >= MAX_P2M_PFN);
* - After resume we're called from within stop_machine, but the mfn
 * tree should already be completely allocated.
*/
--- void xen_build_mfn_list_list(void)
+++ void __ref xen_build_mfn_list_list(void)
{
unsigned long pfn;
p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_init(p2m_top);
+++ p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
+++ p2m_init(p2m_identity);
+++
/*
* The domain builder gives us a pre-constructed p2m array in
* mfn_list for all the pages initially given to us, so we just
p2m_top[topidx] = mid;
}
+ /*
+ * As long as the mfn_list has enough entries to completely
+ * fill a p2m page, pointing into the array is ok. But if
+ * not the entries beyond the last pfn will be undefined.
+ */
+ if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
+ unsigned long p2midx;
+
+ p2midx = max_pfn % P2M_PER_PAGE;
+ for ( ; p2midx < P2M_PER_PAGE; p2midx++)
+ mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY;
+ }
p2m_top[topidx][mididx] = &mfn_list[pfn];
}
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
+++ /*
+++ * The INVALID_P2M_ENTRY is filled in both p2m_*identity
+++ * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
+++ * would be wrong.
+++ */
+++ if (p2m_top[topidx][mididx] == p2m_identity)
+++ return IDENTITY_FRAME(pfn);
+++
return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
p2m_top_mfn_p[topidx] = mid_mfn;
}
--- if (p2m_top[topidx][mididx] == p2m_missing) {
+++ if (p2m_top[topidx][mididx] == p2m_identity ||
+++ p2m_top[topidx][mididx] == p2m_missing) {
/* p2m leaf page is missing */
unsigned long *p2m;
+++ unsigned long *p2m_orig = p2m_top[topidx][mididx];
p2m = alloc_p2m_page();
if (!p2m)
p2m_init(p2m);
--- if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
+++ if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
free_p2m_page(p2m);
else
mid_mfn[mididx] = virt_to_mfn(p2m);
return true;
}
+++bool __early_alloc_p2m(unsigned long pfn)
+++{
+++ unsigned topidx, mididx, idx;
+++
+++ topidx = p2m_top_index(pfn);
+++ mididx = p2m_mid_index(pfn);
+++ idx = p2m_index(pfn);
+++
+++	/* Pfff.. No boundary cross-over, let's get out. */
+++ if (!idx)
+++ return false;
+++
+++ WARN(p2m_top[topidx][mididx] == p2m_identity,
+++ "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
+++ topidx, mididx);
+++
+++	/*
+++	 * This leaf may already have been allocated by
+++	 * xen_build_dynamic_phys_to_machine().
+++	 */
+++ if (p2m_top[topidx][mididx] != p2m_missing)
+++ return false;
+++
+++ /* Boundary cross-over for the edges: */
+++ if (idx) {
+++ unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+++
+++ p2m_init(p2m);
+++
+++ p2m_top[topidx][mididx] = p2m;
+++
+++ }
+++ return idx != 0;
+++}
+++unsigned long set_phys_range_identity(unsigned long pfn_s,
+++ unsigned long pfn_e)
+++{
+++ unsigned long pfn;
+++
+++ if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
+++ return 0;
+++
+++ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
+++ return pfn_e - pfn_s;
+++
+++ if (pfn_s > pfn_e)
+++ return 0;
+++
+++ for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
+++ pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
+++ pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
+++ {
+++ unsigned topidx = p2m_top_index(pfn);
+++ if (p2m_top[topidx] == p2m_mid_missing) {
+++ unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+++
+++ p2m_mid_init(mid);
+++
+++ p2m_top[topidx] = mid;
+++ }
+++ }
+++
+++ __early_alloc_p2m(pfn_s);
+++ __early_alloc_p2m(pfn_e);
+++
+++ for (pfn = pfn_s; pfn < pfn_e; pfn++)
+++ if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
+++ break;
+++
+++ if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
+++ "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
+++ (pfn_e - pfn_s) - (pfn - pfn_s)))
+++ printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
+++
+++ return pfn - pfn_s;
+++}
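+++/*
+++ * For illustration only (this call is not in this file, and the numbers
+++ * simply mirror the E820 example in the comment at the top of the file),
+++ * a caller such as the early E820 setup code would do something like:
+++ *
+++ *	unsigned long done;
+++ *
+++ *	done = set_phys_range_identity(0x40500, 0x7d100);
+++ *
+++ * On success 'done' equals 0x7d100 - 0x40500, the number of PFNs that now
+++ * carry the IDENTITY_FRAME_BIT.
+++ */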
+++
/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, mididx, idx;
+++ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+++ BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+++ return true;
+++ }
if (unlikely(pfn >= MAX_P2M_PFN)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
+++	/* For sparse holes, where the p2m leaf has a real PFN along with
+++	 * PCI holes, stick in the PFN as the MFN value.
+++	 */
+++ if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
+++ if (p2m_top[topidx][mididx] == p2m_identity)
+++ return true;
+++
+++ /* Swap over from MISSING to IDENTITY if needed. */
+++ if (p2m_top[topidx][mididx] == p2m_missing) {
+++ WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
+++ p2m_identity) != p2m_missing);
+++ return true;
+++ }
+++ }
+++
if (p2m_top[topidx][mididx] == p2m_missing)
return mfn == INVALID_P2M_ENTRY;
bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
--- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
--- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
--- return true;
--- }
---
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
if (!alloc_p2m(pfn))
return false;
{
unsigned long flags;
unsigned long pfn;
--- unsigned long address;
+++ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
unsigned long flags;
unsigned long mfn;
unsigned long pfn;
--- unsigned long address;
+++ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
return ret;
}
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
+++
+++#ifdef CONFIG_XEN_DEBUG_FS
+++
+++int p2m_dump_show(struct seq_file *m, void *v)
+++{
+++ static const char * const level_name[] = { "top", "middle",
+++ "entry", "abnormal" };
+++ static const char * const type_name[] = { "identity", "missing",
+++ "pfn", "abnormal"};
+++#define TYPE_IDENTITY 0
+++#define TYPE_MISSING 1
+++#define TYPE_PFN 2
+++#define TYPE_UNKNOWN 3
+++ unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
+++ unsigned int uninitialized_var(prev_level);
+++ unsigned int uninitialized_var(prev_type);
+++
+++ if (!p2m_top)
+++ return 0;
+++
+++ for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
+++ unsigned topidx = p2m_top_index(pfn);
+++ unsigned mididx = p2m_mid_index(pfn);
+++ unsigned idx = p2m_index(pfn);
+++ unsigned lvl, type;
+++
+++ lvl = 4;
+++ type = TYPE_UNKNOWN;
+++ if (p2m_top[topidx] == p2m_mid_missing) {
+++ lvl = 0; type = TYPE_MISSING;
+++ } else if (p2m_top[topidx] == NULL) {
+++ lvl = 0; type = TYPE_UNKNOWN;
+++ } else if (p2m_top[topidx][mididx] == NULL) {
+++ lvl = 1; type = TYPE_UNKNOWN;
+++ } else if (p2m_top[topidx][mididx] == p2m_identity) {
+++ lvl = 1; type = TYPE_IDENTITY;
+++ } else if (p2m_top[topidx][mididx] == p2m_missing) {
+++ lvl = 1; type = TYPE_MISSING;
+++ } else if (p2m_top[topidx][mididx][idx] == 0) {
+++ lvl = 2; type = TYPE_UNKNOWN;
+++ } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
+++ lvl = 2; type = TYPE_IDENTITY;
+++ } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
+++ lvl = 2; type = TYPE_MISSING;
+++ } else if (p2m_top[topidx][mididx][idx] == pfn) {
+++ lvl = 2; type = TYPE_PFN;
+++ } else if (p2m_top[topidx][mididx][idx] != pfn) {
+++ lvl = 2; type = TYPE_PFN;
+++ }
+++ if (pfn == 0) {
+++ prev_level = lvl;
+++ prev_type = type;
+++ }
+++ if (pfn == MAX_DOMAIN_PAGES-1) {
+++ lvl = 3;
+++ type = TYPE_UNKNOWN;
+++ }
+++ if (prev_type != type) {
+++ seq_printf(m, " [0x%lx->0x%lx] %s\n",
+++ prev_pfn_type, pfn, type_name[prev_type]);
+++ prev_pfn_type = pfn;
+++ prev_type = type;
+++ }
+++ if (prev_level != lvl) {
+++ seq_printf(m, " [0x%lx->0x%lx] level %s\n",
+++ prev_pfn_level, pfn, level_name[prev_level]);
+++ prev_pfn_level = pfn;
+++ prev_level = lvl;
+++ }
+++ }
+++ return 0;
+++#undef TYPE_IDENTITY
+++#undef TYPE_MISSING
+++#undef TYPE_PFN
+++#undef TYPE_UNKNOWN
+++}
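+++
+++/*
+++ * A sketch of how p2m_dump_show() could be exposed: wrap it with the usual
+++ * seq_file single_open pattern. The names below (p2m_dump_open,
+++ * p2m_dump_fops) and the eventual debugfs_create_file("p2m", ...) call are
+++ * assumptions; the actual registration is not part of this file and would
+++ * also need <linux/fs.h>/<linux/debugfs.h>.
+++ */
+++static int p2m_dump_open(struct inode *inode, struct file *filp)
+++{
+++	/* Hand the show routine to the seq_file core; no private data needed. */
+++	return single_open(filp, p2m_dump_show, NULL);
+++}
+++
+++static const struct file_operations p2m_dump_fops = {
+++	.open		= p2m_dump_open,
+++	.read		= seq_read,
+++	.llseek		= seq_lseek,
+++	.release	= single_release,
+++};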
+++#endif
static __initdata struct cpu_evtchn_s init_evtchn_mask = {
.bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
};
--- static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
+++ static struct cpu_evtchn_s __refdata *cpu_evtchn_mask_p = &init_evtchn_mask;
static inline unsigned long *cpu_evtchn_mask(int cpu)
{
BUG_ON(irq == -1);
#ifdef CONFIG_SMP
--- cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
+++ cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
#endif
clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
/* By default all event channels notify CPU#0. */
for_each_irq_desc(i, desc) {
--- cpumask_copy(desc->affinity, cpumask_of(0));
+++ cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
}
#endif
put_cpu();
}
---static int get_nr_hw_irqs(void)
+++static int xen_allocate_irq_dynamic(void)
{
--- int ret = 1;
+++ int first = 0;
+++ int irq;
#ifdef CONFIG_X86_IO_APIC
--- ret = get_nr_irqs_gsi();
+++	/*
+++	 * For an HVM guest or domain 0, which sees "real" (emulated or
+++	 * actual, respectively) GSIs, we allocate dynamic IRQs,
+++	 * e.g. those corresponding to event channels or MSIs
+++	 * etc., from the range above those "real" GSIs to avoid
+++	 * collisions.
+++	 */
+++ if (xen_initial_domain() || xen_hvm_domain())
+++ first = get_nr_irqs_gsi();
#endif
--- return ret;
---}
+++retry:
+++ irq = irq_alloc_desc_from(first, -1);
---static int find_unbound_pirq(int type)
---{
--- int rc, i;
--- struct physdev_get_free_pirq op_get_free_pirq;
--- op_get_free_pirq.type = type;
+++ if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
+++ printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
+++ first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
+++ goto retry;
+++ }
--- rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
--- if (!rc)
--- return op_get_free_pirq.pirq;
+++ if (irq < 0)
+++ panic("No available IRQ to bind to: increase nr_irqs!\n");
--- for (i = 0; i < nr_irqs; i++) {
--- if (pirq_to_irq[i] < 0)
--- return i;
--- }
--- return -1;
+++ return irq;
}
---static int find_unbound_irq(void)
+++static int xen_allocate_irq_gsi(unsigned gsi)
{
--- struct irq_data *data;
--- int irq, res;
--- int bottom = get_nr_hw_irqs();
--- int top = nr_irqs-1;
---
--- if (bottom == nr_irqs)
--- goto no_irqs;
+++ int irq;
--- /* This loop starts from the top of IRQ space and goes down.
--- * We need this b/c if we have a PCI device in a Xen PV guest
--- * we do not have an IO-APIC (though the backend might have them)
--- * mapped in. To not have a collision of physical IRQs with the Xen
--- * event channels start at the top of the IRQ space for virtual IRQs.
+++ /*
+++ * A PV guest has no concept of a GSI (since it has no ACPI
+++ * nor access to/knowledge of the physical APICs). Therefore
+++ * all IRQs are dynamically allocated from the entire IRQ
+++ * space.
*/
--- for (irq = top; irq > bottom; irq--) {
--- data = irq_get_irq_data(irq);
--- /* only 15->0 have init'd desc; handle irq > 16 */
--- if (!data)
--- break;
--- if (data->chip == &no_irq_chip)
--- break;
--- if (data->chip != &xen_dynamic_chip)
--- continue;
--- if (irq_info[irq].type == IRQT_UNBOUND)
--- return irq;
--- }
---
--- if (irq == bottom)
--- goto no_irqs;
+++ if (xen_pv_domain() && !xen_initial_domain())
+++ return xen_allocate_irq_dynamic();
--- res = irq_alloc_desc_at(irq, -1);
+++ /* Legacy IRQ descriptors are already allocated by the arch. */
+++ if (gsi < NR_IRQS_LEGACY)
+++ return gsi;
--- if (WARN_ON(res != irq))
--- return -1;
+++ irq = irq_alloc_desc_at(gsi, -1);
+++ if (irq < 0)
+++ panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);
return irq;
---
---no_irqs:
--- panic("No available IRQ to bind to: increase nr_irqs!\n");
}
---static bool identity_mapped_irq(unsigned irq)
+++static void xen_free_irq(unsigned irq)
{
--- /* identity map all the hardware irqs */
--- return irq < get_nr_hw_irqs();
+++ /* Legacy IRQ descriptors are managed by the arch. */
+++ if (irq < NR_IRQS_LEGACY)
+++ return;
+++
+++ irq_free_desc(irq);
}
static void pirq_unmask_notify(int irq)
return desc && desc->action == NULL;
}
---static unsigned int startup_pirq(unsigned int irq)
+++static unsigned int __startup_pirq(unsigned int irq)
{
struct evtchn_bind_pirq bind_pirq;
struct irq_info *info = info_for_irq(irq);
return 0;
}
---static void shutdown_pirq(unsigned int irq)
+++static unsigned int startup_pirq(struct irq_data *data)
+++{
+++ return __startup_pirq(data->irq);
+++}
+++
+++static void shutdown_pirq(struct irq_data *data)
{
struct evtchn_close close;
+++ unsigned int irq = data->irq;
struct irq_info *info = info_for_irq(irq);
int evtchn = evtchn_from_irq(irq);
info->evtchn = 0;
}
---static void enable_pirq(unsigned int irq)
+++static void enable_pirq(struct irq_data *data)
{
--- startup_pirq(irq);
+++ startup_pirq(data);
}
---static void disable_pirq(unsigned int irq)
+++static void disable_pirq(struct irq_data *data)
{
}
---static void ack_pirq(unsigned int irq)
+++static void ack_pirq(struct irq_data *data)
{
--- int evtchn = evtchn_from_irq(irq);
+++ int evtchn = evtchn_from_irq(data->irq);
--- move_native_irq(irq);
+++ move_native_irq(data->irq);
if (VALID_EVTCHN(evtchn)) {
mask_evtchn(evtchn);
}
}
---static void end_pirq(unsigned int irq)
---{
--- int evtchn = evtchn_from_irq(irq);
--- struct irq_desc *desc = irq_to_desc(irq);
---
--- if (WARN_ON(!desc))
--- return;
---
--- if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
--- (IRQ_DISABLED|IRQ_PENDING)) {
--- shutdown_pirq(irq);
--- } else if (VALID_EVTCHN(evtchn)) {
--- unmask_evtchn(evtchn);
--- pirq_unmask_notify(irq);
--- }
---}
---
static int find_irq_by_gsi(unsigned gsi)
{
int irq;
goto out; /* XXX need refcount? */
}
--- /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
--- * we are using the !xen_initial_domain() to drop in the function.*/
--- if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
--- xen_pv_domain())) {
--- irq = gsi;
--- irq_alloc_desc_at(irq, -1);
--- } else
--- irq = find_unbound_irq();
+++ irq = xen_allocate_irq_gsi(gsi);
set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
handle_level_irq, name);
* this in the priv domain. */
if (xen_initial_domain() &&
HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
--- irq_free_desc(irq);
+++ xen_free_irq(irq);
irq = -ENOSPC;
goto out;
}
}
#ifdef CONFIG_PCI_MSI
---#include <linux/msi.h>
---#include "../pci/msi.h"
---
---void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
+++int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
{
--- spin_lock(&irq_mapping_update_lock);
---
--- if (alloc & XEN_ALLOC_IRQ) {
--- *irq = find_unbound_irq();
--- if (*irq == -1)
--- goto out;
--- }
---
--- if (alloc & XEN_ALLOC_PIRQ) {
--- *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
--- if (*pirq == -1)
--- goto out;
--- }
+++ int rc;
+++ struct physdev_get_free_pirq op_get_free_pirq;
--- set_irq_chip_and_handler_name(*irq, &xen_pirq_chip,
--- handle_level_irq, name);
+++ op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
+++ rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
--- irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0);
--- pirq_to_irq[*pirq] = *irq;
+++ WARN_ONCE(rc == -ENOSYS,
+++ "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
---out:
--- spin_unlock(&irq_mapping_update_lock);
+++ return rc ? -1 : op_get_free_pirq.pirq;
}
---int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+++int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+++ int pirq, int vector, const char *name)
{
--- int irq = -1;
--- struct physdev_map_pirq map_irq;
--- int rc;
--- int pos;
--- u32 table_offset, bir;
---
--- memset(&map_irq, 0, sizeof(map_irq));
--- map_irq.domid = DOMID_SELF;
--- map_irq.type = MAP_PIRQ_TYPE_MSI;
--- map_irq.index = -1;
--- map_irq.pirq = -1;
--- map_irq.bus = dev->bus->number;
--- map_irq.devfn = dev->devfn;
---
--- if (type == PCI_CAP_ID_MSIX) {
--- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
---
--- pci_read_config_dword(dev, msix_table_offset_reg(pos),
--- &table_offset);
--- bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
---
--- map_irq.table_base = pci_resource_start(dev, bir);
--- map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
--- }
+++ int irq, ret;
spin_lock(&irq_mapping_update_lock);
--- irq = find_unbound_irq();
---
+++ irq = xen_allocate_irq_dynamic();
if (irq == -1)
goto out;
--- rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
--- if (rc) {
--- printk(KERN_WARNING "xen map irq failed %d\n", rc);
---
--- irq_free_desc(irq);
---
--- irq = -1;
--- goto out;
--- }
--- irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
---
set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
--- handle_level_irq,
--- (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
+++ handle_level_irq, name);
+++ irq_info[irq] = mk_pirq_info(0, pirq, 0, vector);
+++ pirq_to_irq[pirq] = irq;
+++ ret = set_irq_msi(irq, msidesc);
+++ if (ret < 0)
+++ goto error_irq;
out:
spin_unlock(&irq_mapping_update_lock);
return irq;
+++error_irq:
+++ spin_unlock(&irq_mapping_update_lock);
+++ xen_free_irq(irq);
+++ return -1;
}
#endif
printk(KERN_WARNING "unmap irq failed %d\n", rc);
goto out;
}
--- pirq_to_irq[info->u.pirq.pirq] = -1;
}
+++ pirq_to_irq[info->u.pirq.pirq] = -1;
+++
irq_info[irq] = mk_unbound_info();
--- irq_free_desc(irq);
+++ xen_free_irq(irq);
out:
spin_unlock(&irq_mapping_update_lock);
irq = evtchn_to_irq[evtchn];
if (irq == -1) {
--- irq = find_unbound_irq();
+++ irq = xen_allocate_irq_dynamic();
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_fasteoi_irq, "event");
irq = per_cpu(ipi_to_irq, cpu)[ipi];
if (irq == -1) {
--- irq = find_unbound_irq();
+++ irq = xen_allocate_irq_dynamic();
if (irq < 0)
goto out;
irq = per_cpu(virq_to_irq, cpu)[virq];
if (irq == -1) {
--- irq = find_unbound_irq();
+++ irq = xen_allocate_irq_dynamic();
set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
handle_percpu_irq, "virq");
if (irq_info[irq].type != IRQT_UNBOUND) {
irq_info[irq] = mk_unbound_info();
--- irq_free_desc(irq);
+++ xen_free_irq(irq);
}
spin_unlock(&irq_mapping_update_lock);
if (irq < 0)
return irq;
--- irqflags |= IRQF_NO_SUSPEND;
+++ irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
if (retval != 0) {
unbind_from_irq(irq);
return 0;
}
---static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
+++static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
+++ bool force)
{
unsigned tcpu = cpumask_first(dest);
--- return rebind_irq_to_cpu(irq, tcpu);
+++ return rebind_irq_to_cpu(data->irq, tcpu);
}
int resend_irq_on_evtchn(unsigned int irq)
return 1;
}
---static void enable_dynirq(unsigned int irq)
+++static void enable_dynirq(struct irq_data *data)
{
--- int evtchn = evtchn_from_irq(irq);
+++ int evtchn = evtchn_from_irq(data->irq);
if (VALID_EVTCHN(evtchn))
unmask_evtchn(evtchn);
}
---static void disable_dynirq(unsigned int irq)
+++static void disable_dynirq(struct irq_data *data)
{
--- int evtchn = evtchn_from_irq(irq);
+++ int evtchn = evtchn_from_irq(data->irq);
if (VALID_EVTCHN(evtchn))
mask_evtchn(evtchn);
}
---static void ack_dynirq(unsigned int irq)
+++static void ack_dynirq(struct irq_data *data)
{
--- int evtchn = evtchn_from_irq(irq);
+++ int evtchn = evtchn_from_irq(data->irq);
--- move_masked_irq(irq);
+++ move_masked_irq(data->irq);
if (VALID_EVTCHN(evtchn))
unmask_evtchn(evtchn);
}
---static int retrigger_dynirq(unsigned int irq)
+++static int retrigger_dynirq(struct irq_data *data)
{
--- int evtchn = evtchn_from_irq(irq);
+++ int evtchn = evtchn_from_irq(data->irq);
struct shared_info *sh = HYPERVISOR_shared_info;
int ret = 0;
printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
--- startup_pirq(irq);
+++ __startup_pirq(irq);
}
}
void xen_irq_resume(void)
{
unsigned int cpu, irq, evtchn;
--- struct irq_desc *desc;
init_evtchn_cpu_bindings();
restore_cpu_ipis(cpu);
}
--- /*
--- * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
--- * are not handled by the IRQ core.
--- */
--- for_each_irq_desc(irq, desc) {
--- if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
--- continue;
--- if (desc->status & IRQ_DISABLED)
--- continue;
---
--- evtchn = evtchn_from_irq(irq);
--- if (evtchn == -1)
--- continue;
---
--- unmask_evtchn(evtchn);
--- }
---
restore_cpu_pirqs();
}
static struct irq_chip xen_dynamic_chip __read_mostly = {
--- .name = "xen-dyn",
+++ .name = "xen-dyn",
--- .disable = disable_dynirq,
--- .mask = disable_dynirq,
--- .unmask = enable_dynirq,
+++ .irq_disable = disable_dynirq,
+++ .irq_mask = disable_dynirq,
+++ .irq_unmask = enable_dynirq,
--- .eoi = ack_dynirq,
--- .set_affinity = set_affinity_irq,
--- .retrigger = retrigger_dynirq,
+++ .irq_eoi = ack_dynirq,
+++ .irq_set_affinity = set_affinity_irq,
+++ .irq_retrigger = retrigger_dynirq,
};
static struct irq_chip xen_pirq_chip __read_mostly = {
--- .name = "xen-pirq",
+++ .name = "xen-pirq",
--- .startup = startup_pirq,
--- .shutdown = shutdown_pirq,
+++ .irq_startup = startup_pirq,
+++ .irq_shutdown = shutdown_pirq,
--- .enable = enable_pirq,
--- .unmask = enable_pirq,
+++ .irq_enable = enable_pirq,
+++ .irq_unmask = enable_pirq,
--- .disable = disable_pirq,
--- .mask = disable_pirq,
+++ .irq_disable = disable_pirq,
+++ .irq_mask = disable_pirq,
--- .ack = ack_pirq,
--- .end = end_pirq,
+++ .irq_ack = ack_pirq,
--- .set_affinity = set_affinity_irq,
+++ .irq_set_affinity = set_affinity_irq,
--- .retrigger = retrigger_dynirq,
+++ .irq_retrigger = retrigger_dynirq,
};
static struct irq_chip xen_percpu_chip __read_mostly = {
--- .name = "xen-percpu",
+++ .name = "xen-percpu",
--- .disable = disable_dynirq,
--- .mask = disable_dynirq,
--- .unmask = enable_dynirq,
+++ .irq_disable = disable_dynirq,
+++ .irq_mask = disable_dynirq,
+++ .irq_unmask = enable_dynirq,
--- .ack = ack_dynirq,
+++ .irq_ack = ack_dynirq,
};
int xen_set_callback_via(uint64_t via)