bool
default y
- config ARCH_HAS_DMA_SET_COHERENT_MASK
- bool
-
config PPC
bool
default y
#
# Please keep this list sorted alphabetically.
#
+ select ARCH_32BIT_OFF_T if PPC32
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
- select ARCH_HAS_DMA_SET_COHERENT_MASK
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_KCOV
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_PMEM_API if PPC64
select ARCH_HAS_PTE_SPECIAL
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_NVRAM_OPS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_CBPF_JIT if !PPC64
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_KERNEL_GZIP
- select HAVE_KERNEL_XZ if PPC_BOOK3S
+ select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
select HAVE_KRETPROBES
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if SMP
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if PPC64 && CPU_LITTLE_ENDIAN
+ select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
select HAVE_IRQ_TIME_ACCOUNTING
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+ select THREAD_INFO_IN_TASK
select VIRT_TO_BUS if !PPC64
#
# Please keep this list sorted alphabetically.
default y
depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
- config GENERIC_CSUM
- def_bool n
-
config EARLY_PRINTK
bool
default y
depends on COMPAT && SYSVIPC
default y
-# All PPC32s use generic nvram driver through ppc_md
-config GENERIC_NVRAM
- bool
- default y if PPC32
-
config SCHED_OMIT_FRAME_POINTER
bool
default y
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y
- config ARCH_HAS_WALK_MEMORY
- def_bool y
-
config ARCH_ENABLE_MEMORY_HOTREMOVE
def_bool y
config PPC_64K_PAGES
bool "64k page size"
- depends on !PPC_FSL_BOOK3E && (44x || PPC_BOOK3S_64 || PPC_BOOK3E_64)
+ depends on 44x || PPC_BOOK3S_64
select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_256K_PAGES
endchoice
+ config PPC_PAGE_SHIFT
+ int
+ default 18 if PPC_256K_PAGES
+ default 16 if PPC_64K_PAGES
+ default 14 if PPC_16K_PAGES
+ default 12
+
config THREAD_SHIFT
int "Thread shift" if EXPERT
range 13 15
Used to define the stack size. The default is almost always what you
want. Only change this if you know what you are doing.
+ config ETEXT_SHIFT_BOOL
+ bool "Set custom etext alignment" if STRICT_KERNEL_RWX && \
+ (PPC_BOOK3S_32 || PPC_8xx)
+ depends on ADVANCED_OPTIONS
+ help
+ This option allows you to set the kernel end of text alignment. When
+ RAM is mapped by blocks, the alignment needs to fit the size and
+ number of possible blocks. The default should be OK for most configs.
+
+ Say N here unless you know what you are doing.
+
+ config ETEXT_SHIFT
+ int "_etext shift" if ETEXT_SHIFT_BOOL
+ range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+ range 19 23 if STRICT_KERNEL_RWX && PPC_8xx
+ default 17 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+ default 19 if STRICT_KERNEL_RWX && PPC_8xx
+ default PPC_PAGE_SHIFT
+ help
+ On Book3S 32 (603+), IBATs are used to map kernel text.
+ Smaller is the alignment, greater is the number of necessary IBATs.
+
+ On 8xx, large pages (512kb or 8M) are used to map kernel linear
+ memory. Aligning to 8M reduces TLB misses as only 8M pages are used
+ in that case.
+
+ config DATA_SHIFT_BOOL
+ bool "Set custom data alignment" if STRICT_KERNEL_RWX && \
+ (PPC_BOOK3S_32 || PPC_8xx)
+ depends on ADVANCED_OPTIONS
+ help
+ This option allows you to set the kernel data alignment. When
+ RAM is mapped by blocks, the alignment needs to fit the size and
+ number of possible blocks. The default should be OK for most configs.
+
+ Say N here unless you know what you are doing.
+
+ config DATA_SHIFT
+ int "Data shift" if DATA_SHIFT_BOOL
+ default 24 if STRICT_KERNEL_RWX && PPC64
+ range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+ range 19 23 if STRICT_KERNEL_RWX && PPC_8xx
+ default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+ default 23 if STRICT_KERNEL_RWX && PPC_8xx
+ default PPC_PAGE_SHIFT
+ help
+ On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
+ Smaller is the alignment, greater is the number of necessary DBATs.
+
+ On 8xx, large pages (512kb or 8M) are used to map kernel linear
+ memory. Aligning to 8M reduces TLB misses as only 8M pages are used
+ in that case.
+
config FORCE_MAX_ZONEORDER
int "Maximum zone order"
range 8 9 if PPC64 && PPC_64K_PAGES
config FSL_PCI
bool
+ select ARCH_HAS_DMA_SET_MASK
select PPC_INDIRECT_PCI
select PCI_QUIRKS
return hash__set_pte_at(mm, addr, ptep, pte, percpu);
}
- #define _PAGE_CACHE_CTL (_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
+ #define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
#define pgprot_noncached pgprot_noncached
static inline pgprot_t pgprot_noncached(pgprot_t prot)
return false;
}
- static inline void pmd_set(pmd_t *pmdp, unsigned long val)
- {
- *pmdp = __pmd(val);
- }
-
static inline void pmd_clear(pmd_t *pmdp)
{
*pmdp = __pmd(0);
return hash__pmd_bad(pmd);
}
- static inline void pud_set(pud_t *pudp, unsigned long val)
- {
- *pudp = __pud(val);
- }
-
static inline void pud_clear(pud_t *pudp)
{
*pudp = __pud(0);
}
#define pgd_write(pgd) pte_write(pgd_pte(pgd))
- static inline void pgd_set(pgd_t *pgdp, unsigned long val)
- {
- *pgdp = __pgd(val);
- }
static inline void pgd_clear(pgd_t *pgdp)
{
BUILD_BUG();
return 0;
}
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+ pte_t *, pte_t, pte_t);
+
+/*
+ * Returns true for a R -> RW upgrade of pte
+ */
+static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
+{
+ if (!(old_val & _PAGE_READ))
+ return false;
+
+ if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
+ return true;
+
+ return false;
+}
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
#include <linux/pci.h>
#include <linux/list.h>
#include <linux/ioport.h>
+#include <linux/numa.h>
struct device_node;
struct pci_controller_ops {
void (*dma_dev_setup)(struct pci_dev *pdev);
void (*dma_bus_setup)(struct pci_bus *bus);
+ bool (*iommu_bypass_supported)(struct pci_dev *pdev,
+ u64 mask);
int (*probe_mode)(struct pci_bus *bus);
void (*teardown_msi_irqs)(struct pci_dev *pdev);
#endif
- int (*dma_set_mask)(struct pci_dev *pdev, u64 dma_mask);
- u64 (*dma_get_required_mask)(struct pci_dev *pdev);
-
void (*shutdown)(struct pci_controller *hose);
};
#ifdef CONFIG_NUMA
#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = (NODE))
#else
-#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = -1)
+#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = NUMA_NO_NODE)
#endif
#endif /* CONFIG_PPC64 */
extern struct pci_controller *pci_find_hose_for_OF_device(
struct device_node* node);
+ extern struct pci_controller *pci_find_controller_for_domain(int domain_nr);
+
/* Fill up host controller resources from the OF node */
extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct device_node *dev, int primary);
#define PPC_INST_ADDI 0x38000000
#define PPC_INST_ADDIS 0x3c000000
#define PPC_INST_ADD 0x7c000214
+ #define PPC_INST_ADDC 0x7c000014
#define PPC_INST_SUB 0x7c000050
#define PPC_INST_BLR 0x4e800020
#define PPC_INST_BLRL 0x4e800021
#define PPC_INST_MULLW 0x7c0001d6
#define PPC_INST_MULHWU 0x7c000016
#define PPC_INST_MULLI 0x1c000000
+ #define PPC_INST_MADDHD 0x10000030
+ #define PPC_INST_MADDHDU 0x10000031
+ #define PPC_INST_MADDLD 0x10000033
#define PPC_INST_DIVWU 0x7c000396
#define PPC_INST_DIVD 0x7c0003d2
#define PPC_INST_RLWINM 0x54000000
+#define PPC_INST_RLWINM_DOT 0x54000001
#define PPC_INST_RLWIMI 0x50000000
#define PPC_INST_RLDICL 0x78000000
#define PPC_INST_RLDICR 0x78000004
/* macros to insert fields into opcodes */
#define ___PPC_RA(a) (((a) & 0x1f) << 16)
#define ___PPC_RB(b) (((b) & 0x1f) << 11)
+ #define ___PPC_RC(c) (((c) & 0x1f) << 6)
#define ___PPC_RS(s) (((s) & 0x1f) << 21)
#define ___PPC_RT(t) ___PPC_RS(t)
#define ___PPC_R(r) (((r) & 0x1) << 16)
#define __PPC_WS(w) (((w) & 0x1f) << 11)
#define __PPC_SH(s) __PPC_WS(s)
#define __PPC_SH64(s) (__PPC_SH(s) | (((s) & 0x20) >> 4))
- #define __PPC_MB(s) (((s) & 0x1f) << 6)
+ #define __PPC_MB(s) ___PPC_RC(s)
#define __PPC_ME(s) (((s) & 0x1f) << 1)
#define __PPC_MB64(s) (__PPC_MB(s) | ((s) & 0x20))
#define __PPC_ME64(s) __PPC_MB64(s)
#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_INST_STQCX | \
___PPC_RT(t) | ___PPC_RA(a) | \
___PPC_RB(b))
+ #define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHD | \
+ ___PPC_RT(t) | ___PPC_RA(a) | \
+ ___PPC_RB(b) | ___PPC_RC(c))
+ #define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHDU | \
+ ___PPC_RT(t) | ___PPC_RA(a) | \
+ ___PPC_RB(b) | ___PPC_RC(c))
+ #define PPC_MADDLD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDLD | \
+ ___PPC_RT(t) | ___PPC_RA(a) | \
+ ___PPC_RB(b) | ___PPC_RC(c))
#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \
___PPC_RB(b))
#define PPC_MSGSYNC stringify_in_c(.long PPC_INST_MSGSYNC)
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/vgaarb.h>
+#include <linux/numa.h>
#include <asm/processor.h>
#include <asm/io.h>
EXPORT_SYMBOL(isa_mem_base);
- static const struct dma_map_ops *pci_dma_ops = &dma_nommu_ops;
+ static const struct dma_map_ops *pci_dma_ops;
void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
- const struct dma_map_ops *get_pci_dma_ops(void)
- {
- return pci_dma_ops;
- }
- EXPORT_SYMBOL(get_pci_dma_ops);
-
/*
* This function should run under locking protection, specifically
* hose_spinlock.
int nid = of_node_to_nid(dev);
if (nid < 0 || !node_online(nid))
- nid = -1;
+ nid = NUMA_NO_NODE;
PHB_SET_NODE(phb, nid);
}
return NULL;
}
+ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
+ {
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node)
+ if (hose->global_number == domain_nr)
+ return hose;
+
+ return NULL;
+ }
+
/*
* Reads the interrupt pin to determine if interrupt is use by card.
* If the interrupt is used, then gets the interrupt line from the
/* Hook up default DMA ops */
set_dma_ops(&dev->dev, pci_dma_ops);
- set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
+ dev->dev.archdata.dma_offset = PCI_DRAM_OFFSET;
/* Additional platform DMA/iommu setup */
phb = pci_bus_to_host(dev->bus);
#include <linux/console.h>
#include <linux/memblock.h>
#include <linux/export.h>
+#include <linux/nvram.h>
#include <asm/io.h>
#include <asm/prom.h>
}
__setup("l3cr=", ppc_setup_l3cr);
-#ifdef CONFIG_GENERIC_NVRAM
-
-/* Generic nvram hooks used by drivers/char/gen_nvram.c */
-unsigned char nvram_read_byte(int addr)
-{
- if (ppc_md.nvram_read_val)
- return ppc_md.nvram_read_val(addr);
- return 0xff;
-}
-EXPORT_SYMBOL(nvram_read_byte);
-
-void nvram_write_byte(unsigned char val, int addr)
-{
- if (ppc_md.nvram_write_val)
- ppc_md.nvram_write_val(addr, val);
-}
-EXPORT_SYMBOL(nvram_write_byte);
-
-ssize_t nvram_get_size(void)
-{
- if (ppc_md.nvram_size)
- return ppc_md.nvram_size();
- return -1;
-}
-EXPORT_SYMBOL(nvram_get_size);
-
-void nvram_sync(void)
-{
- if (ppc_md.nvram_sync)
- ppc_md.nvram_sync();
-}
-EXPORT_SYMBOL(nvram_sync);
-
-#endif /* CONFIG_NVRAM */
-
static int __init ppc_init(void)
{
/* clear the progress line */
}
arch_initcall(ppc_init);
+ static void *__init alloc_stack(void)
+ {
+ void *ptr = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+
+ if (!ptr)
+ panic("cannot allocate %d bytes for stack at %pS\n",
+ THREAD_SIZE, (void *)_RET_IP_);
+
+ return ptr;
+ }
+
void __init irqstack_early_init(void)
{
unsigned int i;
/* interrupt stacks must be in lowmem, we get that for free on ppc32
* as the memblock is limited to lowmem by default */
for_each_possible_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
- hardirq_ctx[i] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
+ softirq_ctx[i] = alloc_stack();
+ hardirq_ctx[i] = alloc_stack();
}
}
hw_cpu = 0;
#endif
- critirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
+ critirq_ctx[hw_cpu] = alloc_stack();
#ifdef CONFIG_BOOKE
- dbgirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
- mcheckirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
+ dbgirq_ctx[hw_cpu] = alloc_stack();
+ mcheckirq_ctx[hw_cpu] = alloc_stack();
#endif
}
}
real_pte_t rpte;
unsigned long vpn;
unsigned long old_pte, new_pte;
- unsigned long rflags, pa, sz;
+ unsigned long rflags, pa;
long slot, offset;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
offset = PTRS_PER_PMD;
rpte = __real_pte(__pte(old_pte), ptep, offset);
- sz = ((1UL) << shift);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
/* No CPU has hugepages but lacks no execute, so we
* don't need to worry about that case */
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
+
+pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long pte_val;
+ /*
+ * Clear the _PAGE_PRESENT so that no hardware parallel update is
+ * possible. Also keep the pte_present true so that we don't take
+ * wrong fault.
+ */
+ pte_val = pte_update(vma->vm_mm, addr, ptep,
+ _PAGE_PRESENT, _PAGE_INVALID, 1);
+
+ return __pte(pte_val);
+}
+
+void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+
+ if (radix_enabled())
+ return radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
+ old_pte, pte);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/hugetlb.h>
+ #include <linux/security.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
- if (high_limit - len >= addr &&
+ if (high_limit - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
*/
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
- info.low_limit = PAGE_SIZE;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
return vm_unmapped_area(&info);
}
+
+void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte)
+{
+ struct mm_struct *mm = vma->vm_mm;
+
+ /*
+ * To avoid NMMU hang while relaxing access we need to flush the tlb before
+ * we set the new value.
+ */
+ if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+ (atomic_read(&mm->context.copros) > 0))
+ radix__flush_hugetlb_page(vma, addr);
+
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
/* cpumask_of_node() will now work */
- dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
+ dbg("Node to cpumask map for %u nodes\n", nr_node_ids);
}
static int __init fake_numa_create_new_node(unsigned long end_pfn,
*/
static int associativity_to_nid(const __be32 *associativity)
{
- int nid = -1;
+ int nid = NUMA_NO_NODE;
if (min_common_depth == -1)
goto out;
/* POWER4 LPAR uses 0xffff as invalid node */
if (nid == 0xffff || nid >= MAX_NUMNODES)
- nid = -1;
+ nid = NUMA_NO_NODE;
if (nid > 0 &&
of_read_number(associativity, 1) >= distance_ref_points_depth) {
*/
static int of_node_to_nid_single(struct device_node *device)
{
- int nid = -1;
+ int nid = NUMA_NO_NODE;
const __be32 *tmp;
tmp = of_get_associativity(device);
/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
- int nid = -1;
+ int nid = NUMA_NO_NODE;
of_node_get(device);
while (device) {
*/
static int numa_setup_cpu(unsigned long lcpu)
{
- int nid = -1;
+ int nid = NUMA_NO_NODE;
struct device_node *cpu;
/*
{
struct drmem_lmb *lmb;
unsigned long lmb_size;
- int nid = -1;
+ int nid = NUMA_NO_NODE;
lmb_size = drmem_lmb_size();
static int hot_add_node_scn_to_nid(unsigned long scn_addr)
{
struct device_node *memory;
- int nid = -1;
+ int nid = NUMA_NO_NODE;
for_each_node_by_type(memory, "memory") {
unsigned long start, size;
#ifdef CONFIG_SMP
- static void stage_topology_update(int core_id)
- {
- cpumask_or(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
- reset_topology_timer();
- }
-
static int dt_update_callback(struct notifier_block *nb,
unsigned long action, void *data)
{
!of_prop_cmp(update->prop->name, "ibm,associativity")) {
u32 core_id;
of_property_read_u32(update->dn, "reg", &core_id);
- stage_topology_update(core_id);
+ rc = dlpar_cpu_readd(core_id);
rc = NOTIFY_OK;
}
break;
pnv_pci_ioda2_setup_dma_pe(phb, pe);
#ifdef CONFIG_IOMMU_API
+ iommu_register_group(&pe->table_group,
+ pe->phb->hose->global_number, pe->pe_number);
pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL);
#endif
}
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_dma_offset(&pdev->dev, pe->tce_bypass_base);
+ pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
/*
* Note: iommu_add_device() will fail here as
*/
}
- static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe)
- {
- unsigned short vendor = 0;
- struct pci_dev *pdev;
-
- if (pe->device_count == 1)
- return true;
-
- /* pe->pdev should be set if it's a single device, pe->pbus if not */
- if (!pe->pbus)
- return true;
-
- list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
- if (!vendor) {
- vendor = pdev->vendor;
- continue;
- }
-
- if (pdev->vendor != vendor)
- return false;
- }
-
- return true;
- }
-
/*
* Reconfigure TVE#0 to be usable as 64-bit DMA space.
*
return -EIO;
}
- static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
+ u64 dma_mask)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
- uint64_t top;
- bool bypass = false;
- s64 rc;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;
pe = &phb->ioda.pe_array[pdn->pe_number];
if (pe->tce_bypass_enabled) {
- top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
- bypass = (dma_mask >= top);
+ u64 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+ if (dma_mask >= top)
+ return true;
}
- if (bypass) {
- dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
- set_dma_ops(&pdev->dev, &dma_nommu_ops);
- } else {
- /*
- * If the device can't set the TCE bypass bit but still wants
- * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
- * bypass the 32-bit region and be usable for 64-bit DMAs.
- * The device needs to be able to address all of this space.
- */
- if (dma_mask >> 32 &&
- dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
- pnv_pci_ioda_pe_single_vendor(pe) &&
- phb->model == PNV_PHB_MODEL_PHB3) {
- /* Configure the bypass mode */
- rc = pnv_pci_ioda_dma_64bit_bypass(pe);
- if (rc)
- return rc;
- /* 4GB offset bypasses 32-bit space */
- set_dma_offset(&pdev->dev, (1ULL << 32));
- set_dma_ops(&pdev->dev, &dma_nommu_ops);
- } else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) {
- /*
- * Fail the request if a DMA mask between 32 and 64 bits
- * was requested but couldn't be fulfilled. Ideally we
- * would do this for 64-bits but historically we have
- * always fallen back to 32-bits.
- */
- return -ENOMEM;
- } else {
- dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
- set_dma_ops(&pdev->dev, &dma_iommu_ops);
- }
+ /*
+ * If the device can't set the TCE bypass bit but still wants
+ * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
+ * bypass the 32-bit region and be usable for 64-bit DMAs.
+ * The device needs to be able to address all of this space.
+ */
+ if (dma_mask >> 32 &&
+ dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
+ /* pe->pdev should be set if it's a single device, pe->pbus if not */
+ (pe->device_count == 1 || !pe->pbus) &&
+ phb->model == PNV_PHB_MODEL_PHB3) {
+ /* Configure the bypass mode */
+ s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe);
+ if (rc)
+ return rc;
+ /* 4GB offset bypasses 32-bit space */
+ pdev->dev.archdata.dma_offset = (1ULL << 32);
+ return true;
}
- *pdev->dev.dma_mask = dma_mask;
- /* Update peer npu devices */
- pnv_npu_try_dma_set_bypass(pdev, bypass);
-
- return 0;
- }
-
- static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
- {
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct pci_dn *pdn = pci_get_pdn(pdev);
- struct pnv_ioda_pe *pe;
- u64 end, mask;
-
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return 0;
-
- pe = &phb->ioda.pe_array[pdn->pe_number];
- if (!pe->tce_bypass_enabled)
- return __dma_get_required_mask(&pdev->dev);
-
-
- end = pe->tce_bypass_base + memblock_end_of_DRAM();
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
+ return false;
}
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
list_for_each_entry(dev, &bus->devices, bus_list) {
set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
- set_dma_offset(&dev->dev, pe->tce_bypass_base);
+ dev->dev.archdata.dma_offset = pe->tce_bypass_base;
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_bus_dma(pe, dev->subordinate);
int num, __u32 page_shift, __u64 window_size, __u32 levels,
struct iommu_table **ptbl)
{
- return pnv_pci_ioda2_create_table(table_group,
+ long ret = pnv_pci_ioda2_create_table(table_group,
num, page_shift, window_size, levels, true, ptbl);
+
+ if (!ret)
+ (*ptbl)->it_allocated_size = pnv_pci_ioda2_get_table_size(
+ page_shift, window_size, levels);
+ return ret;
}
static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
.dma_bus_setup = pnv_pci_dma_bus_setup,
+ .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported,
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
.enable_device_hook = pnv_pci_enable_device_hook,
.window_alignment = pnv_pci_window_alignment,
.setup_bridge = pnv_pci_setup_bridge,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_pci_ioda_dma_set_mask,
- .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
.shutdown = pnv_pci_ioda_shutdown,
};
- static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
- {
- dev_err_once(&npdev->dev,
- "%s operation unsupported for NVLink devices\n",
- __func__);
- return -EPERM;
- }
-
static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
.setup_msi_irqs = pnv_setup_msi_irqs,
.enable_device_hook = pnv_pci_enable_device_hook,
.window_alignment = pnv_pci_window_alignment,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_npu_dma_set_mask,
.shutdown = pnv_pci_ioda_shutdown,
.disable_device = pnv_npu_disable_device,
};
* shutdown PCI devices correctly. We already got IODA table
* cleaned out. So we have to issue PHB reset to stop all PCI
* transactions from previous kernel. The ppc_pci_reset_phbs
- * kernel parameter will force this reset too.
+ * kernel parameter will force this reset too. Additionally,
+ * if the IODA reset above failed then use a bigger hammer.
+ * This can happen if we get a PHB fatal error in very early
+ * boot.
*/
- if (is_kdump_kernel() || pci_reset_phbs) {
+ if (is_kdump_kernel() || pci_reset_phbs || rc) {
pr_info(" Issue PHB reset ...\n");
pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
memblock_free_late(io_tlb_start,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
}
+ io_tlb_start = 0;
+ io_tlb_end = 0;
io_tlb_nslabs = 0;
max_segment = 0;
}
return true;
}
-
- /*
- * Return whether the given device DMA address mask can be supported
- * properly. For example, if your device can only drive the low 24-bits
- * during bus mastering, then you would pass 0x00ffffff as the mask to
- * this function.
- */
- int
- swiotlb_dma_supported(struct device *hwdev, u64 mask)
- {
- return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
- }