select ARCH_HAS_SETUP_DMA_OPS
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
+ select ARCH_STACKWALK
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_IDLE_POLL_SETUP
+ select GENERIC_IRQ_IPI
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_VDSO_TIME_NS
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
+ select HAVE_MOVE_PMD
select HAVE_PCI
select HAVE_ACPI_APEI if (ACPI && EFI)
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select PCI_SYSCALL if PCI
select POWER_RESET
select POWER_SUPPLY
+ select SET_FS
select SPARSE_IRQ
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
default 14 if ARM64_16K_PAGES
default 12
- config ARM64_CONT_SHIFT
+ config ARM64_CONT_PTE_SHIFT
int
default 5 if ARM64_64K_PAGES
default 7 if ARM64_16K_PAGES
default 4
+ config ARM64_CONT_PMD_SHIFT
+ int
+ default 5 if ARM64_64K_PAGES
+ default 5 if ARM64_16K_PAGES
+ default 4
+
config ARCH_MMAP_RND_BITS_MIN
default 14 if ARM64_64K_PAGES
default 16 if ARM64_16K_PAGES
config KASAN_SHADOW_OFFSET
hex
depends on KASAN
- ------- default 0xdfffa00000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
- ------- default 0xdfffd00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
- ------- default 0xdffffe8000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
- ------- default 0xdfffffd000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
- ------- default 0xdffffffa00000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
- ------- default 0xefff900000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
- ------- default 0xefffc80000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
- ------- default 0xeffffe4000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
- ------- default 0xefffffc800000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
- ------- default 0xeffffff900000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
+ +++++++ default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
+ +++++++ default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
+ +++++++ default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
+ +++++++ default 0xdfffffc000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
+ +++++++ default 0xdffffff800000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
+ +++++++ default 0xefff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
+ +++++++ default 0xefffc00000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
+ +++++++ default 0xeffffe0000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
+ +++++++ default 0xefffffc000000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
+ +++++++ default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
default 0xffffffffffffffff
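(For reference, these defaults follow from the generic KASAN shadow mapping rather than from new policy; a sketch, assuming the usual arm64 scale shifts of 3 for generic KASAN and 4 for software tag-based KASAN:

    shadow(addr) = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET

so the offsets above simply track the reworked virtual memory layout.)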
source "arch/arm64/Kconfig.platforms"
If unsure, say Y.
+ config ARM64_ERRATUM_1508412
+ bool "Cortex-A77: 1508412: workaround deadlock on sequence of NC/Device load and store exclusive or PAR read"
+ default y
+ help
+ This option adds a workaround for Arm Cortex-A77 erratum 1508412.
+
+ Affected Cortex-A77 cores (r0p0, r1p0) could deadlock on a sequence
+ of a store-exclusive or read of PAR_EL1 and a load with device or
+ non-cacheable memory attributes. The workaround depends on a firmware
+ counterpart.
+
+ KVM guests must also have the workaround implemented or they can
+ deadlock the system.
+
+ Work around the issue by inserting DMB SY barriers around PAR_EL1
+ register reads and warning KVM users. The DMB barrier is sufficient
+ to prevent a speculative PAR_EL1 read.
+
+ If unsure, say Y.
+
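A minimal sketch of the barrier pattern the help text describes, assuming <asm/sysreg.h> for read_sysreg(); the helper name is hypothetical and this is not necessarily the kernel's exact workaround code:

static inline u64 par_el1_read_erratum_1508412(void)
{
	u64 par;

	/*
	 * DMB SY before and after the access keeps the PAR_EL1 read ordered
	 * against neighbouring Device/non-cacheable accesses and
	 * store-exclusives, and prevents a speculative PAR_EL1 read.
	 */
	asm volatile("dmb sy" ::: "memory");
	par = read_sysreg(par_el1);
	asm volatile("dmb sy" ::: "memory");

	return par;
}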
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
config NODES_SHIFT
int "Maximum NUMA Nodes (as a power of 2)"
range 1 10
- default "2"
+ default "4"
depends on NEED_MULTIPLE_NODES
help
Specify the maximum number of NUMA Nodes available on the target
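(Since the value is a power of two, the new default of 4 allows up to 2^4 = 16 NUMA nodes, where the old default of 2 capped the kernel at 4 nodes.)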
config CC_HAVE_SHADOW_CALL_STACK
def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
- config SECCOMP
- bool "Enable seccomp to safely compute untrusted bytecode"
- help
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
config PARAVIRT
bool "Enable paravirtualization code"
help
The feature is detected at runtime, and will remain as a 'nop'
instruction if the cpu does not implement the feature.
++ ++++++config AS_HAS_LDAPR
++ ++++++ def_bool $(as-instr,.arch_extension rcpc)
++ ++++++
config ARM64_LSE_ATOMICS
bool
default ARM64_USE_LSE_ATOMICS
depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697
depends on !CC_IS_GCC || GCC_VERSION >= 100100
- # https://reviews.llvm.org/rGb8ae3fdfa579dbf366b1bb1cbfdbf8c51db7fa55
- depends on !CC_IS_CLANG || CLANG_VERSION >= 100001
depends on !(CC_IS_CLANG && GCOV_KERNEL)
depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
help
provides a high bandwidth, cryptographically secure
hardware random number generator.
+ config ARM64_AS_HAS_MTE
+ # Initial support for MTE went in binutils 2.32.0, checked with
+ # ".arch armv8.5-a+memtag" below. However, this was incomplete
+ # as a late addition to the final architecture spec (LDGM/STGM)
+ # is only supported in the newer 2.32.x and 2.33 binutils
+ # versions, hence the extra "stgm" instruction check below.
+ def_bool $(as-instr,.arch armv8.5-a+memtag\nstgm xzr$(comma)[x0])
+
+ config ARM64_MTE
+ bool "Memory Tagging Extension support"
+ default y
+ depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI
+ select ARCH_USES_HIGH_VMA_FLAGS
+ help
+ Memory Tagging (part of the ARMv8.5 Extensions) provides
+ architectural support for run-time, always-on detection of
+ various classes of memory error to aid with software debugging
+ to eliminate vulnerabilities arising from memory-unsafe
+ languages.
+
+ This option enables the support for the Memory Tagging
+ Extension at EL0 (i.e. for userspace).
+
+ Selecting this option allows the feature to be detected at
+ runtime. Any secondary CPU not implementing this feature will
+ not be allowed a late bring-up.
+
+ Userspace binaries that want to use this feature must
+ explicitly opt in. The mechanism for the userspace is
+ described in:
+
+ Documentation/arm64/memory-tagging-extension.rst.
+
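A small userspace sketch of that opt-in, based on the prctl() interface the documentation describes; the HWCAP2_MTE/PR_MTE_* constants may need recent headers (hence the fallback definitions), and the tag mask chosen here is only an example:

#include <stdio.h>
#include <sys/auxv.h>
#include <sys/prctl.h>

#ifndef AT_HWCAP2
#define AT_HWCAP2		26
#endif
#ifndef HWCAP2_MTE
#define HWCAP2_MTE		(1UL << 18)
#endif
#ifndef PR_SET_TAGGED_ADDR_CTRL
#define PR_SET_TAGGED_ADDR_CTRL	55
#define PR_TAGGED_ADDR_ENABLE	(1UL << 0)
#endif
#ifndef PR_MTE_TCF_SYNC
#define PR_MTE_TCF_SYNC		(1UL << 1)
#define PR_MTE_TAG_SHIFT	3
#endif

int main(void)
{
	/* MTE is usable only if the kernel advertised the hwcap. */
	if (!(getauxval(AT_HWCAP2) & HWCAP2_MTE)) {
		puts("MTE not supported");
		return 1;
	}

	/*
	 * Opt this task in: tagged address ABI, synchronous tag check
	 * faults, and every tag except 0 allowed for random generation.
	 */
	if (prctl(PR_SET_TAGGED_ADDR_CTRL,
		  PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC |
		  (0xfffeUL << PR_MTE_TAG_SHIFT), 0, 0, 0)) {
		perror("prctl(PR_SET_TAGGED_ADDR_CTRL)");
		return 1;
	}

	return 0;
}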
endmenu
config ARM64_SVE
entering them here. As a minimum, you should specify the
root device (e.g. root=/dev/nfs).
++++++++ choice
++++++++ prompt "Kernel command line type" if CMDLINE != ""
++++++++ default CMDLINE_FROM_BOOTLOADER
++++++++ help
++++++++ Choose how the kernel will handle the provided default kernel
++++++++ command line string.
++++++++
++++++++ config CMDLINE_FROM_BOOTLOADER
++++++++ bool "Use bootloader kernel arguments if available"
++++++++ help
++++++++ Uses the command-line options passed by the boot loader. If
++++++++ the boot loader doesn't provide any, the default kernel command
++++++++ string provided in CMDLINE will be used.
++++++++
++++++++ config CMDLINE_EXTEND
++++++++ bool "Extend bootloader kernel arguments"
++++++++ help
++++++++ The command-line arguments provided by the boot loader will be
++++++++ appended to the default kernel command string.
++++++++
config CMDLINE_FORCE
bool "Always use the default kernel command string"
-------- depends on CMDLINE != ""
help
Always use the default kernel command string, even if the boot
loader passes other arguments to the kernel.
This is useful if you cannot or don't want to change the
command-line options your boot loader passes to the kernel.
++++++++ endchoice
++++++++
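As a purely hypothetical illustration of the three modes (example strings invented here), with CMDLINE="console=ttyAMA0" built in and the boot loader passing "root=/dev/vda2", the resulting /proc/cmdline would be:

    CMDLINE_FROM_BOOTLOADER  ->  root=/dev/vda2
    CMDLINE_EXTEND           ->  console=ttyAMA0 root=/dev/vda2
    CMDLINE_FORCE            ->  console=ttyAMA0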
config EFI_STUB
bool
def_bool y
depends on HUGETLB_PAGE && MIGRATION
+ config ARCH_ENABLE_THP_MIGRATION
+ def_bool y
+ depends on TRANSPARENT_HUGEPAGE
+
menu "Power management options"
source "kernel/power/Kconfig"
#include <asm/cpu_ops.h>
#include <asm/fpsimd.h>
#include <asm/mmu_context.h>
+ #include <asm/mte.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, ID_AA64PFR1_MTE_NI),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0),
return 0;
return regp->sys_val;
}
+ EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
#define read_sysreg_case(r) \
case r: return read_sysreg_s(r)
write_sysreg(tcr, tcr_el1);
isb();
+ local_flush_tlb_all();
}
static bool cpu_has_broken_dbm(void)
return cpumask_test_cpu(cpu, &amu_cpus);
}
---- ----/* Initialize the use of AMU counters for frequency invariance */
---- ----extern void init_cpu_freq_invariance_counters(void);
++++ ++++int get_cpu_with_amu_feat(void)
++++ ++++{
++++ ++++ return cpumask_any(&amu_cpus);
++++ ++++}
static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
{
pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
smp_processor_id());
cpumask_set_cpu(smp_processor_id(), &amu_cpus);
---- ---- init_cpu_freq_invariance_counters();
++++ ++++ update_freq_counters_refs();
}
}
return true;
}
++++ ++++#else
++++ ++++int get_cpu_with_amu_feat(void)
++++ ++++{
++++ ++++ return nr_cpu_ids;
++++ ++++}
#endif
#ifdef CONFIG_ARM64_VHE
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_PTR_AUTH
- static bool has_address_auth(const struct arm64_cpu_capabilities *entry,
- int __unused)
+ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope)
{
- return __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) ||
- __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF);
+ int boot_val, sec_val;
+
+ /* We don't expect to be called with SCOPE_SYSTEM */
+ WARN_ON(scope == SCOPE_SYSTEM);
+ /*
+ * The ptr-auth feature levels are not intercompatible with lower
+ * levels. Hence we must match ptr-auth feature level of the secondary
+ * CPUs with that of the boot CPU. The level of boot cpu is fetched
+ * from the sanitised register whereas direct register read is done for
+ * the secondary CPUs.
+ * The sanitised feature state is guaranteed to match that of the
+ * boot CPU as a mismatched secondary CPU is parked before it gets
+ * a chance to update the state, with the capability.
+ */
+ boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg),
+ entry->field_pos, entry->sign);
+ if (scope & SCOPE_BOOT_CPU)
+ return boot_val >= entry->min_field_value;
+ /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */
+ sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
+ entry->field_pos, entry->sign);
+ return sec_val == boot_val;
+ }
+
+ static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
+ int scope)
+ {
+ return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) ||
+ has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
}
static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
}
#endif /* CONFIG_ARM64_BTI */
+ #ifdef CONFIG_ARM64_MTE
+ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
+ {
+ static bool cleared_zero_page = false;
+
+ /*
+ * Clear the tags in the zero page. This needs to be done via the
+ * linear map which has the Tagged attribute.
+ */
+ if (!cleared_zero_page) {
+ cleared_zero_page = true;
+ mte_clear_page_tags(lm_alias(empty_zero_page));
+ }
+ }
+ #endif /* CONFIG_ARM64_MTE */
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_APA_SHIFT,
.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
- .matches = has_cpuid_feature,
+ .matches = has_address_auth_cpucap,
},
{
.desc = "Address authentication (IMP DEF algorithm)",
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64ISAR1_API_SHIFT,
.min_field_value = ID_AA64ISAR1_API_IMP_DEF,
- .matches = has_cpuid_feature,
+ .matches = has_address_auth_cpucap,
},
{
.capability = ARM64_HAS_ADDRESS_AUTH,
.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
- .matches = has_address_auth,
+ .matches = has_address_auth_metacap,
},
{
.desc = "Generic authentication (architected algorithm)",
.sign = FTR_UNSIGNED,
},
#endif
+ #ifdef CONFIG_ARM64_MTE
+ {
+ .desc = "Memory Tagging Extension",
+ .capability = ARM64_MTE,
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64PFR1_EL1,
+ .field_pos = ID_AA64PFR1_MTE_SHIFT,
+ .min_field_value = ID_AA64PFR1_MTE,
+ .sign = FTR_UNSIGNED,
+ .cpu_enable = cpu_enable_mte,
+ },
+ #endif /* CONFIG_ARM64_MTE */
++ ++++++ {
++ ++++++ .desc = "RCpc load-acquire (LDAPR)",
++ ++++++ .capability = ARM64_HAS_LDAPR,
++ ++++++ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
++ ++++++ .sys_reg = SYS_ID_AA64ISAR1_EL1,
++ ++++++ .sign = FTR_UNSIGNED,
++ ++++++ .field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
++ ++++++ .matches = has_cpuid_feature,
++ ++++++ .min_field_value = 1,
++ ++++++ },
{},
};
HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
#endif
+ #ifdef CONFIG_ARM64_MTE
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
+ #endif /* CONFIG_ARM64_MTE */
{},
};
*/
#define RO_EXCEPTION_TABLE_ALIGN 8
+ #define RUNTIME_DISCARD_EXIT
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
* matching the same input section name. There is no documented
* order of matching.
*/
+ DISCARDS
/DISCARD/ : {
- EXIT_CALL
- *(.discard)
- *(.discard.*)
*(.interp .dynamic)
*(.dynsym .dynstr .hash .gnu.hash)
- *(.eh_frame)
}
- . = KIMAGE_VADDR + TEXT_OFFSET;
+ . = KIMAGE_VADDR;
.head.text : {
_text = .;
HEAD_TEXT
}
----- --- .text : { /* Real text segment */
+++++ +++ .text : ALIGN(SEGMENT_ALIGN) { /* Real text segment */
_stext = .; /* Text and read-only data */
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
*(.got) /* Global offset table */
}
+ /*
+ * Make sure that the .got.plt is either completely empty or it
+ * contains only the lazy dispatch entries.
+ */
+ .got.plt : { *(.got.plt) }
+ ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18,
+ "Unexpected GOT/PLT entries detected!")
+
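(The 0x18 bound presumably corresponds to the three reserved 8-byte .got.plt slots a linker emits for lazy binding, i.e. 3 * 8 = 0x18 bytes; anything larger would mean real PLT-based dispatch crept into the kernel image.)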
. = ALIGN(SEGMENT_ALIGN);
_etext = .; /* End of text section */
INIT_CALLS
CON_INITCALL
INIT_RAM_FS
-- ------ *(.init.rodata.* .init.bss) /* from the EFI stub */
++ ++++++ *(.init.altinstructions .init.rodata.* .init.bss) /* from the EFI stub */
}
.exit.data : {
EXIT_DATA
_end = .;
STABS_DEBUG
+ DWARF_DEBUG
+ ELF_DETAILS
HEAD_SYMBOLS
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .plt : {
+ *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+
+ .data.rel.ro : { *(.data.rel.ro) }
+ ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!")
}
#include "image-vars.h"
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
- ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned")
+ ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned")
struct kvm_pgtable *pgt;
write_fault = kvm_is_write_fault(vcpu);
- exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
+ exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
VM_BUG_ON(write_fault && exec_fault);
if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
}
switch (vma_shift) {
++++++++#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
break;
fallthrough;
++++++++#endif
case CONT_PMD_SHIFT:
vma_shift = PMD_SHIFT;
fallthrough;
goto out;
}
- if (kvm_vcpu_dabt_iss1tw(vcpu)) {
+ if (kvm_vcpu_abt_iss1tw(vcpu)) {
kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
ret = 1;
goto out_unlock;
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/dma-direct.h>
- #include <linux/dma-mapping.h>
- #include <linux/dma-contiguous.h>
+ #include <linux/dma-map-ops.h>
#include <linux/efi.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>
++++++ ++#include <linux/acpi_iort.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
#include <asm/tlb.h>
#include <asm/alternative.h>
------ --#define ARM64_ZONE_DMA_BITS 30
------ --
/*
* We need to be able to catch inadvertent references to memstart_addr
* that occur (potentially in generic code) before arm64_memblock_init()
s64 memstart_addr __ro_after_init = -1;
EXPORT_SYMBOL(memstart_addr);
- s64 physvirt_offset __ro_after_init;
- EXPORT_SYMBOL(physvirt_offset);
-
- struct page *vmemmap __ro_after_init;
- EXPORT_SYMBOL(vmemmap);
-
/*
* We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
* memory as some devices, namely the Raspberry Pi 4, have peripherals with
#endif /* CONFIG_CRASH_DUMP */
/*
------ -- * Return the maximum physical address for a zone with a given address size
------ -- * limit. It currently assumes that for memory starting above 4G, 32-bit
------ -- * devices will use a DMA offset.
++++++ ++ * Return the maximum physical address for a zone accessible by the given bits
++++++ ++ * limit. If DRAM starts above 32-bit, expand the zone to the maximum
++++++ ++ * available memory, otherwise cap it at 32-bit.
*/
static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
{
------ -- phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits);
------ -- return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
++++++ ++ phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits);
++++++ ++ phys_addr_t phys_start = memblock_start_of_DRAM();
++++++ ++
++++++ ++ if (phys_start > U32_MAX)
++++++ ++ zone_mask = PHYS_ADDR_MAX;
++++++ ++ else if (phys_start > zone_mask)
++++++ ++ zone_mask = U32_MAX;
++++++ ++
++++++ ++ return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
}
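A quick worked example of the new logic, with hypothetical start addresses and zone_bits = 30 (so zone_mask starts as 0x3fffffff):

    DRAM starting at 0x0           ->  ZONE_DMA capped at 1 GiB, as before
    DRAM starting at 0x80000000    ->  phys_start > zone_mask, so the zone is widened to the 32-bit limit
    DRAM starting at 0x8000000000  ->  phys_start > U32_MAX, so the zone covers all available memory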
static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
++++++ ++ unsigned int __maybe_unused acpi_zone_dma_bits;
++++++ ++ unsigned int __maybe_unused dt_zone_dma_bits;
#ifdef CONFIG_ZONE_DMA
++++++ ++ acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
++++++ ++ dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
++++++ ++ zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits);
++++++ ++ arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
#endif
#ifdef CONFIG_ZONE_DMA32
void __init arm64_memblock_init(void)
{
- ------- const s64 linear_region_size = BIT(vabits_actual - 1);
+ +++++++ const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
/* Handle linux,usable-memory-range property */
fdt_enforce_memory_region();
memstart_addr = round_down(memblock_start_of_DRAM(),
ARM64_MEMSTART_ALIGN);
- physvirt_offset = PHYS_OFFSET - PAGE_OFFSET;
-
- vmemmap = ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT));
-
- /*
- * If we are running with a 52-bit kernel VA config on a system that
- * does not support it, we have to offset our vmemmap and physvirt_offset
- * s.t. we avoid the 52-bit portion of the direct linear map
- */
- if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52)) {
- vmemmap += (_PAGE_OFFSET(48) - _PAGE_OFFSET(52)) >> PAGE_SHIFT;
- physvirt_offset = PHYS_OFFSET - _PAGE_OFFSET(48);
- }
-
/*
* Remove the memory that we will not be able to cover with the
* linear mapping. Take care not to clip the kernel which may be
memblock_remove(0, memstart_addr);
}
+ /*
+ * If we are running with a 52-bit kernel VA config on a system that
+ * does not support it, we have to place the available physical
+ * memory in the 48-bit addressable part of the linear region, i.e.,
+ * we have to move it upward. Since memstart_addr represents the
+ * physical address of PAGE_OFFSET, we have to *subtract* from it.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52))
+ memstart_addr -= _PAGE_OFFSET(48) - _PAGE_OFFSET(52);
+
/*
* Apply the memory limit if it was set. Since the kernel may be loaded
* high up in memory, add back the kernel region that must be accessible
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
extern u16 memstart_offset_seed;
--- ----- u64 range = linear_region_size -
--- ----- (memblock_end_of_DRAM() - memblock_start_of_DRAM());
+++ +++++ u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+++ +++++ int parange = cpuid_feature_extract_unsigned_field(
+++ +++++ mmfr0, ID_AA64MMFR0_PARANGE_SHIFT);
+++ +++++ s64 range = linear_region_size -
+++ +++++ BIT(id_aa64mmfr0_parange_to_phys_shift(parange));
/*
* If the size of the linear region exceeds, by a sufficient
--- ----- * margin, the size of the region that the available physical
--- ----- * memory spans, randomize the linear region as well.
+++ +++++ * margin, the size of the region that the physical memory can
+++ +++++ * span, randomize the linear region as well.
*/
--- ----- if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
+++ +++++ if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) {
range /= ARM64_MEMSTART_ALIGN;
memstart_addr -= ARM64_MEMSTART_ALIGN *
((range * memstart_offset_seed) >> 16);
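As a rough worked example using the architectural PARange encodings (e.g. 0b0101 means 48 bits of PA): with 48-bit VA the linear region is 2^47 bytes, so a CPU reporting a 48-bit physical range yields range = 2^47 - 2^48 < 0 and randomization is skipped, while a 40-bit PA CPU leaves range = 2^47 - 2^40, well above ARM64_MEMSTART_ALIGN; this is why range is now a signed s64 and the comparison gained the (s64) cast.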
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
*/
----- --- memblock_reserve(__pa_symbol(_text), _end - _text);
+++++ +++ memblock_reserve(__pa_symbol(_stext), _end - _stext);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
/* the generic initrd code expects virtual addresses */
initrd_start = __phys_to_virt(phys_initrd_start);
early_init_fdt_scan_reserved_mem();
------ -- if (IS_ENABLED(CONFIG_ZONE_DMA)) {
------ -- zone_dma_bits = ARM64_ZONE_DMA_BITS;
------ -- arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
------ -- }
------ --
if (IS_ENABLED(CONFIG_ZONE_DMA32))
arm64_dma32_phys_limit = max_zone_phys(32);
else
arm64_dma32_phys_limit = PHYS_MASK + 1;
------ -- reserve_crashkernel();
------ --
reserve_elfcorehdr();
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
arm64_hugetlb_cma_reserve();
#endif
+ dma_pernuma_cma_reserve();
+
/*
* sparse_init() tries to allocate memory from memblock, so must be
* done after the fixed reservations
sparse_init();
zone_sizes_init(min, max);
++++++ ++ /*
++++++ ++ * request_standard_resources() depends on crashkernel's memory being
++++++ ++ * reserved, so do it here.
++++++ ++ */
++++++ ++ reserve_crashkernel();
++++++ ++
memblock_dump_all();
}
*/
static void __init free_unused_memmap(void)
{
- unsigned long start, prev_end = 0;
- struct memblock_region *reg;
-
- for_each_memblock(memory, reg) {
- start = __phys_to_pfn(reg->base);
+ unsigned long start, end, prev_end = 0;
+ int i;
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
#ifdef CONFIG_SPARSEMEM
/*
* Take care not to free memmap entries that don't exist due
* memmap entries are valid from the bank end aligned to
* MAX_ORDER_NR_PAGES.
*/
- prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size),
- MAX_ORDER_NR_PAGES);
+ prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
}
#ifdef CONFIG_SPARSEMEM
* The following mapping attributes may be updated in live
* kernel mappings without the need for break-before-make.
*/
- static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
+ pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
/* creating or taking down mappings is always safe */
if (old == 0 || new == 0)
if (old & ~new & PTE_NG)
return false;
+ /*
+ * Changing the memory type between Normal and Normal-Tagged is safe
+ * since Tagged is considered a permission attribute from the
+ * mismatched attribute aliases perspective.
+ */
+ if (((old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) ||
+ (old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED)) &&
+ ((new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) ||
+ (new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED)))
+ mask |= PTE_ATTRINDX_MASK;
+
return ((old ^ new) & ~mask) == 0;
}
/*
* Remove the write permissions from the linear alias of .text/.rodata
*/
----- --- update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
----- --- (unsigned long)__init_begin - (unsigned long)_text,
+++++ +++ update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
+++++ +++ (unsigned long)__init_begin - (unsigned long)_stext,
PAGE_KERNEL_RO);
}
++++++ ++static bool crash_mem_map __initdata;
++++++ ++
++++++ ++static int __init enable_crash_mem_map(char *arg)
++++++ ++{
++++++ ++ /*
++++++ ++ * Proper parameter parsing is done by reserve_crashkernel(). We only
++++++ ++ * need to know if the linear map has to avoid block mappings so that
++++++ ++ * the crashkernel reservations can be unmapped later.
++++++ ++ */
++++++ ++ crash_mem_map = true;
++++++ ++
++++++ ++ return 0;
++++++ ++}
++++++ ++early_param("crashkernel", enable_crash_mem_map);
++++++ ++
static void __init map_mem(pgd_t *pgdp)
{
----- --- phys_addr_t kernel_start = __pa_symbol(_text);
+++++ +++ phys_addr_t kernel_start = __pa_symbol(_stext);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
- struct memblock_region *reg;
+ phys_addr_t start, end;
int flags = 0;
+ u64 i;
------ -- if (rodata_full || debug_pagealloc_enabled())
++++++ ++ if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
* the following for-loop
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
------ --#ifdef CONFIG_KEXEC_CORE
------ -- if (crashk_res.end)
------ -- memblock_mark_nomap(crashk_res.start,
------ -- resource_size(&crashk_res));
------ --#endif
/* map all the memory banks */
- for_each_memblock(memory, reg) {
- phys_addr_t start = reg->base;
- phys_addr_t end = start + reg->size;
-
+ for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
- if (memblock_is_nomap(reg))
- continue;
-
- __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
+ /*
+ * The linear map must allow allocation tags reading/writing
+ * if MTE is present. Otherwise, it has the same attributes as
+ * PAGE_KERNEL.
+ */
+ __map_memblock(pgdp, start, end, PAGE_KERNEL_TAGGED, flags);
}
/*
----- --- * Map the linear alias of the [_text, __init_begin) interval
+++++ +++ * Map the linear alias of the [_stext, __init_begin) interval
* as non-executable now, and remove the write permission in
* mark_linear_text_alias_ro() below (which will be called after
* alternative patching has completed). This makes the contents
__map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
------ --
------ --#ifdef CONFIG_KEXEC_CORE
------ -- /*
------ -- * Use page-level mappings here so that we can shrink the region
------ -- * in page granularity and put back unused memory to buddy system
------ -- * through /sys/kernel/kexec_crash_size interface.
------ -- */
------ -- if (crashk_res.end) {
------ -- __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
------ -- PAGE_KERNEL,
------ -- NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
------ -- memblock_clear_nomap(crashk_res.start,
------ -- resource_size(&crashk_res));
------ -- }
------ --#endif
}
void mark_rodata_ro(void)
* Only rodata will be remapped with different permissions later on,
* all other segments are allowed to use contiguous mappings.
*/
----- --- map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
+++++ +++ map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0,
VM_NO_GUARD);
map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
&vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
unsigned long pfn = arg->start_pfn;
--- ----- if (action != MEM_GOING_OFFLINE)
+++ +++++ if ((action != MEM_GOING_OFFLINE) && (action != MEM_OFFLINE))
return NOTIFY_OK;
for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+++ +++++ unsigned long start = PFN_PHYS(pfn);
+++ +++++ unsigned long end = start + (1UL << PA_SECTION_SHIFT);
+++ +++++
ms = __pfn_to_section(pfn);
--- ----- if (early_section(ms))
+++ +++++ if (!early_section(ms))
+++ +++++ continue;
+++ +++++
+++ +++++ if (action == MEM_GOING_OFFLINE) {
+++ +++++ /*
+++ +++++ * Boot memory removal is not supported. Prevent
+++ +++++ * it via blocking any attempted offline request
+++ +++++ * for the boot memory and just report it.
+++ +++++ */
+++ +++++ pr_warn("Boot memory [%lx %lx] offlining attempted\n", start, end);
return NOTIFY_BAD;
+++ +++++ } else if (action == MEM_OFFLINE) {
+++ +++++ /*
+++ +++++ * This should have never happened. Boot memory
+++ +++++ * offlining should have been prevented by this
+++ +++++ * very notifier. Probably some memory removal
+++ +++++ * procedure might have changed which would then
+++ +++++ * require further debug.
+++ +++++ */
+++ +++++ pr_err("Boot memory [%lx %lx] offlined\n", start, end);
+++ +++++
+++ +++++ /*
+++ +++++ * Core memory hotplug does not process a return
+++ +++++ * code from the notifier for MEM_OFFLINE events.
+++ +++++ * The error condition has been reported. Return
+++ +++++ * from here as if ignored.
+++ +++++ */
+++ +++++ return NOTIFY_DONE;
+++ +++++ }
}
return NOTIFY_OK;
}
.notifier_call = prevent_bootmem_remove_notifier,
};
+++ +++++/*
+++ +++++ * This ensures that boot memory sections on the platform are online
+++ +++++ * from early boot. Memory sections could not be prevented from being
+++ +++++ * offlined, unless for some reason they are not online to begin with.
+++ +++++ * This helps validate the basic assumption on which the above memory
+++ +++++ * event notifier works to prevent boot memory section offlining and
+++ +++++ * its possible removal.
+++ +++++ */
+++ +++++static void validate_bootmem_online(void)
+++ +++++{
+++ +++++ phys_addr_t start, end, addr;
+++ +++++ struct mem_section *ms;
+++ +++++ u64 i;
+++ +++++
+++ +++++ /*
+++ +++++ * Scanning across all memblock might be expensive
+++ +++++ * on some big memory systems. Hence enable this
+++ +++++ * validation only with DEBUG_VM.
+++ +++++ */
+++ +++++ if (!IS_ENABLED(CONFIG_DEBUG_VM))
+++ +++++ return;
+++ +++++
+++ +++++ for_each_mem_range(i, &start, &end) {
+++ +++++ for (addr = start; addr < end; addr += (1UL << PA_SECTION_SHIFT)) {
+++ +++++ ms = __pfn_to_section(PHYS_PFN(addr));
+++ +++++
+++ +++++ /*
+++ +++++ * All memory ranges in the system at this point
+++ +++++ * should have been marked as early sections.
+++ +++++ */
+++ +++++ WARN_ON(!early_section(ms));
+++ +++++
+++ +++++ /*
+++ +++++ * Memory notifier mechanism here to prevent boot
+++ +++++ * memory offlining depends on the fact that each
+++ +++++ * early section memory on the system is initially
+++ +++++ * online. Otherwise a given memory section which
+++ +++++ * is already offline will be overlooked and can
+++ +++++ * be removed completely. Call out such sections.
+++ +++++ */
+++ +++++ if (!online_section(ms))
+++ +++++ pr_err("Boot memory [%llx %llx] is offline, can be removed\n",
+++ +++++ addr, addr + (1UL << PA_SECTION_SHIFT));
+++ +++++ }
+++ +++++ }
+++ +++++}
+++ +++++
static int __init prevent_bootmem_remove_init(void)
{
--- ----- return register_memory_notifier(&prevent_bootmem_remove_nb);
+++ +++++ int ret = 0;
+++ +++++
+++ +++++ if (!IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
+++ +++++ return ret;
+++ +++++
+++ +++++ validate_bootmem_online();
+++ +++++ ret = register_memory_notifier(&prevent_bootmem_remove_nb);
+++ +++++ if (ret)
+++ +++++ pr_err("%s: Notifier registration failed %d\n", __func__, ret);
+++ +++++
+++ +++++ return ret;
}
--- -----device_initcall(prevent_bootmem_remove_init);
+++ +++++early_initcall(prevent_bootmem_remove_init);
#endif