return IS_ENABLED(CONFIG_ARM64_BTI);
case ARM64_HAS_TLB_RANGE:
return IS_ENABLED(CONFIG_ARM64_TLB_RANGE);
+ case ARM64_HAS_S1POE:
+ return IS_ENABLED(CONFIG_ARM64_POE);
case ARM64_UNMAP_KERNEL_AT_EL0:
return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0);
case ARM64_WORKAROUND_843419:
return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI);
case ARM64_WORKAROUND_SPECULATIVE_SSBS:
return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386);
+ case ARM64_MPAM:
+ /*
+ * KVM MPAM support doesn't rely on the host kernel supporting MPAM.
+ */
+ return true;
}
return true;
#include <asm/hwcap.h>
#include <asm/sysreg.h>
-#define MAX_CPU_FEATURES 128
+#define MAX_CPU_FEATURES 192
#define cpu_feature(x) KERNEL_HWCAP_ ## x
#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
bool cpu_have_feature(unsigned int num);
unsigned long cpu_get_elf_hwcap(void);
unsigned long cpu_get_elf_hwcap2(void);
+unsigned long cpu_get_elf_hwcap3(void);
#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name))
#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))
return val > 0;
}
+ static inline bool id_aa64pfr0_mpam(u64 pfr0)
+ {
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT);
+
+ return val > 0;
+ }
+
static inline bool id_aa64pfr1_mte(u64 pfr1)
{
u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT);
static inline bool system_supports_poe(void)
{
- return IS_ENABLED(CONFIG_ARM64_POE) &&
- alternative_has_cap_unlikely(ARM64_HAS_S1POE);
+ return alternative_has_cap_unlikely(ARM64_HAS_S1POE);
+}
+
+static inline bool system_supports_gcs(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_GCS) &&
+ alternative_has_cap_unlikely(ARM64_HAS_GCS);
+}
+
+static inline bool system_supports_haft(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_HAFT) &&
+ cpus_have_final_cap(ARM64_HAFT);
}
+ static __always_inline bool system_supports_mpam(void)
+ {
+ return alternative_has_cap_unlikely(ARM64_MPAM);
+ }
+
+ static __always_inline bool system_supports_mpam_hcr(void)
+ {
+ return alternative_has_cap_unlikely(ARM64_MPAM_HCR);
+ }
+
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
cbz x0, .Lskip_hcrx_\@
mov_q x0, HCRX_HOST_FLAGS
+
+ /* Enable GCS if supported */
+ mrs_s x1, SYS_ID_AA64PFR1_EL1
+ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
+ cbz x1, .Lset_hcrx_\@
+ orr x0, x0, #HCRX_EL2_GCSEn
+
+.Lset_hcrx_\@:
msr_s SYS_HCRX_EL2, x0
.Lskip_hcrx_\@:
.endm
orr x0, x0, #HFGxTR_EL2_nPOR_EL0
.Lskip_poe_fgt_\@:
+ /* GCS depends on PIE so we don't check it if PIE is absent */
+ mrs_s x1, SYS_ID_AA64PFR1_EL1
+ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
+ cbz x1, .Lset_fgt_\@
+
+ /* Disable traps of access to GCS registers at EL0 and EL1 */
+ orr x0, x0, #HFGxTR_EL2_nGCS_EL1_MASK
+ orr x0, x0, #HFGxTR_EL2_nGCS_EL0_MASK
+
+.Lset_fgt_\@:
msr_s SYS_HFGRTR_EL2, x0
msr_s SYS_HFGWTR_EL2, x0
msr_s SYS_HFGITR_EL2, xzr
.Lskip_fgt_\@:
.endm
+.macro __init_el2_gcs
+ mrs_s x1, SYS_ID_AA64PFR1_EL1
+ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
+ cbz x1, .Lskip_gcs_\@
+
+ /* Ensure GCS is not enabled when we start trying to do BLs */
+ msr_s SYS_GCSCR_EL1, xzr
+ msr_s SYS_GCSCRE0_EL1, xzr
+.Lskip_gcs_\@:
+.endm
+
.macro __init_el2_nvhe_prepare_eret
mov x0, #INIT_PSTATE_EL1
msr spsr_el2, x0
.endm
+ .macro __init_el2_mpam
+ /* Memory Partitioning And Monitoring: disable EL2 traps */
+ mrs x1, id_aa64pfr0_el1
+ ubfx x0, x1, #ID_AA64PFR0_EL1_MPAM_SHIFT, #4
+ cbz x0, .Lskip_mpam_\@ // skip if no MPAM
+ msr_s SYS_MPAM2_EL2, xzr // use the default partition
+ // and disable lower traps
+ mrs_s x0, SYS_MPAMIDR_EL1
+ tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg
+ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2
+ .Lskip_mpam_\@:
+ .endm
+
/**
* Initialize EL2 registers to sane values. This should be called early on all
* cores that were booted in EL2. Note that everything gets initialised as
__init_el2_stage2
__init_el2_gicv3
__init_el2_hstr
+ __init_el2_mpam
__init_el2_nvhe_idregs
__init_el2_cptr
__init_el2_fgt
+ __init_el2_gcs
.endm
#ifndef __KVM_NVHE_HYPERVISOR__
#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0)
#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0)
- #define SYS_MPAMHCR_EL2 sys_reg(3, 4, 10, 4, 0)
- #define SYS_MPAMVPMV_EL2 sys_reg(3, 4, 10, 4, 1)
- #define SYS_MPAM2_EL2 sys_reg(3, 4, 10, 5, 0)
- #define __SYS__MPAMVPMx_EL2(x) sys_reg(3, 4, 10, 6, x)
- #define SYS_MPAMVPM0_EL2 __SYS__MPAMVPMx_EL2(0)
- #define SYS_MPAMVPM1_EL2 __SYS__MPAMVPMx_EL2(1)
- #define SYS_MPAMVPM2_EL2 __SYS__MPAMVPMx_EL2(2)
- #define SYS_MPAMVPM3_EL2 __SYS__MPAMVPMx_EL2(3)
- #define SYS_MPAMVPM4_EL2 __SYS__MPAMVPMx_EL2(4)
- #define SYS_MPAMVPM5_EL2 __SYS__MPAMVPMx_EL2(5)
- #define SYS_MPAMVPM6_EL2 __SYS__MPAMVPMx_EL2(6)
- #define SYS_MPAMVPM7_EL2 __SYS__MPAMVPMx_EL2(7)
#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0)
#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1)
/* Initial value for Permission Overlay Extension for EL0 */
#define POR_EL0_INIT POE_RXW
+/*
+ * Definitions for Guarded Control Stack
+ */
+
+#define GCS_CAP_ADDR_MASK GENMASK(63, 12)
+#define GCS_CAP_ADDR_SHIFT 12
+#define GCS_CAP_ADDR_WIDTH 52
+#define GCS_CAP_ADDR(x) FIELD_GET(GCS_CAP_ADDR_MASK, x)
+
+#define GCS_CAP_TOKEN_MASK GENMASK(11, 0)
+#define GCS_CAP_TOKEN_SHIFT 0
+#define GCS_CAP_TOKEN_WIDTH 12
+#define GCS_CAP_TOKEN(x) FIELD_GET(GCS_CAP_TOKEN_MASK, x)
+
+#define GCS_CAP_VALID_TOKEN 0x1
+#define GCS_CAP_IN_PROGRESS_TOKEN 0x5
+
+#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
+ GCS_CAP_VALID_TOKEN)
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Defined for compatibility only, do not add new users. */
COMPAT_HWCAP_LPAE)
unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
unsigned int compat_elf_hwcap2 __read_mostly;
+unsigned int compat_elf_hwcap3 __read_mostly;
#endif
DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
};
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_XS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0),
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0),
ARM64_FTR_END,
};
+ static const struct arm64_ftr_bits ftr_mpamidr[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_PMG_MAX_SHIFT, MPAMIDR_EL1_PMG_MAX_WIDTH, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_VPMR_MAX_SHIFT, MPAMIDR_EL1_VPMR_MAX_WIDTH, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_HAS_HCR_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_PARTID_MAX_SHIFT, MPAMIDR_EL1_PARTID_MAX_WIDTH, 0),
+ ARM64_FTR_END,
+ };
+
/*
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
ARM64_FTR_REG(SYS_ID_AA64MMFR4_EL1, ftr_id_aa64mmfr4),
+ /* Op1 = 0, CRn = 10, CRm = 4 */
+ ARM64_FTR_REG(SYS_MPAMIDR_EL1, ftr_mpamidr),
+
/* Op1 = 1, CRn = 0, CRm = 0 */
ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
cpacr_restore(cpacr);
}
+ if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
+ init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr);
+
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
}
cpacr_restore(cpacr);
}
+ if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) {
+ taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu,
+ info->reg_mpamidr, boot->reg_mpamidr);
+ }
+
/*
* The kernel uses the LDGM/STGM instructions and the number of tags
* they read/write depends on the GMID_EL1.BS field. Check that the
}
#endif
+#ifdef CONFIG_ARM64_GCS
+static void cpu_enable_gcs(const struct arm64_cpu_capabilities *__unused)
+{
+ /* GCSPR_EL0 is always readable */
+ write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
+}
+#endif
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT);
}
+ static bool
+ test_has_mpam(const struct arm64_cpu_capabilities *entry, int scope)
+ {
+ if (!has_cpuid_feature(entry, scope))
+ return false;
+
+ /* Check firmware actually enabled MPAM on this cpu. */
+ return (read_sysreg_s(SYS_MPAM1_EL1) & MPAM1_EL1_MPAMEN);
+ }
+
+ static void
+ cpu_enable_mpam(const struct arm64_cpu_capabilities *entry)
+ {
+ /*
+ * Access by the kernel (at EL1) should use the reserved PARTID
+ * which is configured unrestricted. This avoids priority-inversion
+ * where latency sensitive tasks have to wait for a task that has
+ * been throttled to release the lock.
+ */
+ write_sysreg_s(0, SYS_MPAM1_EL1);
+ }
+
+ static bool
+ test_has_mpam_hcr(const struct arm64_cpu_capabilities *entry, int scope)
+ {
+ u64 idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1);
+
+ return idr & MPAMIDR_EL1_HAS_HCR;
+ }
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.capability = ARM64_ALWAYS_BOOT,
.cpus = &dbm_cpus,
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
},
+#endif
+#ifdef CONFIG_ARM64_HAFT
+ {
+ .desc = "Hardware managed Access Flag for Table Descriptors",
+ /*
+ * Contrary to the page/block access flag, the table access flag
+ * cannot be emulated in software (no access fault will occur).
+ * Therefore this should be used only if it's supported system
+ * wide.
+ */
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAFT,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT)
+ },
#endif
{
.desc = "CRC32 instructions",
#endif
},
#endif
+ {
+ .desc = "Memory Partitioning And Monitoring",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_MPAM,
+ .matches = test_has_mpam,
+ .cpu_enable = cpu_enable_mpam,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, MPAM, 1)
+ },
+ {
+ .desc = "Memory Partitioning And Monitoring Virtualisation",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_MPAM_HCR,
+ .matches = test_has_mpam_hcr,
+ },
{
.desc = "NV1",
.capability = ARM64_HAS_HCR_NV1,
.cpu_enable = cpu_enable_poe,
ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP)
},
+#endif
+#ifdef CONFIG_ARM64_GCS
+ {
+ .desc = "Guarded Control Stack (GCS)",
+ .capability = ARM64_HAS_GCS,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .cpu_enable = cpu_enable_gcs,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, GCS, IMP)
+ },
#endif
{},
};
HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+#endif
+#ifdef CONFIG_ARM64_GCS
+ HWCAP_CAP(ID_AA64PFR1_EL1, GCS, IMP, CAP_HWCAP, KERNEL_HWCAP_GCS),
#endif
HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_BTI
}
}
+ static void verify_mpam_capabilities(void)
+ {
+ u64 cpu_idr = read_cpuid(ID_AA64PFR0_EL1);
+ u64 sys_idr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ u16 cpu_partid_max, cpu_pmg_max, sys_partid_max, sys_pmg_max;
+
+ if (FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, cpu_idr) !=
+ FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, sys_idr)) {
+ pr_crit("CPU%d: MPAM version mismatch\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ cpu_idr = read_cpuid(MPAMIDR_EL1);
+ sys_idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1);
+ if (FIELD_GET(MPAMIDR_EL1_HAS_HCR, cpu_idr) !=
+ FIELD_GET(MPAMIDR_EL1_HAS_HCR, sys_idr)) {
+ pr_crit("CPU%d: Missing MPAM HCR\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ cpu_partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, cpu_idr);
+ cpu_pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, cpu_idr);
+ sys_partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, sys_idr);
+ sys_pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, sys_idr);
+ if (cpu_partid_max < sys_partid_max || cpu_pmg_max < sys_pmg_max) {
+ pr_crit("CPU%d: MPAM PARTID/PMG max values are mismatched\n", smp_processor_id());
+ cpu_die_early();
+ }
+ }
+
/*
* Run through the enabled system capabilities and enable() it on this CPU.
* The capabilities were decided based on the available CPUs at the boot time.
if (is_hyp_mode_available())
verify_hyp_capabilities();
+
+ if (system_supports_mpam())
+ verify_mpam_capabilities();
}
void check_local_cpu_capabilities(void)
return elf_hwcap[1];
}
+unsigned long cpu_get_elf_hwcap3(void)
+{
+ return elf_hwcap[2];
+}
+
static void __init setup_boot_cpu_capabilities(void)
{
/*
[KERNEL_HWCAP_SB] = "sb",
[KERNEL_HWCAP_PACA] = "paca",
[KERNEL_HWCAP_PACG] = "pacg",
+ [KERNEL_HWCAP_GCS] = "gcs",
[KERNEL_HWCAP_DCPODP] = "dcpodp",
[KERNEL_HWCAP_SVE2] = "sve2",
[KERNEL_HWCAP_SVEAES] = "sveaes",
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
__cpuinfo_store_cpu_32bit(&info->aarch32);
+ if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
+
cpuinfo_detect_icache_policy(info);
}
}
while (length > 0) {
- kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
+ struct page *page = __gfn_to_page(kvm, gfn, write);
void *maddr;
unsigned long num_tags;
- struct page *page;
+ struct folio *folio;
- if (is_error_noslot_pfn(pfn)) {
+ if (!page) {
ret = -EFAULT;
goto out;
}
- page = pfn_to_online_page(pfn);
- if (!page) {
+ if (!pfn_to_online_page(page_to_pfn(page))) {
/* Reject ZONE_DEVICE memory */
- kvm_release_pfn_clean(pfn);
+ kvm_release_page_unused(page);
ret = -EFAULT;
goto out;
}
+ folio = page_folio(page);
maddr = page_address(page);
if (!write) {
- if (page_mte_tagged(page))
+ if ((folio_test_hugetlb(folio) &&
+ folio_test_hugetlb_mte_tagged(folio)) ||
+ page_mte_tagged(page))
num_tags = mte_copy_tags_to_user(tags, maddr,
MTE_GRANULES_PER_PAGE);
else
/* No tags in memory, so write zeros */
num_tags = MTE_GRANULES_PER_PAGE -
clear_user(tags, MTE_GRANULES_PER_PAGE);
- kvm_release_pfn_clean(pfn);
+ kvm_release_page_clean(page);
} else {
/*
* Only locking to serialise with a concurrent
* __set_ptes() in the VMM but still overriding the
* tags, hence ignoring the return value.
*/
- try_page_mte_tagging(page);
+ if (folio_test_hugetlb(folio))
+ folio_try_hugetlb_mte_tagging(folio);
+ else
+ try_page_mte_tagging(page);
num_tags = mte_copy_tags_from_user(maddr, tags,
MTE_GRANULES_PER_PAGE);
/* uaccess failed, don't leave stale tags */
if (num_tags != MTE_GRANULES_PER_PAGE)
mte_clear_page_tags(maddr);
- set_page_mte_tagged(page);
+ if (folio_test_hugetlb(folio))
+ folio_set_hugetlb_mte_tagged(folio);
+ else
+ set_page_mte_tagged(page);
+
- kvm_release_pfn_dirty(pfn);
+ kvm_release_page_dirty(page);
}
if (num_tags != MTE_GRANULES_PER_PAGE) {
{
unsigned long i, nr_pages = size >> PAGE_SHIFT;
struct page *page = pfn_to_page(pfn);
+ struct folio *folio = page_folio(page);
if (!kvm_has_mte(kvm))
return;
+ if (folio_test_hugetlb(folio)) {
+ /* Hugetlb has MTE flags set on head page only */
+ if (folio_try_hugetlb_mte_tagging(folio)) {
+ for (i = 0; i < nr_pages; i++, page++)
+ mte_clear_page_tags(page_address(page));
+ folio_set_hugetlb_mte_tagged(folio);
+ }
+ return;
+ }
+
for (i = 0; i < nr_pages; i++, page++) {
if (try_page_mte_tagging(page)) {
mte_clear_page_tags(page_address(page));
long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
+ struct page *page;
if (fault_is_perm)
fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
/*
* Read mmu_invalidate_seq so that KVM can detect if the results of
- * vma_lookup() or __gfn_to_pfn_memslot() become stale prior to
+ * vma_lookup() or __kvm_faultin_pfn() become stale prior to
* acquiring kvm->mmu_lock.
*
* Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
mmap_read_unlock(current->mm);
- pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
- write_fault, &writable, NULL);
+ pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
+ &writable, &page);
if (pfn == KVM_PFN_ERR_HWPOISON) {
kvm_send_hwpoison_signal(hva, vma_shift);
return 0;
* If the page was identified as device early by looking at
* the VMA flags, vma_pagesize is already representing the
* largest quantity we can map. If instead it was mapped
- * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
+ * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
* and must not be upgraded.
*
* In both cases, we don't let transparent_hugepage_adjust()
}
out_unlock:
+ kvm_release_faultin_page(kvm, page, !!ret, writable);
read_unlock(&kvm->mmu_lock);
/* Mark the page dirty only if the fault is handled successfully */
- if (writable && !ret) {
- kvm_set_pfn_dirty(pfn);
+ if (writable && !ret)
mark_page_dirty_in_slot(kvm, memslot, gfn);
- }
- kvm_release_pfn_clean(pfn);
return ret != -EAGAIN ? ret : 0;
}
/* Resolve the access fault by making the page young again. */
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
- kvm_pte_t pte;
struct kvm_s2_mmu *mmu;
trace_kvm_access_fault(fault_ipa);
read_lock(&vcpu->kvm->mmu_lock);
mmu = vcpu->arch.hw_mmu;
- pte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
+ kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
read_unlock(&vcpu->kvm->mmu_lock);
-
- if (kvm_pte_valid(pte))
- kvm_set_pfn_accessed(kvm_pte_to_pfn(pte));
}
/**
HAS_FPMR
HAS_FGT
HAS_FPSIMD
+HAS_GCS
HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5
HAS_VA52
HAS_VIRT_HOST_EXTN
HAS_WFXT
+HAFT
HW_DBM
KVM_HVHE
KVM_PROTECTED_MODE
MISMATCHED_CACHE_TYPE
+ MPAM
+ MPAM_HCR
MTE
MTE_ASYMM
SME
0b0001 IMP
0b0010 BRBE_V1P1
EndEnum
- Enum 51:48 MTPMU
+ SignedEnum 51:48 MTPMU
0b0000 NI_IMPDEF
0b0001 IMP
0b1111 NI
UnsignedEnum 47:44 TraceBuffer
0b0000 NI
0b0001 IMP
+ 0b0010 TRBE_V1P1
EndEnum
UnsignedEnum 43:40 TraceFilt
0b0000 NI
0b0011 V1P2
0b0100 V1P3
0b0101 V1P4
+ 0b0110 V1P5
EndEnum
Field 31:28 CTX_CMPs
- Res0 27:24
+ UnsignedEnum 27:24 SEBEP
+ 0b0000 NI
+ 0b0001 IMP
+ EndEnum
Field 23:20 WRPs
- Res0 19:16
+ UnsignedEnum 19:16 PMSS
+ 0b0000 NI
+ 0b0001 IMP
+ EndEnum
Field 15:12 BRPs
UnsignedEnum 11:8 PMUVer
0b0000 NI
Field 7:0 SYSPMUID
EndSysreg
+ Sysreg ID_AA64DFR2_EL1 3 0 0 5 2
+ Res0 63:28
+ UnsignedEnum 27:24 TRBE_EXC
+ 0b0000 NI
+ 0b0001 IMP
+ EndEnum
+ UnsignedEnum 23:20 SPE_nVM
+ 0b0000 NI
+ 0b0001 IMP
+ EndEnum
+ UnsignedEnum 19:16 SPE_EXC
+ 0b0000 NI
+ 0b0001 IMP
+ EndEnum
+ Res0 15:8
+ UnsignedEnum 7:4 BWE
+ 0b0000 NI
+ 0b0001 FEAT_BWE
+ 0b0002 FEAT_BWE2
+ EndEnum
+ UnsignedEnum 3:0 STEP
+ 0b0000 NI
+ 0b0001 IMP
+ EndEnum
+ EndSysreg
+
Sysreg ID_AA64AFR0_EL1 3 0 0 5 4
Res0 63:32
Field 31:28 IMPDEF7
UnsignedEnum 39:36 ETS
0b0000 NI
0b0001 IMP
+ 0b0010 ETS2
+ 0b0011 ETS3
EndEnum
UnsignedEnum 35:32 TWED
0b0000 NI
0b0001 AF
0b0010 DBM
0b0011 HAFT
+ 0b0100 HDBSS
EndEnum
EndSysreg
Field 3:0 ALIGN
EndSysreg
+Sysreg PMUACR_EL1 3 0 9 14 4
+Res0 63:33
+Field 32 F0
+Field 31 C
+Field 30:0 P
+EndSysreg
+
Sysreg PMSELR_EL0 3 3 9 12 5
Res0 63:5
Field 4:0 SEL
Field 0 AFSR0_EL1
EndSysregFields
+ Sysreg MDCR_EL2 3 4 1 1 1
+ Res0 63:51
+ Field 50 EnSTEPOP
+ Res0 49:44
+ Field 43 EBWE
+ Res0 42
+ Field 41:40 PMEE
+ Res0 39:37
+ Field 36 HPMFZS
+ Res0 35:32
+ Field 31:30 PMSSE
+ Field 29 HPMFZO
+ Field 28 MTPME
+ Field 27 TDCC
+ Field 26 HLP
+ Field 25:24 E2TB
+ Field 23 HCCD
+ Res0 22:20
+ Field 19 TTRF
+ Res0 18
+ Field 17 HPMD
+ Res0 16
+ Field 15 EnSPM
+ Field 14 TPMS
+ Field 13:12 E2PB
+ Field 11 TDRA
+ Field 10 TDOSA
+ Field 9 TDA
+ Field 8 TDE
+ Field 7 HPME
+ Field 6 TPM
+ Field 5 TPMCR
+ Field 4:0 HPMN
+ EndSysreg
+
Sysreg HFGRTR_EL2 3 4 1 1 4
Fields HFGxTR_EL2
EndSysreg
Field 0 E0HSPE
EndSysreg
+ Sysreg MPAMHCR_EL2 3 4 10 4 0
+ Res0 63:32
+ Field 31 TRAP_MPAMIDR_EL1
+ Res0 30:9
+ Field 8 GSTAPP_PLK
+ Res0 7:2
+ Field 1 EL1_VPMEN
+ Field 0 EL0_VPMEN
+ EndSysreg
+
+ Sysreg MPAMVPMV_EL2 3 4 10 4 1
+ Res0 63:32
+ Field 31 VPM_V31
+ Field 30 VPM_V30
+ Field 29 VPM_V29
+ Field 28 VPM_V28
+ Field 27 VPM_V27
+ Field 26 VPM_V26
+ Field 25 VPM_V25
+ Field 24 VPM_V24
+ Field 23 VPM_V23
+ Field 22 VPM_V22
+ Field 21 VPM_V21
+ Field 20 VPM_V20
+ Field 19 VPM_V19
+ Field 18 VPM_V18
+ Field 17 VPM_V17
+ Field 16 VPM_V16
+ Field 15 VPM_V15
+ Field 14 VPM_V14
+ Field 13 VPM_V13
+ Field 12 VPM_V12
+ Field 11 VPM_V11
+ Field 10 VPM_V10
+ Field 9 VPM_V9
+ Field 8 VPM_V8
+ Field 7 VPM_V7
+ Field 6 VPM_V6
+ Field 5 VPM_V5
+ Field 4 VPM_V4
+ Field 3 VPM_V3
+ Field 2 VPM_V2
+ Field 1 VPM_V1
+ Field 0 VPM_V0
+ EndSysreg
+
+ Sysreg MPAM2_EL2 3 4 10 5 0
+ Field 63 MPAMEN
+ Res0 62:59
+ Field 58 TIDR
+ Res0 57
+ Field 56 ALTSP_HFC
+ Field 55 ALTSP_EL2
+ Field 54 ALTSP_FRCD
+ Res0 53:51
+ Field 50 EnMPAMSM
+ Field 49 TRAPMPAM0EL1
+ Field 48 TRAPMPAM1EL1
+ Field 47:40 PMG_D
+ Field 39:32 PMG_I
+ Field 31:16 PARTID_D
+ Field 15:0 PARTID_I
+ EndSysreg
+
+ Sysreg MPAMVPM0_EL2 3 4 10 6 0
+ Field 63:48 PhyPARTID3
+ Field 47:32 PhyPARTID2
+ Field 31:16 PhyPARTID1
+ Field 15:0 PhyPARTID0
+ EndSysreg
+
+ Sysreg MPAMVPM1_EL2 3 4 10 6 1
+ Field 63:48 PhyPARTID7
+ Field 47:32 PhyPARTID6
+ Field 31:16 PhyPARTID5
+ Field 15:0 PhyPARTID4
+ EndSysreg
+
+ Sysreg MPAMVPM2_EL2 3 4 10 6 2
+ Field 63:48 PhyPARTID11
+ Field 47:32 PhyPARTID10
+ Field 31:16 PhyPARTID9
+ Field 15:0 PhyPARTID8
+ EndSysreg
+
+ Sysreg MPAMVPM3_EL2 3 4 10 6 3
+ Field 63:48 PhyPARTID15
+ Field 47:32 PhyPARTID14
+ Field 31:16 PhyPARTID13
+ Field 15:0 PhyPARTID12
+ EndSysreg
+
+ Sysreg MPAMVPM4_EL2 3 4 10 6 4
+ Field 63:48 PhyPARTID19
+ Field 47:32 PhyPARTID18
+ Field 31:16 PhyPARTID17
+ Field 15:0 PhyPARTID16
+ EndSysreg
+
+ Sysreg MPAMVPM5_EL2 3 4 10 6 5
+ Field 63:48 PhyPARTID23
+ Field 47:32 PhyPARTID22
+ Field 31:16 PhyPARTID21
+ Field 15:0 PhyPARTID20
+ EndSysreg
+
+ Sysreg MPAMVPM6_EL2 3 4 10 6 6
+ Field 63:48 PhyPARTID27
+ Field 47:32 PhyPARTID26
+ Field 31:16 PhyPARTID25
+ Field 15:0 PhyPARTID24
+ EndSysreg
+
+ Sysreg MPAMVPM7_EL2 3 4 10 6 7
+ Field 63:48 PhyPARTID31
+ Field 47:32 PhyPARTID30
+ Field 31:16 PhyPARTID29
+ Field 15:0 PhyPARTID28
+ EndSysreg
+
Sysreg CONTEXTIDR_EL2 3 4 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
Field 63:0 ADDR
EndSysreg
+ Sysreg MPAM1_EL12 3 5 10 5 0
+ Fields MPAM1_ELx
+ EndSysreg
+
Sysreg CONTEXTIDR_EL12 3 5 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
Field 12 AMEC0
Field 11 HAFT
Field 10 PTTWI
- Field 9:8 SKL1
- Field 7:6 SKL0
+ Res0 9:6
Field 5 D128
Field 4 AIE
Field 3 POE
Fields PIRx_ELx
EndSysreg
+ Sysreg PIRE0_EL2 3 4 10 2 2
+ Fields PIRx_ELx
+ EndSysreg
+
Sysreg PIR_EL1 3 0 10 2 3
Fields PIRx_ELx
EndSysreg
Fields PIRx_ELx
EndSysreg
+ Sysreg POR_EL2 3 4 10 2 4
+ Fields PIRx_ELx
+ EndSysreg
+
Sysreg POR_EL12 3 5 10 2 4
Fields PIRx_ELx
EndSysreg
Field 0 EN
EndSysreg
+ Sysreg MPAMIDR_EL1 3 0 10 4 4
+ Res0 63:62
+ Field 61 HAS_SDEFLT
+ Field 60 HAS_FORCE_NS
+ Field 59 SP4
+ Field 58 HAS_TIDR
+ Field 57 HAS_ALTSP
+ Res0 56:40
+ Field 39:32 PMG_MAX
+ Res0 31:21
+ Field 20:18 VPMR_MAX
+ Field 17 HAS_HCR
+ Res0 16
+ Field 15:0 PARTID_MAX
+ EndSysreg
+
Sysreg LORID_EL1 3 0 10 4 7
Res0 63:24
Field 23:16 LD
Field 7:0 LR
EndSysreg
+ Sysreg MPAM1_EL1 3 0 10 5 0
+ Field 63 MPAMEN
+ Res0 62:61
+ Field 60 FORCED_NS
+ Res0 59:55
+ Field 54 ALTSP_FRCD
+ Res0 53:48
+ Field 47:40 PMG_D
+ Field 39:32 PMG_I
+ Field 31:16 PARTID_D
+ Field 15:0 PARTID_I
+ EndSysreg
+
+ Sysreg MPAM0_EL1 3 0 10 5 1
+ Res0 63:48
+ Field 47:40 PMG_D
+ Field 39:32 PMG_I
+ Field 31:16 PARTID_D
+ Field 15:0 PARTID_I
+ EndSysreg
+
Sysreg ISR_EL1 3 0 12 1 0
Res0 63:11
Field 10 IS
struct kvmppc_vcore *vc = vcpu->arch.vcore;
hr->pcr = vc->pcr | PCR_MASK;
- hr->dpdes = vc->dpdes;
+ hr->dpdes = vcpu->arch.doorbell_request;
hr->hfscr = vcpu->arch.hfscr;
hr->tb_offset = vc->tb_offset;
hr->dawr0 = vcpu->arch.dawr0;
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- hr->dpdes = vc->dpdes;
+ hr->dpdes = vcpu->arch.doorbell_request;
hr->purr = vcpu->arch.purr;
hr->spurr = vcpu->arch.spurr;
hr->ic = vcpu->arch.ic;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
vc->pcr = hr->pcr | PCR_MASK;
- vc->dpdes = hr->dpdes;
+ vcpu->arch.doorbell_request = hr->dpdes;
vcpu->arch.hfscr = hr->hfscr;
vcpu->arch.dawr0 = hr->dawr0;
vcpu->arch.dawrx0 = hr->dawrx0;
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- vc->dpdes = hr->dpdes;
+ /*
+ * This L2 vCPU might have received a doorbell while H_ENTER_NESTED was being handled.
+ * Make sure we preserve the doorbell if it was either:
+ * a) Sent after H_ENTER_NESTED was called on this vCPU (arch.doorbell_request would be 1)
+ * b) Doorbell was not handled and L2 exited for some other reason (hr->dpdes would be 1)
+ */
+ vcpu->arch.doorbell_request = vcpu->arch.doorbell_request | hr->dpdes;
vcpu->arch.hfscr = hr->hfscr;
vcpu->arch.purr = hr->purr;
vcpu->arch.spurr = hr->spurr;
if (rc == H_SUCCESS) {
unsigned long capabilities = 0;
+ if (cpu_has_feature(CPU_FTR_P11_PVR))
+ capabilities |= H_GUEST_CAP_POWER11;
if (cpu_has_feature(CPU_FTR_ARCH_31))
capabilities |= H_GUEST_CAP_POWER10;
if (cpu_has_feature(CPU_FTR_ARCH_300))
unsigned long n_gpa, gpa, gfn, perm = 0UL;
unsigned int shift, l1_shift, level;
bool writing = !!(dsisr & DSISR_ISSTORE);
- bool kvm_ro = false;
long int ret;
if (!gp->l1_gr_to_hr) {
ea, DSISR_ISSTORE | DSISR_PROTFAULT);
return RESUME_GUEST;
}
- kvm_ro = true;
}
/* 2. Find the host pte for this L1 guest real address */
if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
/* No suitable pte found -> try to insert a mapping */
ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
- writing, kvm_ro, &pte, &level);
+ writing, &pte, &level);
if (ret == -EAGAIN)
return RESUME_GUEST;
else if (ret)
r = 8 | 4 | 2 | 1;
}
break;
- case KVM_CAP_PPC_RMA:
- r = 0;
- break;
case KVM_CAP_PPC_HWRNG:
r = kvmppc_hwrng_present();
break;
#endif
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC: {
- struct fd f;
+ CLASS(fd, f)(cap->args[0]);
struct kvm_device *dev;
r = -EBADF;
- f = fdget(cap->args[0]);
- if (!fd_file(f))
+ if (fd_empty(f))
break;
r = -EPERM;
if (dev)
r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
- fdput(f);
break;
}
#endif
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS: {
- struct fd f;
+ CLASS(fd, f)(cap->args[0]);
struct kvm_device *dev;
r = -EBADF;
- f = fdget(cap->args[0]);
- if (!fd_file(f))
+ if (fd_empty(f))
break;
r = -EPERM;
else
r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
}
-
- fdput(f);
break;
}
#endif /* CONFIG_KVM_XICS */
#ifdef CONFIG_KVM_XIVE
case KVM_CAP_PPC_IRQ_XIVE: {
- struct fd f;
+ CLASS(fd, f)(cap->args[0]);
struct kvm_device *dev;
r = -EBADF;
- f = fdget(cap->args[0]);
- if (!fd_file(f))
+ if (fd_empty(f))
break;
r = -ENXIO;
- if (!xive_enabled()) {
- fdput(f);
+ if (!xive_enabled())
break;
- }
r = -EPERM;
dev = kvm_device_from_filp(fd_file(f));
if (dev)
r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
cap->args[1]);
-
- fdput(f);
break;
}
#endif /* CONFIG_KVM_XIVE */
#ifndef _ASM_RISCV_PERF_EVENT_H
#define _ASM_RISCV_PERF_EVENT_H
+ #ifdef CONFIG_PERF_EVENTS
#include <linux/perf_event.h>
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs) perf_misc_flags(regs)
#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
#define perf_arch_fetch_caller_regs(regs, __ip) { \
(regs)->sp = current_stack_pointer; \
(regs)->status = SR_PP; \
}
+ #endif
+
#endif /* _ASM_RISCV_PERF_EVENT_H */
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
+ if (perf_guest_state()) {
+ /* TODO: We don't support guest os callchain now */
+ return;
+ }
+
arch_stack_walk_user(fill_callchain, entry, regs);
}
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
+ if (perf_guest_state()) {
+ /* TODO: We don't support guest os callchain now */
+ return;
+ }
+
walk_stackframe(NULL, regs, fill_callchain, entry);
}
-
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
- if (perf_guest_state())
- return perf_guest_get_ip();
-
- return instruction_pointer(regs);
-}
-
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
- unsigned int guest_state = perf_guest_state();
- unsigned long misc = 0;
-
- if (guest_state) {
- if (guest_state & PERF_GUEST_USER)
- misc |= PERF_RECORD_MISC_GUEST_USER;
- else
- misc |= PERF_RECORD_MISC_GUEST_KERNEL;
- } else {
- if (user_mode(regs))
- misc |= PERF_RECORD_MISC_USER;
- else
- misc |= PERF_RECORD_MISC_KERNEL;
- }
-
- return misc;
-}
#define ECD_MEF 0x08000000
#define ECD_ETOKENF 0x02000000
#define ECD_ECC 0x00200000
+ #define ECD_HMAC 0x00004000
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
#define PGM_REGION_FIRST_TRANS 0x39
#define PGM_REGION_SECOND_TRANS 0x3a
#define PGM_REGION_THIRD_TRANS 0x3b
+#define PGM_SECURE_STORAGE_ACCESS 0x3d
+#define PGM_NON_SECURE_STORAGE_ACCESS 0x3e
+#define PGM_SECURE_STORAGE_VIOLATION 0x3f
#define PGM_MONITOR 0x40
#define PGM_PER 0x80
#define PGM_CRYPTO_OPERATION 0x119
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
struct gmap *gmap;
- /* backup location for the currently enabled gmap when scheduled out */
- struct gmap *enabled_gmap;
struct kvm_guestdbg_info_arch guestdbg;
unsigned long pfault_token;
unsigned long pfault_select;
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
+#include <asm/asm.h>
#include <asm/fpu.h>
#include <asm/ap.h>
#include <asm/uv.h>
" lgr 0,%[function]\n"
/* Parameter registers are ignored for "test bit" */
" plo 0,0,0,0(0)\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
: [function] "d" (function)
- : "cc", "0");
- return cc == 0;
+ : CC_CLOBBER_LIST("0"));
+ return CC_TRANSFORM(cc) == 0;
}
+ static __always_inline void pfcr_query(u8 (*query)[16])
+ {
+ asm volatile(
+ " lghi 0,0\n"
+ " .insn rsy,0xeb0000000016,0,0,%[query]\n"
+ : [query] "=QS" (*query)
+ :
+ : "cc", "0");
+ }
+
static __always_inline void __sortl_query(u8 (*query)[32])
{
asm volatile(
if (test_facility(151)) /* DFLTCC */
__dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
+ if (test_facility(201)) /* PFCR */
+ pfcr_query(&kvm_s390_available_subfunc.pfcr);
+
if (MACHINE_HAS_ESOP)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
/*
set_kvm_facility(kvm->arch.model.fac_mask, 192);
set_kvm_facility(kvm->arch.model.fac_list, 192);
}
+ if (test_facility(198)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 198);
+ set_kvm_facility(kvm->arch.model.fac_list, 198);
+ }
+ if (test_facility(199)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 199);
+ set_kvm_facility(kvm->arch.model.fac_list, 199);
+ }
r = 0;
} else
r = -EINVAL;
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
+ VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
return 0;
}
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
+ VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
return 0;
}
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
+ VM_EVENT(kvm, 3, "GET: host PFCR subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
return 0;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- gmap_enable(vcpu->arch.enabled_gmap);
kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__start_cpu_timer_accounting(vcpu);
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__stop_cpu_timer_accounting(vcpu);
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
- vcpu->arch.enabled_gmap = gmap_get_enabled();
- gmap_disable(vcpu->arch.enabled_gmap);
}
}
if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
- /* make vcpu_load load the right gmap on the first trigger */
- vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
}
+ static bool kvm_has_pckmo_hmac(struct kvm *kvm)
+ {
+ /* At least one HMAC subfunction must be present */
+ return kvm_has_pckmo_subfunc(kvm, 118) ||
+ kvm_has_pckmo_subfunc(kvm, 122);
+ }
+
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
/*
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
vcpu->arch.sie_block->eca &= ~ECA_APIE;
- vcpu->arch.sie_block->ecd &= ~ECD_ECC;
+ vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC);
if (vcpu->kvm->arch.crypto.apie)
vcpu->arch.sie_block->eca |= ECA_APIE;
/* Set up protected key support */
if (vcpu->kvm->arch.crypto.aes_kw) {
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
- /* ecc is also wrapped with AES key */
+ /* ecc/hmac is also wrapped with AES key */
if (kvm_has_pckmo_ecc(vcpu->kvm))
vcpu->arch.sie_block->ecd |= ECD_ECC;
+ if (kvm_has_pckmo_hmac(vcpu->kvm))
+ vcpu->arch.sie_block->ecd |= ECD_HMAC;
}
if (vcpu->kvm->arch.crypto.dea_kw)
return 1;
}
-/**
- * kvm_arch_fault_in_page - fault-in guest page if necessary
- * @vcpu: The corresponding virtual cpu
- * @gpa: Guest physical address
- * @writable: Whether the page should be writable or not
- *
- * Make sure that a guest page has been faulted-in on the host.
- *
- * Return: Zero on success, negative error code otherwise.
- */
-long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
-{
- return gmap_fault(vcpu->arch.gmap, gpa,
- writable ? FAULT_FLAG_WRITE : 0);
-}
-
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
unsigned long token)
{
if (!vcpu->arch.gmap->pfault_enabled)
return false;
- hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
- hva += current->thread.gmap_addr & ~PAGE_MASK;
+ hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr);
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
return false;
- return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+ return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch);
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
vcpu->arch.sie_block->icptcode = 0;
+ current->thread.gmap_int_code = 0;
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
trace_kvm_s390_sie_enter(vcpu, cpuflags);
return 0;
}
-static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
{
struct kvm_s390_pgm_info pgm_info = {
.code = PGM_ADDRESSING,
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
+static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
+{
+ unsigned int flags = 0;
+ unsigned long gaddr;
+ int rc = 0;
+
+ gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
+ if (kvm_s390_cur_gmap_fault_is_write())
+ flags = FAULT_FLAG_WRITE;
+
+ switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
+ case 0:
+ vcpu->stat.exit_null++;
+ break;
+ case PGM_NON_SECURE_STORAGE_ACCESS:
+ KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+ "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ /*
+ * This is normal operation; a page belonging to a protected
+ * guest has not been imported yet. Try to import the page into
+ * the protected guest.
+ */
+ if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+ break;
+ case PGM_SECURE_STORAGE_ACCESS:
+ case PGM_SECURE_STORAGE_VIOLATION:
+ KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+ "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ /*
+ * This can happen after a reboot with asynchronous teardown;
+ * the new guest (normal or protected) will run on top of the
+ * previous protected guest. The old pages need to be destroyed
+ * so the new guest can use them.
+ */
+ if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) {
+ /*
+ * Either KVM messed up the secure guest mapping or the
+ * same page is mapped into multiple secure guests.
+ *
+ * This exception is only triggered when a guest 2 is
+ * running and can therefore never occur in kernel
+ * context.
+ */
+ pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
+ current->thread.gmap_int_code, current->comm,
+ current->pid);
+ send_sig(SIGSEGV, current, 0);
+ }
+ break;
+ case PGM_PROTECTION:
+ case PGM_SEGMENT_TRANSLATION:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_ASCE_TYPE:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+ "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ if (vcpu->arch.gmap->pfault_enabled) {
+ rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
+ if (rc == -EFAULT)
+ return vcpu_post_run_addressing_exception(vcpu);
+ if (rc == -EAGAIN) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ if (kvm_arch_setup_async_pf(vcpu))
+ return 0;
+ vcpu->stat.pfault_sync++;
+ } else {
+ return rc;
+ }
+ }
+ rc = gmap_fault(vcpu->arch.gmap, gaddr, flags);
+ if (rc == -EFAULT) {
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
+ vcpu->run->s390_ucontrol.pgm_code = 0x10;
+ return -EREMOTE;
+ }
+ return vcpu_post_run_addressing_exception(vcpu);
+ }
+ break;
+ default:
+ KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ send_sig(SIGSEGV, current, 0);
+ break;
+ }
+ return rc;
+}
+
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
struct mcck_volatile_info *mcck_info;
struct sie_page *sie_page;
+ int rc;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
}
if (vcpu->arch.sie_block->icptcode > 0) {
- int rc = kvm_handle_sie_intercept(vcpu);
+ rc = kvm_handle_sie_intercept(vcpu);
if (rc != -EOPNOTSUPP)
return rc;
vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
return -EREMOTE;
- } else if (exit_reason != -EFAULT) {
- vcpu->stat.exit_null++;
- return 0;
- } else if (kvm_is_ucontrol(vcpu->kvm)) {
- vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
- vcpu->run->s390_ucontrol.trans_exc_code =
- current->thread.gmap_addr;
- vcpu->run->s390_ucontrol.pgm_code = 0x10;
- return -EREMOTE;
- } else if (current->thread.gmap_pfault) {
- trace_kvm_s390_major_guest_pfault(vcpu);
- current->thread.gmap_pfault = 0;
- if (kvm_arch_setup_async_pf(vcpu))
- return 0;
- vcpu->stat.pfault_sync++;
- return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
}
- return vcpu_post_run_fault_in_sie(vcpu);
+
+ return vcpu_post_run_handle_fault(vcpu);
}
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
}
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs,
- gmap_get_enabled()->asce);
+ vcpu->arch.gmap->asce);
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(vcpu->run->s.regs.gprs,
sie_page->pv_grregs,
/* we may only allow it if enabled for guest 2 */
ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
(ECB3_AES | ECB3_DEA);
- ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd & ECD_ECC;
+ ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd &
+ (ECD_ECC | ECD_HMAC);
if (!ecb3_flags && !ecd_flags)
goto end;
struct page *page;
page = gfn_to_page(kvm, gpa_to_gfn(gpa));
- if (is_error_page(page))
+ if (!page)
return -EINVAL;
*hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK);
return 0;
/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
{
- kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
+ kvm_release_page_dirty(pfn_to_page(hpa >> PAGE_SHIFT));
/* mark the page always as dirty for migration */
mark_page_dirty(kvm, gpa_to_gfn(gpa));
}
{
int rc;
- if (current->thread.gmap_int_code == PGM_PROTECTION)
+ if ((current->thread.gmap_int_code & PGM_INT_CODE_MASK) == PGM_PROTECTION)
/* we can directly forward all protection exceptions */
return inject_fault(vcpu, PGM_PROTECTION,
- current->thread.gmap_addr, 1);
+ current->thread.gmap_teid.addr * PAGE_SIZE, 1);
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- current->thread.gmap_addr, NULL);
+ current->thread.gmap_teid.addr * PAGE_SIZE, NULL);
if (rc > 0) {
rc = inject_fault(vcpu, rc,
- current->thread.gmap_addr,
- current->thread.gmap_write_flag);
+ current->thread.gmap_teid.addr * PAGE_SIZE,
+ kvm_s390_cur_gmap_fault_is_write());
if (rc >= 0)
- vsie_page->fault_addr = current->thread.gmap_addr;
+ vsie_page->fault_addr = current->thread.gmap_teid.addr * PAGE_SIZE;
}
return rc;
}
* also kick the vSIE.
*/
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
+ current->thread.gmap_int_code = 0;
barrier();
if (!kvm_s390_vcpu_sie_inhibited(vcpu))
- rc = sie64a(scb_s, vcpu->run->s.regs.gprs, gmap_get_enabled()->asce);
+ rc = sie64a(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
barrier();
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
if (rc > 0)
rc = 0; /* we could still have an icpt */
- else if (rc == -EFAULT)
+ else if (current->thread.gmap_int_code)
return handle_fault(vcpu, vsie_page);
switch (scb_s->icptcode) {
if (!rc)
rc = map_prefix(vcpu, vsie_page);
if (!rc) {
- gmap_enable(vsie_page->gmap);
update_intervention_requests(vsie_page);
rc = do_vsie_run(vcpu, vsie_page);
- gmap_enable(vcpu->arch.gmap);
}
atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);
#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_SHA512 (12*32+ 0) /* SHA512 instructions */
+ #define X86_FEATURE_SM3 (12*32+ 1) /* SM3 instructions */
+ #define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */
#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */
#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
-#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */
+#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
+#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
+#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
/*
* BUG word(s)
static int __sev_issue_cmd(int fd, int id, void *data, int *error)
{
- struct fd f;
- int ret;
+ CLASS(fd, f)(fd);
- f = fdget(fd);
- if (!fd_file(f))
+ if (fd_empty(f))
return -EBADF;
- ret = sev_issue_cmd_external_user(fd_file(f), id, data, error);
-
- fdput(f);
- return ret;
+ return sev_issue_cmd_external_user(fd_file(f), id, data, error);
}
static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
{
struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_info *src_sev, *cg_cleanup_sev;
- struct fd f = fdget(source_fd);
+ CLASS(fd, f)(source_fd);
struct kvm *source_kvm;
bool charged = false;
int ret;
- if (!fd_file(f))
+ if (fd_empty(f))
return -EBADF;
- if (!file_is_kvm(fd_file(f))) {
- ret = -EBADF;
- goto out_fput;
- }
+ if (!file_is_kvm(fd_file(f)))
+ return -EBADF;
source_kvm = fd_file(f)->private_data;
ret = sev_lock_two_vms(kvm, source_kvm);
if (ret)
- goto out_fput;
+ return ret;
if (kvm->arch.vm_type != source_kvm->arch.vm_type ||
sev_guest(kvm) || !sev_guest(source_kvm)) {
cg_cleanup_sev->misc_cg = NULL;
out_unlock:
sev_unlock_two_vms(kvm, source_kvm);
-out_fput:
- fdput(f);
return ret;
}
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
{
- struct fd f = fdget(source_fd);
+ CLASS(fd, f)(source_fd);
struct kvm *source_kvm;
struct kvm_sev_info *source_sev, *mirror_sev;
int ret;
- if (!fd_file(f))
+ if (fd_empty(f))
return -EBADF;
- if (!file_is_kvm(fd_file(f))) {
- ret = -EBADF;
- goto e_source_fput;
- }
+ if (!file_is_kvm(fd_file(f)))
+ return -EBADF;
source_kvm = fd_file(f)->private_data;
ret = sev_lock_two_vms(kvm, source_kvm);
if (ret)
- goto e_source_fput;
+ return ret;
/*
* Mirrors of mirrors should work, but let's not get silly. Also
e_unlock:
sev_unlock_two_vms(kvm, source_kvm);
-e_source_fput:
- fdput(f);
return ret;
}
sev_es_sync_to_ghcb(svm);
- kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
+ kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map);
svm->sev_es.ghcb = NULL;
}
if (VALID_PAGE(svm->sev_es.snp_vmsa_gpa)) {
gfn_t gfn = gpa_to_gfn(svm->sev_es.snp_vmsa_gpa);
struct kvm_memory_slot *slot;
+ struct page *page;
kvm_pfn_t pfn;
slot = gfn_to_memslot(vcpu->kvm, gfn);
* The new VMSA will be private memory guest memory, so
* retrieve the PFN from the gmem backend.
*/
- if (kvm_gmem_get_pfn(vcpu->kvm, slot, gfn, &pfn, NULL))
+ if (kvm_gmem_get_pfn(vcpu->kvm, slot, gfn, &pfn, &page, NULL))
return -EINVAL;
/*
* changes then care should be taken to ensure
* svm->sev_es.vmsa is pinned through some other means.
*/
- kvm_release_pfn_clean(pfn);
+ kvm_release_page_clean(page);
}
/*
struct kvm_memory_slot *slot;
struct kvm *kvm = vcpu->kvm;
int order, rmp_level, ret;
+ struct page *page;
bool assigned;
kvm_pfn_t pfn;
gfn_t gfn;
return;
}
- ret = kvm_gmem_get_pfn(kvm, slot, gfn, &pfn, &order);
+ ret = kvm_gmem_get_pfn(kvm, slot, gfn, &pfn, &page, &order);
if (ret) {
pr_warn_ratelimited("SEV: Unexpected RMP fault, no backing page for private GPA 0x%llx\n",
gpa);
out:
trace_kvm_rmp_fault(vcpu, gpa, pfn, error_code, rmp_level, ret);
out_no_trace:
- put_page(pfn_to_page(pfn));
+ kvm_release_page_unused(page);
}
static bool is_pfn_range_shared(kvm_pfn_t start, kvm_pfn_t end)