#include <linux/misc_cgroup.h>
#include <linux/processor.h>
#include <linux/trace_events.h>
+ #include <uapi/linux/sev-guest.h>
#include <asm/pkru.h>
#include <asm/trapnr.h>
sev_decommission(handle);
}
+ /*
+ * This sets up bounce buffers/firmware pages to handle SNP Guest Request
+ * messages (e.g. attestation requests). See "SNP Guest Request" in the GHCB
+ * 2.0 specification for more details.
+ *
+ * Technically, when an SNP Guest Request is issued, the guest will provide its
+ * own request/response pages, which could in theory be passed along directly
+ * to firmware rather than using bounce pages. However, these pages would need
+ * special care:
+ *
+ * - Both pages are from shared guest memory, so they need to be protected
+ * from migration/etc. occurring while firmware reads/writes to them. At a
+ * minimum, this requires elevating the ref counts and potentially needing
+ * an explicit pinning of the memory. This places additional restrictions
+ * on what type of memory backends userspace can use for shared guest
+ * memory since there is some reliance on using refcounted pages.
+ *
+ * - The response page needs to be switched to Firmware-owned[1] state
+ * before the firmware can write to it, which can lead to potential
+ * host RMP #PFs if the guest is misbehaved and hands the host a
+ * guest page that KVM might write to for other reasons (e.g. virtio
+ * buffers/etc.).
+ *
+ * Both of these issues can be avoided completely by using separately-allocated
+ * bounce pages for both the request/response pages and passing those to
+ * firmware instead. So that's what is being set up here.
+ *
+ * Guest requests rely on message sequence numbers to ensure requests are
+ * issued to firmware in the order the guest issues them, so concurrent guest
+ * requests generally shouldn't happen. But a misbehaved guest could issue
+ * concurrent guest requests in theory, so a mutex is used to serialize
+ * access to the bounce buffers.
+ *
+ * [1] See the "Page States" section of the SEV-SNP Firmware ABI for more
+ * details on Firmware-owned pages, along with "RMP and VMPL Access Checks"
+ * in the APM for details on the related RMP restrictions.
+ */
+ static int snp_guest_req_init(struct kvm *kvm)
+ {
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
+ struct page *req_page;
+
+ req_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!req_page)
+ return -ENOMEM;
+
+ sev->guest_resp_buf = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!sev->guest_resp_buf) {
+ __free_page(req_page);
+ return -EIO;
+ }
+
+ sev->guest_req_buf = page_address(req_page);
+ mutex_init(&sev->guest_req_mutex);
+
+ return 0;
+ }
+
+ static void snp_guest_req_cleanup(struct kvm *kvm)
+ {
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
+
+ if (sev->guest_resp_buf)
+ snp_free_firmware_page(sev->guest_resp_buf);
+
+ if (sev->guest_req_buf)
+ __free_page(virt_to_page(sev->guest_req_buf));
+
+ sev->guest_req_buf = NULL;
+ sev->guest_resp_buf = NULL;
+ }
+
static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct kvm_sev_init *data,
unsigned long vm_type)
if (ret)
goto e_free;
+ /* This needs to happen after SEV/SNP firmware initialization. */
+ if (vm_type == KVM_X86_SNP_VM && snp_guest_req_init(kvm))
+ goto e_free;
+
INIT_LIST_HEAD(&sev->regions_list);
INIT_LIST_HEAD(&sev->mirror_vms);
sev->need_init = false;
*/
fpstate_set_confidential(&vcpu->arch.guest_fpu);
vcpu->arch.guest_state_protected = true;
+
+ /*
+ * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it
+ * only after setting guest_state_protected because KVM_SET_MSRS allows
+ * dynamic toggling of LBRV (for performance reason) on write access to
+ * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set.
+ */
+ svm_enable_lbrv(vcpu);
return 0;
}
}
svm->vcpu.arch.guest_state_protected = true;
+ /*
+ * SEV-ES (and thus SNP) guest mandates LBR Virtualization to
+ * be _always_ ON. Enable it only after setting
+ * guest_state_protected because KVM_SET_MSRS allows dynamic
+ * toggling of LBRV (for performance reason) on write access to
+ * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set.
+ */
+ svm_enable_lbrv(vcpu);
}
return 0;
}
if (sev_snp_guest(kvm)) {
+ snp_guest_req_cleanup(kvm);
+
/*
* Decomission handles unbinding of the ASID. If it fails for
* some unexpected reason, just leak the ASID.
if (!boot_cpu_has(X86_FEATURE_SEV_ES))
goto out;
+ if (!lbrv) {
+ WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV),
+ "LBRV must be present for SEV-ES support");
+ goto out;
+ }
+
/* Has the system been allocated ASIDs for SEV-ES? */
if (min_sev_asid == 1)
goto out;
if (!sev_snp_guest(vcpu->kvm) || !kvm_ghcb_sw_scratch_is_valid(svm))
goto vmgexit_err;
break;
+ case SVM_VMGEXIT_GUEST_REQUEST:
+ case SVM_VMGEXIT_EXT_GUEST_REQUEST:
+ if (!sev_snp_guest(vcpu->kvm) ||
+ !PAGE_ALIGNED(control->exit_info_1) ||
+ !PAGE_ALIGNED(control->exit_info_2) ||
+ control->exit_info_1 == control->exit_info_2)
+ goto vmgexit_err;
+ break;
default:
reason = GHCB_ERR_INVALID_EVENT;
goto vmgexit_err;
return ret;
}
+ static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
+ {
+ struct sev_data_snp_guest_request data = {0};
+ struct kvm *kvm = svm->vcpu.kvm;
+ struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
+ sev_ret_code fw_err = 0;
+ int ret;
+
+ if (!sev_snp_guest(kvm))
+ return -EINVAL;
+
+ mutex_lock(&sev->guest_req_mutex);
+
+ if (kvm_read_guest(kvm, req_gpa, sev->guest_req_buf, PAGE_SIZE)) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
+ data.gctx_paddr = __psp_pa(sev->snp_context);
+ data.req_paddr = __psp_pa(sev->guest_req_buf);
+ data.res_paddr = __psp_pa(sev->guest_resp_buf);
+
+ /*
+ * Firmware failures are propagated on to guest, but any other failure
+ * condition along the way should be reported to userspace. E.g. if
+ * the PSP is dead and commands are timing out.
+ */
+ ret = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &fw_err);
+ if (ret && !fw_err)
+ goto out_unlock;
+
+ if (kvm_write_guest(kvm, resp_gpa, sev->guest_resp_buf, PAGE_SIZE)) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(0, fw_err));
+
+ ret = 1; /* resume guest */
+
+ out_unlock:
+ mutex_unlock(&sev->guest_req_mutex);
+ return ret;
+ }
+
+ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
+ {
+ struct kvm *kvm = svm->vcpu.kvm;
+ u8 msg_type;
+
+ if (!sev_snp_guest(kvm))
+ return -EINVAL;
+
+ if (kvm_read_guest(kvm, req_gpa + offsetof(struct snp_guest_msg_hdr, msg_type),
+ &msg_type, 1))
+ return -EIO;
+
+ /*
+ * As per GHCB spec, requests of type MSG_REPORT_REQ also allow for
+ * additional certificate data to be provided alongside the attestation
+ * report via the guest-provided data pages indicated by RAX/RBX. The
+ * certificate data is optional and requires additional KVM enablement
+ * to provide an interface for userspace to provide it, but KVM still
+ * needs to be able to handle extended guest requests either way. So
+ * provide a stub implementation that will always return an empty
+ * certificate table in the guest-provided data pages.
+ */
+ if (msg_type == SNP_MSG_REPORT_REQ) {
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ u64 data_npages;
+ gpa_t data_gpa;
+
+ if (!kvm_ghcb_rax_is_valid(svm) || !kvm_ghcb_rbx_is_valid(svm))
+ goto request_invalid;
+
+ data_gpa = vcpu->arch.regs[VCPU_REGS_RAX];
+ data_npages = vcpu->arch.regs[VCPU_REGS_RBX];
+
+ if (!PAGE_ALIGNED(data_gpa))
+ goto request_invalid;
+
+ /*
+ * As per GHCB spec (see "SNP Extended Guest Request"), the
+ * certificate table is terminated by 24-bytes of zeroes.
+ */
+ if (data_npages && kvm_clear_guest(kvm, data_gpa, 24))
+ return -EIO;
+ }
+
+ return snp_handle_guest_req(svm, req_gpa, resp_gpa);
+
+ request_invalid:
+ ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
+ return 1; /* resume guest */
+ }
+
static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
ret = 1;
break;
+ case SVM_VMGEXIT_GUEST_REQUEST:
+ ret = snp_handle_guest_req(svm, control->exit_info_1, control->exit_info_2);
+ break;
+ case SVM_VMGEXIT_EXT_GUEST_REQUEST:
+ ret = snp_handle_ext_guest_req(svm, control->exit_info_1, control->exit_info_2);
+ break;
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
vcpu_unimpl(vcpu,
"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
struct kvm_vcpu *vcpu = &svm->vcpu;
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
- svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
/*
* An SEV-ES guest requires a VMSA area that is a separate from the
/* Clear intercepts on selected MSRs */
set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}
void sev_init_vmcb(struct vcpu_svm *svm)
* isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
* by common SVM code).
*/
- hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ hostsa->xcr0 = kvm_host.xcr0;
hostsa->pkru = read_pkru();
- hostsa->xss = host_xss;
+ hostsa->xss = kvm_host.xss;
/*
* If DebugSwap is enabled, debug registers are loaded but NOT saved by
}
}
-struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
+struct page *snp_safe_alloc_page_node(int node, gfp_t gfp)
{
unsigned long pfn;
struct page *p;
if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
- return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ return alloc_pages_node(node, gfp | __GFP_ZERO, 0);
/*
* Allocate an SNP-safe page to workaround the SNP erratum where
* Allocate one extra page, choose a page which is not
* 2MB-aligned, and free the other.
*/
- p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
+ p = alloc_pages_node(node, gfp | __GFP_ZERO, 1);
if (!p)
return NULL;
#define IOPM_SIZE PAGE_SIZE * 3
#define MSRPM_SIZE PAGE_SIZE * 2
-#define MAX_DIRECT_ACCESS_MSRS 47
+#define MAX_DIRECT_ACCESS_MSRS 48
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
extern bool intercept_smi;
extern bool x2avic_enabled;
extern bool vnmi;
+extern int lbrv;
/*
* Clean bits in VMCB.
struct misc_cg *misc_cg; /* For misc cgroup accounting */
atomic_t migration_in_progress;
void *snp_context; /* SNP guest context page */
+ void *guest_req_buf; /* Bounce buffer for SNP Guest Request input */
+ void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */
+ struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
};
struct kvm_svm {
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
void svm_vcpu_free_msrpm(u32 *msrpm);
void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+void svm_enable_lbrv(struct kvm_vcpu *vcpu);
void svm_update_lbrv(struct kvm_vcpu *vcpu);
int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
/* avic.c */
#define AVIC_REQUIRED_APICV_INHIBITS \
( \
- BIT(APICV_INHIBIT_REASON_DISABLE) | \
+ BIT(APICV_INHIBIT_REASON_DISABLED) | \
BIT(APICV_INHIBIT_REASON_ABSENT) | \
BIT(APICV_INHIBIT_REASON_HYPERV) | \
BIT(APICV_INHIBIT_REASON_NESTED) | \
int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
/* These symbols are used in common code and are stubbed below. */
-struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
+
+struct page *snp_safe_alloc_page_node(int node, gfp_t gfp);
+static inline struct page *snp_safe_alloc_page(void)
+{
+ return snp_safe_alloc_page_node(numa_node_id(), GFP_KERNEL_ACCOUNT);
+}
+
void sev_free_vcpu(struct kvm_vcpu *vcpu);
void sev_vm_destroy(struct kvm *kvm);
void __init sev_set_cpu_caps(void);
void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn);
#else
-static inline struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) {
- return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+static inline struct page *snp_safe_alloc_page_node(int node, gfp_t gfp)
+{
+ return alloc_pages_node(node, gfp | __GFP_ZERO, 0);
+}
+
+static inline struct page *snp_safe_alloc_page(void)
+{
+ return snp_safe_alloc_page_node(numa_node_id(), GFP_KERNEL_ACCOUNT);
}
static inline void sev_free_vcpu(struct kvm_vcpu *vcpu) {}
#include <asm/svm.h>
#include <asm/sev.h>
- #include "sev-guest.h"
-
#define DEVICE_NAME "sev-guest"
#define AAD_LEN 48
#define MSG_HDR_VER 1
*/
struct snp_guest_msg secret_request, secret_response;
- struct snp_secrets_page_layout *layout;
+ struct snp_secrets_page *secrets;
struct snp_req_data input;
union {
struct snp_report_req report;
.unlocked_ioctl = snp_guest_ioctl,
};
-static u8 *get_vmpck(int id, struct snp_secrets_page_layout *layout, u32 **seqno)
+static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
{
u8 *key = NULL;
switch (id) {
case 0:
- *seqno = &layout->os_area.msg_seqno_0;
- key = layout->vmpck0;
+ *seqno = &secrets->os_area.msg_seqno_0;
+ key = secrets->vmpck0;
break;
case 1:
- *seqno = &layout->os_area.msg_seqno_1;
- key = layout->vmpck1;
+ *seqno = &secrets->os_area.msg_seqno_1;
+ key = secrets->vmpck1;
break;
case 2:
- *seqno = &layout->os_area.msg_seqno_2;
- key = layout->vmpck2;
+ *seqno = &secrets->os_area.msg_seqno_2;
+ key = secrets->vmpck2;
break;
case 3:
- *seqno = &layout->os_area.msg_seqno_3;
- key = layout->vmpck3;
+ *seqno = &secrets->os_area.msg_seqno_3;
+ key = secrets->vmpck3;
break;
default:
break;
static int __init sev_guest_probe(struct platform_device *pdev)
{
- struct snp_secrets_page_layout *layout;
struct sev_guest_platform_data *data;
+ struct snp_secrets_page *secrets;
struct device *dev = &pdev->dev;
struct snp_guest_dev *snp_dev;
struct miscdevice *misc;
if (!mapping)
return -ENODEV;
- layout = (__force void *)mapping;
+ secrets = (__force void *)mapping;
ret = -ENOMEM;
snp_dev = devm_kzalloc(&pdev->dev, sizeof(struct snp_guest_dev), GFP_KERNEL);
goto e_unmap;
ret = -EINVAL;
- snp_dev->vmpck = get_vmpck(vmpck_id, layout, &snp_dev->os_area_msg_seqno);
+ snp_dev->vmpck = get_vmpck(vmpck_id, secrets, &snp_dev->os_area_msg_seqno);
if (!snp_dev->vmpck) {
dev_err(dev, "invalid vmpck id %d\n", vmpck_id);
goto e_unmap;
platform_set_drvdata(pdev, snp_dev);
snp_dev->dev = dev;
- snp_dev->layout = layout;
+ snp_dev->secrets = secrets;
/* Allocate the shared page used for the request and response message. */
snp_dev->request = alloc_shared_pages(dev, sizeof(struct snp_guest_msg));