#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "qapi/error.h"
#include "sysemu/hw_accel.h"
#include "sysemu/runstate.h"
#include "cpu-models.h"
#include "trace.h"
#include "kvm_ppc.h"
+#include "hw/ppc/fdt.h"
#include "hw/ppc/spapr_ovec.h"
#include "mmu-book3s-v3.h"
#include "hw/mem/memory-device.h"
if (!is_ram_address(spapr, dst) || (dst & ~TARGET_PAGE_MASK) != 0) {
return H_PARAMETER;
}
- pdst = cpu_physical_memory_map(dst, &len, 1);
+ pdst = cpu_physical_memory_map(dst, &len, true);
if (!pdst || len != TARGET_PAGE_SIZE) {
return H_PARAMETER;
}
ret = H_PARAMETER;
goto unmap_out;
}
- psrc = cpu_physical_memory_map(src, &len, 0);
+ psrc = cpu_physical_memory_map(src, &len, false);
if (!psrc || len != TARGET_PAGE_SIZE) {
ret = H_PARAMETER;
goto unmap_out;
#define FLAGS_DEREGISTER_DTL 0x0000c00000000000ULL
#define FLAGS_DEREGISTER_SLBSHADOW 0x0000e00000000000ULL
-#define VPA_MIN_SIZE 640
-#define VPA_SIZE_OFFSET 0x4
-#define VPA_SHARED_PROC_OFFSET 0x9
-#define VPA_SHARED_PROC_VAL 0x2
-
static target_ulong register_vpa(PowerPCCPU *cpu, target_ulong vpa)
{
CPUState *cs = CPU(cpu);
{
CPUPPCState *env = &cpu->env;
CPUState *cs = CPU(cpu);
+ SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
env->msr |= (1ULL << MSR_EE);
hreg_compute_hflags(env);
+
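+ /* If this vCPU was prodded via H_PROD, consume the prod instead of ceding. */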
+ if (spapr_cpu->prod) {
+ spapr_cpu->prod = false;
+ return H_SUCCESS;
+ }
+
if (!cpu_has_work(cs)) {
cs->halted = 1;
cs->exception_index = EXCP_HLT;
cs->exit_request = 1;
}
+
+ return H_SUCCESS;
+}
+
+/*
+ * Confer to self, aka join. Cede could use the same pattern as well, if
+ * EXCP_HLT can be changed to EXCP_HALTED.
+ */
+static target_ulong h_confer_self(PowerPCCPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+
+ if (spapr_cpu->prod) {
+ spapr_cpu->prod = false;
+ return H_SUCCESS;
+ }
+ cs->halted = 1;
+ cs->exception_index = EXCP_HALTED;
+ cs->exit_request = 1;
+
+ return H_SUCCESS;
+}
+
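+/*
+ * H_JOIN: halt the calling vCPU (which must have MSR[EE] clear), unless it
+ * is the last vCPU still running, in which case H_CONTINUE is returned.
+ */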
+static target_ulong h_join(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ CPUPPCState *env = &cpu->env;
+ CPUState *cs;
+ bool last_unjoined = true;
+
+ if (env->msr & (1ULL << MSR_EE)) {
+ return H_BAD_MODE;
+ }
+
+ /*
+ * Must not join the last CPU running. Interestingly, there is no such
+ * restriction for H_CONFER-to-self, but that is probably not intended
+ * to be used when H_JOIN is available.
+ */
+ CPU_FOREACH(cs) {
+ PowerPCCPU *c = POWERPC_CPU(cs);
+ CPUPPCState *e = &c->env;
+ if (c == cpu) {
+ continue;
+ }
+
+ /* Don't have a way to indicate joined, so use halted && MSR[EE]=0 */
+ if (!cs->halted || (e->msr & (1ULL << MSR_EE))) {
+ last_unjoined = false;
+ break;
+ }
+ }
+ if (last_unjoined) {
+ return H_CONTINUE;
+ }
+
+ return h_confer_self(cpu);
+}
+
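+/*
+ * H_CONFER: yield the calling vCPU. target == -1 confers to all other vCPUs;
+ * a specific target is only conferred to if its VPA dispatch counter still
+ * matches the one passed in by the guest.
+ */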
+static target_ulong h_confer(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ target_long target = args[0];
+ uint32_t dispatch = args[1];
+ CPUState *cs = CPU(cpu);
+ SpaprCpuState *spapr_cpu;
+
+ /*
+ * -1 means confer to all other CPUs without dispatch counter check,
+ * otherwise it's a targeted confer.
+ */
+ if (target != -1) {
+ PowerPCCPU *target_cpu = spapr_find_cpu(target);
+ uint32_t target_dispatch;
+
+ if (!target_cpu) {
+ return H_PARAMETER;
+ }
+
+ /*
+ * target == self is a special case: we wait until prodded, without
+ * a dispatch counter check.
+ */
+ if (cpu == target_cpu) {
+ return h_confer_self(cpu);
+ }
+
+ spapr_cpu = spapr_cpu_state(target_cpu);
+ if (!spapr_cpu->vpa_addr || ((dispatch & 1) == 0)) {
+ return H_SUCCESS;
+ }
+
+ target_dispatch = ldl_be_phys(cs->as,
+ spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER);
+ if (target_dispatch != dispatch) {
+ return H_SUCCESS;
+ }
+
+ /*
+ * The targeted confer does not do anything special beyond yielding
+ * the current vCPU, but even this should be better than nothing.
+ * At least for single-threaded TCG, it gives the target a chance to
+ * run before we run again. Multi-threaded TCG does not really do
+ * anything with EXCP_YIELD yet.
+ */
+ }
+
+ cs->exception_index = EXCP_YIELD;
+ cs->exit_request = 1;
+ cpu_loop_exit(cs);
+
+ return H_SUCCESS;
+}
+
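+/*
+ * H_PROD: mark the target vCPU as prodded and wake it if it is halted, so
+ * that a subsequent H_CEDE or H_CONFER-to-self returns immediately.
+ */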
+static target_ulong h_prod(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ target_long target = args[0];
+ PowerPCCPU *tcpu;
+ CPUState *cs;
+ SpaprCpuState *spapr_cpu;
+
+ tcpu = spapr_find_cpu(target);
+ cs = CPU(tcpu);
+ if (!cs) {
+ return H_PARAMETER;
+ }
+
+ spapr_cpu = spapr_cpu_state(tcpu);
+ spapr_cpu->prod = true;
+ cs->halted = 0;
+ qemu_cpu_kick(cs);
+
return H_SUCCESS;
}
spapr_free_hpt(spapr);
} else if (!(patbe_new & PATE1_GR)) {
/* RADIX->HASH || NOTHING->HASH : Allocate HPT */
- spapr_setup_hpt_and_vrma(spapr);
+ spapr_setup_hpt(spapr);
}
return;
}
return best_compat;
}
-static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
- SpaprMachineState *spapr,
- target_ulong opcode,
- target_ulong *args)
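+/*
+ * Reset any DRC that is in a transient state and drop pending hotplug
+ * events, so that CAS restarts from a clean hotplug state.
+ */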
+static void spapr_handle_transient_dev_before_cas(SpaprMachineState *spapr)
{
- /* Working address in data buffer */
- target_ulong addr = ppc64_phys_to_real(args[0]);
- target_ulong ov_table;
+ Object *drc_container;
+ ObjectProperty *prop;
+ ObjectPropertyIterator iter;
+
+ drc_container = container_get(object_get_root(), "/dr-connector");
+ object_property_iter_init(&iter, drc_container);
+ while ((prop = object_property_iter_next(&iter))) {
+ SpaprDrc *drc;
+
+ if (!strstart(prop->type, "link<", NULL)) {
+ continue;
+ }
+ drc = SPAPR_DR_CONNECTOR(object_property_get_link(drc_container,
+ prop->name,
+ &error_abort));
+
+ if (spapr_drc_transient(drc)) {
+ spapr_drc_reset(drc);
+ }
+ }
+
+ spapr_clear_pending_hotplug_events(spapr);
+}
+
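+/*
+ * Negotiate client architecture support: validate the PVR and option
+ * vectors, select the MMU and interrupt modes, and rebuild the device
+ * tree blob that the caller copies back to the guest buffer.
+ */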
+target_ulong do_client_architecture_support(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong vec,
+ target_ulong fdt_bufsize)
+{
+ target_ulong ov_table; /* Working address in data buffer */
uint32_t cas_pvr;
- SpaprOptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates;
+ SpaprOptionVector *ov1_guest, *ov5_guest;
bool guest_radix;
Error *local_err = NULL;
bool raw_mode_supported = false;
bool guest_xive;
+ CPUState *cs;
+ void *fdt;
+
+ /* CAS is supposed to be called early when only the boot vCPU is active. */
+ CPU_FOREACH(cs) {
+ if (cs == CPU(cpu)) {
+ continue;
+ }
+ if (!cs->halted) {
+ warn_report("guest has multiple active vCPUs at CAS, which is not allowed");
+ return H_MULTI_THREADS_ACTIVE;
+ }
+ }
- cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err);
+ cas_pvr = cas_check_pvr(spapr, cpu, &vec, &raw_mode_supported, &local_err);
if (local_err) {
error_report_err(local_err);
return H_HARDWARE;
}
/* For the future use: here @ov_table points to the first option vector */
- ov_table = addr;
+ ov_table = vec;
ov1_guest = spapr_ovec_parse_vector(ov_table, 1);
+ if (!ov1_guest) {
+ warn_report("guest didn't provide option vector 1");
+ return H_PARAMETER;
+ }
ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
+ if (!ov5_guest) {
+ spapr_ovec_cleanup(ov1_guest);
+ warn_report("guest didn't provide option vector 5");
+ return H_PARAMETER;
+ }
if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) {
error_report("guest requested hash and radix MMU, which is invalid.");
exit(EXIT_FAILURE);
exit(EXIT_FAILURE);
}
- /* The radix/hash bit in byte 24 requires special handling: */
guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300);
- spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300);
guest_xive = spapr_ovec_test(ov5_guest, OV5_XIVE_EXPLOIT);
* by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need
* to worry about this for now.
*/
- ov5_cas_old = spapr_ovec_clone(spapr->ov5_cas);
-
- /* also clear the radix/hash bit from the current ov5_cas bits to
- * be in sync with the newly ov5 bits. Else the radix bit will be
- * seen as being removed and this will generate a reset loop
- */
- spapr_ovec_clear(ov5_cas_old, OV5_MMU_RADIX_300);
/* full range of negotiated ov5 capabilities */
spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest);
spapr_ovec_cleanup(ov5_guest);
- /* capabilities that have been added since CAS-generated guest reset.
- * if capabilities have since been removed, generate another reset
- */
- ov5_updates = spapr_ovec_new();
- spapr->cas_reboot = spapr_ovec_diff(ov5_updates,
- ov5_cas_old, spapr->ov5_cas);
- /* Now that processing is finished, set the radix/hash bit for the
- * guest if it requested a valid mode; otherwise terminate the boot. */
+
if (guest_radix) {
if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) {
error_report("Guest requested unavailable MMU mode (radix).");
exit(EXIT_FAILURE);
}
- spapr_ovec_set(spapr->ov5_cas, OV5_MMU_RADIX_300);
} else {
if (kvm_enabled() && kvmppc_has_cap_mmu_radix()
&& !kvmppc_has_cap_mmu_hash_v3()) {
exit(EXIT_FAILURE);
}
}
- spapr->cas_legacy_guest_workaround = !spapr_ovec_test(ov1_guest,
- OV1_PPC_3_00);
- if (!spapr->cas_reboot) {
- /* If spapr_machine_reset() did not set up a HPT but one is necessary
- * (because the guest isn't going to use radix) then set it up here. */
- if ((spapr->patb_entry & PATE1_GR) && !guest_radix) {
- /* legacy hash or new hash: */
- spapr_setup_hpt_and_vrma(spapr);
- }
- spapr->cas_reboot =
- (spapr_h_cas_compose_response(spapr, args[1], args[2],
- ov5_updates) != 0);
- }
+ spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00);
+ spapr_ovec_cleanup(ov1_guest);
/*
- * Ensure the guest asks for an interrupt mode we support; otherwise
- * terminate the boot.
+ * Ensure the guest asks for an interrupt mode we support;
+ * otherwise terminate the boot.
*/
if (guest_xive) {
- if (spapr->irq->ov5 == SPAPR_OV5_XIVE_LEGACY) {
+ if (!spapr->irq->xive) {
error_report(
"Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
exit(EXIT_FAILURE);
}
} else {
- if (spapr->irq->ov5 == SPAPR_OV5_XIVE_EXPLOIT) {
+ if (!spapr->irq->xics) {
error_report(
"Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
exit(EXIT_FAILURE);
}
}
+ spapr_irq_update_active_intc(spapr);
+
+ spapr_handle_transient_dev_before_cas(spapr);
+
/*
- * Generate a machine reset when we have an update of the
- * interrupt mode. Only required when the machine supports both
- * modes.
+ * If spapr_machine_reset() did not set up a HPT but one is necessary
+ * (because the guest isn't going to use radix) then set it up here.
*/
- if (!spapr->cas_reboot) {
- spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT)
- && spapr->irq->ov5 & SPAPR_OV5_XIVE_BOTH;
+ if ((spapr->patb_entry & PATE1_GR) && !guest_radix) {
+ /* legacy hash or new hash: */
+ spapr_setup_hpt(spapr);
}
- spapr_ovec_cleanup(ov5_updates);
+ fdt = spapr_build_fdt(spapr, false, fdt_bufsize);
- if (spapr->cas_reboot) {
- qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
- }
+ g_free(spapr->fdt_blob);
+ spapr->fdt_size = fdt_totalsize(fdt);
+ spapr->fdt_initial_size = spapr->fdt_size;
+ spapr->fdt_blob = fdt;
return H_SUCCESS;
}
+static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ target_ulong vec = ppc64_phys_to_real(args[0]);
+ target_ulong fdt_buf = args[1];
+ target_ulong fdt_bufsize = args[2];
+ target_ulong ret;
+ SpaprDeviceTreeUpdateHeader hdr = { .version_id = 1 };
+
+ if (fdt_bufsize < sizeof(hdr)) {
+ error_report("SLOF provided insufficient CAS buffer "
+ TARGET_FMT_lu " (min: %zu)", fdt_bufsize, sizeof(hdr));
+ exit(EXIT_FAILURE);
+ }
+
+ fdt_bufsize -= sizeof(hdr);
+
+ ret = do_client_architecture_support(cpu, spapr, vec, fdt_bufsize);
+ if (ret == H_SUCCESS) {
+ _FDT((fdt_pack(spapr->fdt_blob)));
+ spapr->fdt_size = fdt_totalsize(spapr->fdt_blob);
+ spapr->fdt_initial_size = spapr->fdt_size;
+
+ cpu_physical_memory_write(fdt_buf, &hdr, sizeof(hdr));
+ cpu_physical_memory_write(fdt_buf + sizeof(hdr), spapr->fdt_blob,
+ spapr->fdt_size);
+ trace_spapr_cas_continue(spapr->fdt_size + sizeof(hdr));
+ }
+
+ return ret;
+}
+
static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong opcode,
static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1];
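+/* Dispatch table for hcalls in the SVM_HCALL_BASE..SVM_HCALL_MAX range */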
+static spapr_hcall_fn svm_hypercall_table[(SVM_HCALL_MAX - SVM_HCALL_BASE) / 4 + 1];
void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
{
assert((opcode & 0x3) == 0);
slot = &papr_hypercall_table[opcode / 4];
+ } else if (opcode >= SVM_HCALL_BASE && opcode <= SVM_HCALL_MAX) {
+ /* we only have SVM-related hcall numbers assigned in multiples of 4 */
+ assert((opcode & 0x3) == 0);
+
+ slot = &svm_hypercall_table[(opcode - SVM_HCALL_BASE) / 4];
} else {
assert((opcode >= KVMPPC_HCALL_BASE) && (opcode <= KVMPPC_HCALL_MAX));
&& ((opcode & 0x3) == 0)) {
spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
+ if (fn) {
+ return fn(cpu, spapr, opcode, args);
+ }
+ } else if ((opcode >= SVM_HCALL_BASE) &&
+ (opcode <= SVM_HCALL_MAX)) {
+ spapr_hcall_fn fn = svm_hypercall_table[(opcode - SVM_HCALL_BASE) / 4];
+
if (fn) {
return fn(cpu, spapr, opcode, args);
}
/* hcall-splpar */
spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
spapr_register_hypercall(H_CEDE, h_cede);
+ spapr_register_hypercall(H_CONFER, h_confer);
+ spapr_register_hypercall(H_PROD, h_prod);
+
+ /* hcall-join */
+ spapr_register_hypercall(H_JOIN, h_join);
+
spapr_register_hypercall(H_SIGNAL_SYS_RESET, h_signal_sys_reset);
/* processor register resource access h-calls */