target/i386/whpx-all.c
1 /*
2  * QEMU Windows Hypervisor Platform accelerator (WHPX)
3  *
4  * Copyright Microsoft Corp. 2017
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10
11 #include "qemu/osdep.h"
12 #include "cpu.h"
13 #include "exec/address-spaces.h"
14 #include "exec/ioport.h"
15 #include "qemu-common.h"
16 #include "sysemu/accel.h"
17 #include "sysemu/whpx.h"
18 #include "sysemu/cpus.h"
19 #include "sysemu/runstate.h"
20 #include "qemu/main-loop.h"
21 #include "hw/boards.h"
22 #include "qemu/error-report.h"
23 #include "qapi/error.h"
24 #include "migration/blocker.h"
25 #include "whp-dispatch.h"
26
27 #include <WinHvPlatform.h>
28 #include <WinHvEmulation.h>
29
30 struct whpx_state {
31     uint64_t mem_quota;
32     WHV_PARTITION_HANDLE partition;
33 };
34
35 static const WHV_REGISTER_NAME whpx_register_names[] = {
36
37     /* X64 General purpose registers */
38     WHvX64RegisterRax,
39     WHvX64RegisterRcx,
40     WHvX64RegisterRdx,
41     WHvX64RegisterRbx,
42     WHvX64RegisterRsp,
43     WHvX64RegisterRbp,
44     WHvX64RegisterRsi,
45     WHvX64RegisterRdi,
46     WHvX64RegisterR8,
47     WHvX64RegisterR9,
48     WHvX64RegisterR10,
49     WHvX64RegisterR11,
50     WHvX64RegisterR12,
51     WHvX64RegisterR13,
52     WHvX64RegisterR14,
53     WHvX64RegisterR15,
54     WHvX64RegisterRip,
55     WHvX64RegisterRflags,
56
57     /* X64 Segment registers */
58     WHvX64RegisterEs,
59     WHvX64RegisterCs,
60     WHvX64RegisterSs,
61     WHvX64RegisterDs,
62     WHvX64RegisterFs,
63     WHvX64RegisterGs,
64     WHvX64RegisterLdtr,
65     WHvX64RegisterTr,
66
67     /* X64 Table registers */
68     WHvX64RegisterIdtr,
69     WHvX64RegisterGdtr,
70
71     /* X64 Control Registers */
72     WHvX64RegisterCr0,
73     WHvX64RegisterCr2,
74     WHvX64RegisterCr3,
75     WHvX64RegisterCr4,
76     WHvX64RegisterCr8,
77
78     /* X64 Debug Registers */
79     /*
80      * WHvX64RegisterDr0,
81      * WHvX64RegisterDr1,
82      * WHvX64RegisterDr2,
83      * WHvX64RegisterDr3,
84      * WHvX64RegisterDr6,
85      * WHvX64RegisterDr7,
86      */
87
88     /* X64 Floating Point and Vector Registers */
89     WHvX64RegisterXmm0,
90     WHvX64RegisterXmm1,
91     WHvX64RegisterXmm2,
92     WHvX64RegisterXmm3,
93     WHvX64RegisterXmm4,
94     WHvX64RegisterXmm5,
95     WHvX64RegisterXmm6,
96     WHvX64RegisterXmm7,
97     WHvX64RegisterXmm8,
98     WHvX64RegisterXmm9,
99     WHvX64RegisterXmm10,
100     WHvX64RegisterXmm11,
101     WHvX64RegisterXmm12,
102     WHvX64RegisterXmm13,
103     WHvX64RegisterXmm14,
104     WHvX64RegisterXmm15,
105     WHvX64RegisterFpMmx0,
106     WHvX64RegisterFpMmx1,
107     WHvX64RegisterFpMmx2,
108     WHvX64RegisterFpMmx3,
109     WHvX64RegisterFpMmx4,
110     WHvX64RegisterFpMmx5,
111     WHvX64RegisterFpMmx6,
112     WHvX64RegisterFpMmx7,
113     WHvX64RegisterFpControlStatus,
114     WHvX64RegisterXmmControlStatus,
115
116     /* X64 MSRs */
117     WHvX64RegisterEfer,
118 #ifdef TARGET_X86_64
119     WHvX64RegisterKernelGsBase,
120 #endif
121     WHvX64RegisterApicBase,
122     /* WHvX64RegisterPat, */
123     WHvX64RegisterSysenterCs,
124     WHvX64RegisterSysenterEip,
125     WHvX64RegisterSysenterEsp,
126     WHvX64RegisterStar,
127 #ifdef TARGET_X86_64
128     WHvX64RegisterLstar,
129     WHvX64RegisterCstar,
130     WHvX64RegisterSfmask,
131 #endif
132
133     /* Interrupt / Event Registers */
134     /*
135      * WHvRegisterPendingInterruption,
136      * WHvRegisterInterruptState,
137      * WHvRegisterPendingEvent0,
138      * WHvRegisterPendingEvent1
139      * WHvX64RegisterDeliverabilityNotifications,
140      */
141 };
142
143 struct whpx_register_set {
144     WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
145 };
146
147 struct whpx_vcpu {
148     WHV_EMULATOR_HANDLE emulator;
149     bool window_registered;
150     bool interruptable;
151     uint64_t tpr;
152     uint64_t apic_base;
153     bool interruption_pending;
154
155     /* Must be the last field as it may have a tail */
156     WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
157 };
158
159 static bool whpx_allowed;
160 static bool whp_dispatch_initialized;
161 static HMODULE hWinHvPlatform, hWinHvEmulation;
162
163 struct whpx_state whpx_global;
164 struct WHPDispatch whp_dispatch;
165
166
167 /*
168  * VP support
169  */
170
171 static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
172 {
173     return (struct whpx_vcpu *)cpu->hax_vcpu;
174 }
175
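/*
 * Translate a QEMU segment cache entry into the WHV segment register
 * format. In virtual-8086 mode the attributes are forced to a present,
 * DPL=3, read/write data segment, as implied for v86 segments.
 */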
176 static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
177                                              int r86)
178 {
179     WHV_X64_SEGMENT_REGISTER hs;
180     unsigned flags = qs->flags;
181
182     hs.Base = qs->base;
183     hs.Limit = qs->limit;
184     hs.Selector = qs->selector;
185
186     if (v86) {
187         hs.Attributes = 0;
188         hs.SegmentType = 3;
189         hs.Present = 1;
190         hs.DescriptorPrivilegeLevel = 3;
191         hs.NonSystemSegment = 1;
192
193     } else {
194         hs.Attributes = (flags >> DESC_TYPE_SHIFT);
195
196         if (r86) {
197             /* hs.Base &= 0xfffff; */
198         }
199     }
200
201     return hs;
202 }
203
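/*
 * Inverse of whpx_seg_q2h(): rebuild a QEMU segment cache entry from a
 * WHV segment register.
 */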
204 static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
205 {
206     SegmentCache qs;
207
208     qs.base = hs->Base;
209     qs.limit = hs->Limit;
210     qs.selector = hs->Selector;
211
212     qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;
213
214     return qs;
215 }
216
217 static int whpx_set_tsc(CPUState *cpu)
218 {
219     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
220     WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
221     WHV_REGISTER_VALUE tsc_val;
222     HRESULT hr;
223     struct whpx_state *whpx = &whpx_global;
224
225     /*
226      * Suspend the partition prior to setting the TSC to reduce the variance
227      * in TSC across vCPUs. When the first vCPU runs post suspend, the
228      * partition is automatically resumed.
229      */
230     if (whp_dispatch.WHvSuspendPartitionTime) {
231
232         /*
233          * Failing to suspend the partition before setting the TSC is not a
234          * fatal error. It merely increases the likelihood of TSC variance
235          * between vCPUs, and some guest OSes handle that just fine.
236          */
237         hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
238         if (FAILED(hr)) {
239             warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
240         }
241     }
242
243     tsc_val.Reg64 = env->tsc;
244     hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
245         whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
246     if (FAILED(hr)) {
247         error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
248         return -1;
249     }
250
251     return 0;
252 }
253
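/*
 * Push QEMU's CPU state into the WHV virtual processor with a single
 * WHvSetVirtualProcessorRegisters() call. The order of values in vcxt
 * must match whpx_register_names[]; the asserts below verify this.
 * 'level' selects how much state to update: expensive MSRs such as the
 * TSC are only written for reset/full updates.
 */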
254 static void whpx_set_registers(CPUState *cpu, int level)
255 {
256     struct whpx_state *whpx = &whpx_global;
257     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
258     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
259     X86CPU *x86_cpu = X86_CPU(cpu);
260     struct whpx_register_set vcxt;
261     HRESULT hr;
262     int idx;
263     int idx_next;
264     int i;
265     int v86, r86;
266
267     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
268
269     /*
270      * The following MSRs have side effects on the guest or are too costly
271      * to set at runtime, so limit them to the full state update.
272      */
273     if (level >= WHPX_SET_RESET_STATE) {
274         whpx_set_tsc(cpu);
275     }
276
277     memset(&vcxt, 0, sizeof(struct whpx_register_set));
278
279     v86 = (env->eflags & VM_MASK);
280     r86 = !(env->cr[0] & CR0_PE_MASK);
281
282     vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
283     vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);
284
285     idx = 0;
286
287     /* The first 16 register indexes match between HV and QEMU definitions */
288     idx_next = 16;
289     for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
290         vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
291     }
292     idx = idx_next;
293
294     /* Same goes for RIP and RFLAGS */
295     assert(whpx_register_names[idx] == WHvX64RegisterRip);
296     vcxt.values[idx++].Reg64 = env->eip;
297
298     assert(whpx_register_names[idx] == WHvX64RegisterRflags);
299     vcxt.values[idx++].Reg64 = env->eflags;
300
301     /* Translate 6+4 segment registers. HV and QEMU order matches  */
302     assert(idx == WHvX64RegisterEs);
303     for (i = 0; i < 6; i += 1, idx += 1) {
304         vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
305     }
306
307     assert(idx == WHvX64RegisterLdtr);
308     vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);
309
310     assert(idx == WHvX64RegisterTr);
311     vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);
312
313     assert(idx == WHvX64RegisterIdtr);
314     vcxt.values[idx].Table.Base = env->idt.base;
315     vcxt.values[idx].Table.Limit = env->idt.limit;
316     idx += 1;
317
318     assert(idx == WHvX64RegisterGdtr);
319     vcxt.values[idx].Table.Base = env->gdt.base;
320     vcxt.values[idx].Table.Limit = env->gdt.limit;
321     idx += 1;
322
323     /* CR0, 2, 3, 4, 8 */
324     assert(whpx_register_names[idx] == WHvX64RegisterCr0);
325     vcxt.values[idx++].Reg64 = env->cr[0];
326     assert(whpx_register_names[idx] == WHvX64RegisterCr2);
327     vcxt.values[idx++].Reg64 = env->cr[2];
328     assert(whpx_register_names[idx] == WHvX64RegisterCr3);
329     vcxt.values[idx++].Reg64 = env->cr[3];
330     assert(whpx_register_names[idx] == WHvX64RegisterCr4);
331     vcxt.values[idx++].Reg64 = env->cr[4];
332     assert(whpx_register_names[idx] == WHvX64RegisterCr8);
333     vcxt.values[idx++].Reg64 = vcpu->tpr;
334
335     /* 8 Debug Registers - Skipped */
336
337     /* 16 XMM registers */
338     assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
339     idx_next = idx + 16;
340     for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
341         vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
342         vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
343     }
344     idx = idx_next;
345
346     /* 8 FP registers */
347     assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
348     for (i = 0; i < 8; i += 1, idx += 1) {
349         vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
350         /* vcxt.values[idx].Fp.AsUINT128.High64 =
351                env->fpregs[i].mmx.MMX_Q(1);
352         */
353     }
354
355     /* FP control status register */
356     assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
357     vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
358     vcxt.values[idx].FpControlStatus.FpStatus =
359         (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
360     vcxt.values[idx].FpControlStatus.FpTag = 0;
361     for (i = 0; i < 8; ++i) {
362         vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
363     }
364     vcxt.values[idx].FpControlStatus.Reserved = 0;
365     vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
366     vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
367     idx += 1;
368
369     /* XMM control status register */
370     assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
371     vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
372     vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
373     vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
374     idx += 1;
375
376     /* MSRs */
377     assert(whpx_register_names[idx] == WHvX64RegisterEfer);
378     vcxt.values[idx++].Reg64 = env->efer;
379 #ifdef TARGET_X86_64
380     assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
381     vcxt.values[idx++].Reg64 = env->kernelgsbase;
382 #endif
383
384     assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
385     vcxt.values[idx++].Reg64 = vcpu->apic_base;
386
387     /* WHvX64RegisterPat - Skipped */
388
389     assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
390     vcxt.values[idx++].Reg64 = env->sysenter_cs;
391     assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
392     vcxt.values[idx++].Reg64 = env->sysenter_eip;
393     assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
394     vcxt.values[idx++].Reg64 = env->sysenter_esp;
395     assert(whpx_register_names[idx] == WHvX64RegisterStar);
396     vcxt.values[idx++].Reg64 = env->star;
397 #ifdef TARGET_X86_64
398     assert(whpx_register_names[idx] == WHvX64RegisterLstar);
399     vcxt.values[idx++].Reg64 = env->lstar;
400     assert(whpx_register_names[idx] == WHvX64RegisterCstar);
401     vcxt.values[idx++].Reg64 = env->cstar;
402     assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
403     vcxt.values[idx++].Reg64 = env->fmask;
404 #endif
405
406     /* Interrupt / Event Registers - Skipped */
407
408     assert(idx == RTL_NUMBER_OF(whpx_register_names));
409
410     hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
411         whpx->partition, cpu->cpu_index,
412         whpx_register_names,
413         RTL_NUMBER_OF(whpx_register_names),
414         &vcxt.values[0]);
415
416     if (FAILED(hr)) {
417         error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
418                      hr);
419     }
420
421     return;
422 }
423
424 static int whpx_get_tsc(CPUState *cpu)
425 {
426     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
427     WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
428     WHV_REGISTER_VALUE tsc_val;
429     HRESULT hr;
430     struct whpx_state *whpx = &whpx_global;
431
432     hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
433         whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
434     if (FAILED(hr)) {
435         error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
436         return -1;
437     }
438
439     env->tsc = tsc_val.Reg64;
440     return 0;
441 }
442
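/*
 * Read the WHV virtual processor state back into QEMU's CPU state.
 * This is the inverse of whpx_set_registers() and relies on the same
 * whpx_register_names[] ordering.
 */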
443 static void whpx_get_registers(CPUState *cpu)
444 {
445     struct whpx_state *whpx = &whpx_global;
446     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
447     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
448     X86CPU *x86_cpu = X86_CPU(cpu);
449     struct whpx_register_set vcxt;
450     uint64_t tpr, apic_base;
451     HRESULT hr;
452     int idx;
453     int idx_next;
454     int i;
455
456     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
457
458     if (!env->tsc_valid) {
459         whpx_get_tsc(cpu);
460         env->tsc_valid = !runstate_is_running();
461     }
462
463     hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
464         whpx->partition, cpu->cpu_index,
465         whpx_register_names,
466         RTL_NUMBER_OF(whpx_register_names),
467         &vcxt.values[0]);
468     if (FAILED(hr)) {
469         error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
470                      hr);
471     }
472
473     idx = 0;
474
475     /* The first 16 register indexes match between HV and QEMU definitions */
476     idx_next = 16;
477     for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
478         env->regs[idx] = vcxt.values[idx].Reg64;
479     }
480     idx = idx_next;
481
482     /* Same goes for RIP and RFLAGS */
483     assert(whpx_register_names[idx] == WHvX64RegisterRip);
484     env->eip = vcxt.values[idx++].Reg64;
485     assert(whpx_register_names[idx] == WHvX64RegisterRflags);
486     env->eflags = vcxt.values[idx++].Reg64;
487
488     /* Translate 6+4 segment registers. HV and QEMU order matches  */
489     assert(idx == WHvX64RegisterEs);
490     for (i = 0; i < 6; i += 1, idx += 1) {
491         env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
492     }
493
494     assert(idx == WHvX64RegisterLdtr);
495     env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
496     assert(idx == WHvX64RegisterTr);
497     env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
498     assert(idx == WHvX64RegisterIdtr);
499     env->idt.base = vcxt.values[idx].Table.Base;
500     env->idt.limit = vcxt.values[idx].Table.Limit;
501     idx += 1;
502     assert(idx == WHvX64RegisterGdtr);
503     env->gdt.base = vcxt.values[idx].Table.Base;
504     env->gdt.limit = vcxt.values[idx].Table.Limit;
505     idx += 1;
506
507     /* CR0, 2, 3, 4, 8 */
508     assert(whpx_register_names[idx] == WHvX64RegisterCr0);
509     env->cr[0] = vcxt.values[idx++].Reg64;
510     assert(whpx_register_names[idx] == WHvX64RegisterCr2);
511     env->cr[2] = vcxt.values[idx++].Reg64;
512     assert(whpx_register_names[idx] == WHvX64RegisterCr3);
513     env->cr[3] = vcxt.values[idx++].Reg64;
514     assert(whpx_register_names[idx] == WHvX64RegisterCr4);
515     env->cr[4] = vcxt.values[idx++].Reg64;
516     assert(whpx_register_names[idx] == WHvX64RegisterCr8);
517     tpr = vcxt.values[idx++].Reg64;
518     if (tpr != vcpu->tpr) {
519         vcpu->tpr = tpr;
520         cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
521     }
522
523     /* 8 Debug Registers - Skipped */
524
525     /* 16 XMM registers */
526     assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
527     idx_next = idx + 16;
528     for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
529         env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
530         env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
531     }
532     idx = idx_next;
533
534     /* 8 FP registers */
535     assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
536     for (i = 0; i < 8; i += 1, idx += 1) {
537         env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
538         /* env->fpregs[i].mmx.MMX_Q(1) =
539                vcxt.values[idx].Fp.AsUINT128.High64;
540         */
541     }
542
543     /* FP control status register */
544     assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
545     env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
546     env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
547     env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
548     for (i = 0; i < 8; ++i) {
549         env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
550     }
551     env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
552     env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
553     idx += 1;
554
555     /* XMM control status register */
556     assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
557     env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
558     idx += 1;
559
560     /* MSRs */
561     assert(whpx_register_names[idx] == WHvX64RegisterEfer);
562     env->efer = vcxt.values[idx++].Reg64;
563 #ifdef TARGET_X86_64
564     assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
565     env->kernelgsbase = vcxt.values[idx++].Reg64;
566 #endif
567
568     assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
569     apic_base = vcxt.values[idx++].Reg64;
570     if (apic_base != vcpu->apic_base) {
571         vcpu->apic_base = apic_base;
572         cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
573     }
574
575     /* WHvX64RegisterPat - Skipped */
576
577     assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
578     env->sysenter_cs = vcxt.values[idx++].Reg64;
579     assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
580     env->sysenter_eip = vcxt.values[idx++].Reg64;
581     assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
582     env->sysenter_esp = vcxt.values[idx++].Reg64;
583     assert(whpx_register_names[idx] == WHvX64RegisterStar);
584     env->star = vcxt.values[idx++].Reg64;
585 #ifdef TARGET_X86_64
586     assert(whpx_register_names[idx] == WHvX64RegisterLstar);
587     env->lstar = vcxt.values[idx++].Reg64;
588     assert(whpx_register_names[idx] == WHvX64RegisterCstar);
589     env->cstar = vcxt.values[idx++].Reg64;
590     assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
591     env->fmask = vcxt.values[idx++].Reg64;
592 #endif
593
594     /* Interrupt / Event Registers - Skipped */
595
596     assert(idx == RTL_NUMBER_OF(whpx_register_names));
597
598     return;
599 }
600
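/*
 * Emulator callbacks: the WHV instruction emulator calls back into QEMU
 * to perform port I/O, MMIO, register access and GVA translation while
 * completing an intercepted instruction.
 */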
601 static HRESULT CALLBACK whpx_emu_ioport_callback(
602     void *ctx,
603     WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
604 {
605     MemTxAttrs attrs = { 0 };
606     address_space_rw(&address_space_io, IoAccess->Port, attrs,
607                      &IoAccess->Data, IoAccess->AccessSize,
608                      IoAccess->Direction);
609     return S_OK;
610 }
611
612 static HRESULT CALLBACK whpx_emu_mmio_callback(
613     void *ctx,
614     WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
615 {
616     cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
617                            ma->Direction);
618     return S_OK;
619 }
620
621 static HRESULT CALLBACK whpx_emu_getreg_callback(
622     void *ctx,
623     const WHV_REGISTER_NAME *RegisterNames,
624     UINT32 RegisterCount,
625     WHV_REGISTER_VALUE *RegisterValues)
626 {
627     HRESULT hr;
628     struct whpx_state *whpx = &whpx_global;
629     CPUState *cpu = (CPUState *)ctx;
630
631     hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
632         whpx->partition, cpu->cpu_index,
633         RegisterNames, RegisterCount,
634         RegisterValues);
635     if (FAILED(hr)) {
636         error_report("WHPX: Failed to get virtual processor registers,"
637                      " hr=%08lx", hr);
638     }
639
640     return hr;
641 }
642
643 static HRESULT CALLBACK whpx_emu_setreg_callback(
644     void *ctx,
645     const WHV_REGISTER_NAME *RegisterNames,
646     UINT32 RegisterCount,
647     const WHV_REGISTER_VALUE *RegisterValues)
648 {
649     HRESULT hr;
650     struct whpx_state *whpx = &whpx_global;
651     CPUState *cpu = (CPUState *)ctx;
652
653     hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
654         whpx->partition, cpu->cpu_index,
655         RegisterNames, RegisterCount,
656         RegisterValues);
657     if (FAILED(hr)) {
658         error_report("WHPX: Failed to set virtual processor registers,"
659                      " hr=%08lx", hr);
660     }
661
662     /*
663      * The emulator just successfully wrote the register state. Clear the
664      * dirty flag to avoid a redundant write when the VP resumes.
665      */
666     cpu->vcpu_dirty = false;
667
668     return hr;
669 }
670
671 static HRESULT CALLBACK whpx_emu_translate_callback(
672     void *ctx,
673     WHV_GUEST_VIRTUAL_ADDRESS Gva,
674     WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
675     WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
676     WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
677 {
678     HRESULT hr;
679     struct whpx_state *whpx = &whpx_global;
680     CPUState *cpu = (CPUState *)ctx;
681     WHV_TRANSLATE_GVA_RESULT res;
682
683     hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
684                                       Gva, TranslateFlags, &res, Gpa);
685     if (FAILED(hr)) {
686         error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
687     } else {
688         *TranslationResult = res.ResultCode;
689     }
690
691     return hr;
692 }
693
694 static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
695     .Size = sizeof(WHV_EMULATOR_CALLBACKS),
696     .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
697     .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
698     .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
699     .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
700     .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
701 };
702
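/*
 * Complete an intercepted MMIO access by handing the exit context to the
 * WHV instruction emulator, which calls back into QEMU through
 * whpx_emu_mmio_callback().
 */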
703 static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
704 {
705     HRESULT hr;
706     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
707     WHV_EMULATOR_STATUS emu_status;
708
709     hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
710         vcpu->emulator, cpu,
711         &vcpu->exit_ctx.VpContext, ctx,
712         &emu_status);
713     if (FAILED(hr)) {
714         error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
715         return -1;
716     }
717
718     if (!emu_status.EmulationSuccessful) {
719         error_report("WHPX: Failed to emulate MMIO access with"
720                      " EmulatorReturnStatus: %u", emu_status.AsUINT32);
721         return -1;
722     }
723
724     return 0;
725 }
726
727 static int whpx_handle_portio(CPUState *cpu,
728                               WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
729 {
730     HRESULT hr;
731     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
732     WHV_EMULATOR_STATUS emu_status;
733
734     hr = whp_dispatch.WHvEmulatorTryIoEmulation(
735         vcpu->emulator, cpu,
736         &vcpu->exit_ctx.VpContext, ctx,
737         &emu_status);
738     if (FAILED(hr)) {
739         error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
740         return -1;
741     }
742
743     if (!emu_status.EmulationSuccessful) {
744         error_report("WHPX: Failed to emulate PortIO access with"
745                      " EmulatorReturnStatus: %u", emu_status.AsUINT32);
746         return -1;
747     }
748
749     return 0;
750 }
751
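/*
 * Handle a HLT exit: halt the vCPU unless a deliverable hard interrupt
 * (IF set) or an NMI is pending. Returns 1 if the vCPU was halted.
 */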
752 static int whpx_handle_halt(CPUState *cpu)
753 {
754     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
755     int ret = 0;
756
757     qemu_mutex_lock_iothread();
758     if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
759           (env->eflags & IF_MASK)) &&
760         !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
761         cpu->exception_index = EXCP_HLT;
762         cpu->halted = true;
763         ret = 1;
764     }
765     qemu_mutex_unlock_iothread();
766
767     return ret;
768 }
769
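/*
 * Prepare the virtual processor before (re)entering it: inject a pending
 * NMI or PIC interrupt, sync the TPR into CR8, and request an interrupt
 * window notification when an interrupt cannot be injected yet.
 */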
770 static void whpx_vcpu_pre_run(CPUState *cpu)
771 {
772     HRESULT hr;
773     struct whpx_state *whpx = &whpx_global;
774     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
775     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
776     X86CPU *x86_cpu = X86_CPU(cpu);
777     int irq;
778     uint8_t tpr;
779     WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
780     UINT32 reg_count = 0;
781     WHV_REGISTER_VALUE reg_values[3];
782     WHV_REGISTER_NAME reg_names[3];
783
784     memset(&new_int, 0, sizeof(new_int));
785     memset(reg_values, 0, sizeof(reg_values));
786
787     qemu_mutex_lock_iothread();
788
789     /* Inject NMI */
790     if (!vcpu->interruption_pending &&
791         cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
792         if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
793             cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
794             vcpu->interruptable = false;
795             new_int.InterruptionType = WHvX64PendingNmi;
796             new_int.InterruptionPending = 1;
797             new_int.InterruptionVector = 2;
798         }
799         if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
800             cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
801         }
802     }
803
804     /*
805      * Force the VCPU out of its inner loop to process any INIT requests or
806      * commit pending TPR access.
807      */
808     if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
809         if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
810             !(env->hflags & HF_SMM_MASK)) {
811             cpu->exit_request = 1;
812         }
813         if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
814             cpu->exit_request = 1;
815         }
816     }
817
818     /* Get pending hard interruption or replay one that was overwritten */
819     if (!vcpu->interruption_pending &&
820         vcpu->interruptable && (env->eflags & IF_MASK)) {
821         assert(!new_int.InterruptionPending);
822         if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
823             cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
824             irq = cpu_get_pic_interrupt(env);
825             if (irq >= 0) {
826                 new_int.InterruptionType = WHvX64PendingInterrupt;
827                 new_int.InterruptionPending = 1;
828                 new_int.InterruptionVector = irq;
829             }
830         }
831     }
832
833     /* Set up the interrupt state if a new one was prepared */
834     if (new_int.InterruptionPending) {
835         reg_values[reg_count].PendingInterruption = new_int;
836         reg_names[reg_count] = WHvRegisterPendingInterruption;
837         reg_count += 1;
838     }
839
840     /* Sync the TPR to CR8 if it was modified during the intercept */
841     tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
842     if (tpr != vcpu->tpr) {
843         vcpu->tpr = tpr;
844         reg_values[reg_count].Reg64 = tpr;
845         cpu->exit_request = 1;
846         reg_names[reg_count] = WHvX64RegisterCr8;
847         reg_count += 1;
848     }
849
850     /* Update the state of the interrupt delivery notification */
851     if (!vcpu->window_registered &&
852         cpu->interrupt_request & CPU_INTERRUPT_HARD) {
853         reg_values[reg_count].DeliverabilityNotifications.InterruptNotification
854             = 1;
855         vcpu->window_registered = 1;
856         reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
857         reg_count += 1;
858     }
859
860     qemu_mutex_unlock_iothread();
861
862     if (reg_count) {
863         hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
864             whpx->partition, cpu->cpu_index,
865             reg_names, reg_count, reg_values);
866         if (FAILED(hr)) {
867             error_report("WHPX: Failed to set interrupt state registers,"
868                          " hr=%08lx", hr);
869         }
870     }
871
872     return;
873 }
874
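/*
 * Pick up state that may have changed while the virtual processor ran:
 * RFLAGS, the TPR (CR8) and the interruptibility state reported in the
 * exit context.
 */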
875 static void whpx_vcpu_post_run(CPUState *cpu)
876 {
877     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
878     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
879     X86CPU *x86_cpu = X86_CPU(cpu);
880
881     env->eflags = vcpu->exit_ctx.VpContext.Rflags;
882
883     uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8;
884     if (vcpu->tpr != tpr) {
885         vcpu->tpr = tpr;
886         qemu_mutex_lock_iothread();
887         cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr);
888         qemu_mutex_unlock_iothread();
889     }
890
891     vcpu->interruption_pending =
892         vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;
893
894     vcpu->interruptable =
895         !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;
896
897     return;
898 }
899
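/*
 * Handle events queued by QEMU while the vCPU was outside the run loop:
 * INIT, SIPI, APIC poll requests and TPR access reports.
 */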
900 static void whpx_vcpu_process_async_events(CPUState *cpu)
901 {
902     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
903     X86CPU *x86_cpu = X86_CPU(cpu);
904     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
905
906     if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
907         !(env->hflags & HF_SMM_MASK)) {
908         whpx_cpu_synchronize_state(cpu);
909         do_cpu_init(x86_cpu);
910         vcpu->interruptable = true;
911     }
912
913     if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
914         cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
915         apic_poll_irq(x86_cpu->apic_state);
916     }
917
918     if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
919          (env->eflags & IF_MASK)) ||
920         (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
921         cpu->halted = false;
922     }
923
924     if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
925         whpx_cpu_synchronize_state(cpu);
926         do_cpu_sipi(x86_cpu);
927     }
928
929     if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
930         cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
931         whpx_cpu_synchronize_state(cpu);
932         apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
933                                       env->tpr_access_type);
934     }
935
936     return;
937 }
938
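/*
 * Run the virtual processor until an exit occurs that must be handled in
 * QEMU's main loop. Returns non-zero only on a fatal error.
 */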
939 static int whpx_vcpu_run(CPUState *cpu)
940 {
941     HRESULT hr;
942     struct whpx_state *whpx = &whpx_global;
943     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
944     int ret;
945
946     whpx_vcpu_process_async_events(cpu);
947     if (cpu->halted) {
948         cpu->exception_index = EXCP_HLT;
949         atomic_set(&cpu->exit_request, false);
950         return 0;
951     }
952
953     qemu_mutex_unlock_iothread();
954     cpu_exec_start(cpu);
955
956     do {
957         if (cpu->vcpu_dirty) {
958             whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
959             cpu->vcpu_dirty = false;
960         }
961
962         whpx_vcpu_pre_run(cpu);
963
964         if (atomic_read(&cpu->exit_request)) {
965             whpx_vcpu_kick(cpu);
966         }
967
968         hr = whp_dispatch.WHvRunVirtualProcessor(
969             whpx->partition, cpu->cpu_index,
970             &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));
971
972         if (FAILED(hr)) {
973             error_report("WHPX: Failed to exec a virtual processor,"
974                          " hr=%08lx", hr);
975             ret = -1;
976             break;
977         }
978
979         whpx_vcpu_post_run(cpu);
980
981         switch (vcpu->exit_ctx.ExitReason) {
982         case WHvRunVpExitReasonMemoryAccess:
983             ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
984             break;
985
986         case WHvRunVpExitReasonX64IoPortAccess:
987             ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
988             break;
989
990         case WHvRunVpExitReasonX64InterruptWindow:
991             vcpu->window_registered = 0;
992             ret = 0;
993             break;
994
995         case WHvRunVpExitReasonX64Halt:
996             ret = whpx_handle_halt(cpu);
997             break;
998
999         case WHvRunVpExitReasonCanceled:
1000             cpu->exception_index = EXCP_INTERRUPT;
1001             ret = 1;
1002             break;
1003
1004         case WHvRunVpExitReasonX64MsrAccess: {
1005             WHV_REGISTER_VALUE reg_values[3] = {0};
1006             WHV_REGISTER_NAME reg_names[3];
1007             UINT32 reg_count;
1008
1009             reg_names[0] = WHvX64RegisterRip;
1010             reg_names[1] = WHvX64RegisterRax;
1011             reg_names[2] = WHvX64RegisterRdx;
1012
1013             reg_values[0].Reg64 =
1014                 vcpu->exit_ctx.VpContext.Rip +
1015                 vcpu->exit_ctx.VpContext.InstructionLength;
1016
1017             /*
1018              * For all unsupported MSR accesses we:
1019              *     ignore writes,
1020              *     return 0 on reads.
1021              */
1022             reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
1023                         1 : 3;
1024
1025             hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1026                 whpx->partition,
1027                 cpu->cpu_index,
1028                 reg_names, reg_count,
1029                 reg_values);
1030
1031             if (FAILED(hr)) {
1032                 error_report("WHPX: Failed to set MsrAccess state "
1033                              " registers, hr=%08lx", hr);
1034             }
1035             ret = 0;
1036             break;
1037         }
1038         case WHvRunVpExitReasonX64Cpuid: {
1039             WHV_REGISTER_VALUE reg_values[5];
1040             WHV_REGISTER_NAME reg_names[5];
1041             UINT32 reg_count = 5;
1042             UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
1043             X86CPU *x86_cpu = X86_CPU(cpu);
1044             CPUX86State *env = &x86_cpu->env;
1045
1046             memset(reg_values, 0, sizeof(reg_values));
1047
1048             rip = vcpu->exit_ctx.VpContext.Rip +
1049                   vcpu->exit_ctx.VpContext.InstructionLength;
1050             cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;
1051
1052             /*
1053              * Ideally, these should be supplied to the hypervisor during VCPU
1054              * initialization and it should be able to satisfy this request.
1055              * But currently WHPX doesn't support setting CPUID values in the
1056              * hypervisor once the partition has been set up, which is too late
1057              * since VCPUs are realized later. For now, use the values from
1058              * QEMU to satisfy these requests, until WHPX adds support for
1059              * setting these values in the hypervisor at runtime.
1060              */
1061             cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
1062                 (UINT32 *)&rcx, (UINT32 *)&rdx);
1063             switch (cpuid_fn) {
1064             case 0x80000001:
1065                 /* Remove any support for OSVW */
1066                 rcx &= ~CPUID_EXT3_OSVW;
1067                 break;
1068             }
1069
1070             reg_names[0] = WHvX64RegisterRip;
1071             reg_names[1] = WHvX64RegisterRax;
1072             reg_names[2] = WHvX64RegisterRcx;
1073             reg_names[3] = WHvX64RegisterRdx;
1074             reg_names[4] = WHvX64RegisterRbx;
1075
1076             reg_values[0].Reg64 = rip;
1077             reg_values[1].Reg64 = rax;
1078             reg_values[2].Reg64 = rcx;
1079             reg_values[3].Reg64 = rdx;
1080             reg_values[4].Reg64 = rbx;
1081
1082             hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1083                 whpx->partition, cpu->cpu_index,
1084                 reg_names,
1085                 reg_count,
1086                 reg_values);
1087
1088             if (FAILED(hr)) {
1089                 error_report("WHPX: Failed to set CpuidAccess state registers,"
1090                              " hr=%08lx", hr);
1091             }
1092             ret = 0;
1093             break;
1094         }
1095         case WHvRunVpExitReasonNone:
1096         case WHvRunVpExitReasonUnrecoverableException:
1097         case WHvRunVpExitReasonInvalidVpRegisterValue:
1098         case WHvRunVpExitReasonUnsupportedFeature:
1099         case WHvRunVpExitReasonException:
1100         default:
1101             error_report("WHPX: Unexpected VP exit code %d",
1102                          vcpu->exit_ctx.ExitReason);
1103             whpx_get_registers(cpu);
1104             qemu_mutex_lock_iothread();
1105             qemu_system_guest_panicked(cpu_get_crash_info(cpu));
1106             qemu_mutex_unlock_iothread();
1107             break;
1108         }
1109
1110     } while (!ret);
1111
1112     cpu_exec_end(cpu);
1113     qemu_mutex_lock_iothread();
1114     current_cpu = cpu;
1115
1116     atomic_set(&cpu->exit_request, false);
1117
1118     return ret < 0;
1119 }
1120
1121 static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
1122 {
1123     if (!cpu->vcpu_dirty) {
1124         whpx_get_registers(cpu);
1125         cpu->vcpu_dirty = true;
1126     }
1127 }
1128
1129 static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
1130                                                run_on_cpu_data arg)
1131 {
1132     whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
1133     cpu->vcpu_dirty = false;
1134 }
1135
1136 static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
1137                                               run_on_cpu_data arg)
1138 {
1139     whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
1140     cpu->vcpu_dirty = false;
1141 }
1142
1143 static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
1144                                                run_on_cpu_data arg)
1145 {
1146     cpu->vcpu_dirty = true;
1147 }
1148
1149 /*
1150  * CPU support.
1151  */
1152
1153 void whpx_cpu_synchronize_state(CPUState *cpu)
1154 {
1155     if (!cpu->vcpu_dirty) {
1156         run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
1157     }
1158 }
1159
1160 void whpx_cpu_synchronize_post_reset(CPUState *cpu)
1161 {
1162     run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
1163 }
1164
1165 void whpx_cpu_synchronize_post_init(CPUState *cpu)
1166 {
1167     run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
1168 }
1169
1170 void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
1171 {
1172     run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
1173 }
1174
1175 /*
1176  * Vcpu support.
1177  */
1178
1179 static Error *whpx_migration_blocker;
1180
1181 static void whpx_cpu_update_state(void *opaque, int running, RunState state)
1182 {
1183     CPUX86State *env = opaque;
1184
1185     if (running) {
1186         env->tsc_valid = false;
1187     }
1188 }
1189
1190 int whpx_init_vcpu(CPUState *cpu)
1191 {
1192     HRESULT hr;
1193     struct whpx_state *whpx = &whpx_global;
1194     struct whpx_vcpu *vcpu;
1195     Error *local_error = NULL;
1196
1197     /* Add migration blockers for all unsupported features of the
1198      * Windows Hypervisor Platform
1199      */
1200     if (whpx_migration_blocker == NULL) {
1201         error_setg(&whpx_migration_blocker,
1202                "State blocked due to non-migratable CPUID feature support,"
1203                " dirty memory tracking support, and XSAVE/XRSTOR support");
1204
1205         (void)migrate_add_blocker(whpx_migration_blocker, &local_error);
1206         if (local_error) {
1207             error_report_err(local_error);
1208             migrate_del_blocker(whpx_migration_blocker);
1209             error_free(whpx_migration_blocker);
1210             return -EINVAL;
1211         }
1212     }
1213
1214     vcpu = g_malloc0(sizeof(struct whpx_vcpu));
1215
1216     if (!vcpu) {
1217         error_report("WHPX: Failed to allocate VCPU context.");
1218         return -ENOMEM;
1219     }
1220
1221     hr = whp_dispatch.WHvEmulatorCreateEmulator(
1222         &whpx_emu_callbacks,
1223         &vcpu->emulator);
1224     if (FAILED(hr)) {
1225         error_report("WHPX: Failed to setup instruction completion support,"
1226                      " hr=%08lx", hr);
1227         g_free(vcpu);
1228         return -EINVAL;
1229     }
1230
1231     hr = whp_dispatch.WHvCreateVirtualProcessor(
1232         whpx->partition, cpu->cpu_index, 0);
1233     if (FAILED(hr)) {
1234         error_report("WHPX: Failed to create a virtual processor,"
1235                      " hr=%08lx", hr);
1236         whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
1237         g_free(vcpu);
1238         return -EINVAL;
1239     }
1240
1241     vcpu->interruptable = true;
1242
1243     cpu->vcpu_dirty = true;
1244     cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
1245     qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
1246
1247     return 0;
1248 }
1249
1250 int whpx_vcpu_exec(CPUState *cpu)
1251 {
1252     int ret;
1253     int fatal;
1254
1255     for (;;) {
1256         if (cpu->exception_index >= EXCP_INTERRUPT) {
1257             ret = cpu->exception_index;
1258             cpu->exception_index = -1;
1259             break;
1260         }
1261
1262         fatal = whpx_vcpu_run(cpu);
1263
1264         if (fatal) {
1265             error_report("WHPX: Failed to exec a virtual processor");
1266             abort();
1267         }
1268     }
1269
1270     return ret;
1271 }
1272
1273 void whpx_destroy_vcpu(CPUState *cpu)
1274 {
1275     struct whpx_state *whpx = &whpx_global;
1276     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
1277
1278     whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
1279     whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
1280     g_free(cpu->hax_vcpu);
1281     return;
1282 }
1283
1284 void whpx_vcpu_kick(CPUState *cpu)
1285 {
1286     struct whpx_state *whpx = &whpx_global;
1287     whp_dispatch.WHvCancelRunVirtualProcessor(
1288         whpx->partition, cpu->cpu_index, 0);
1289 }
1290
1291 /*
1292  * Memory support.
1293  */
1294
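/*
 * Map or unmap a contiguous guest-physical range in the partition. ROM
 * ranges are mapped without write permission.
 */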
1295 static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
1296                                 void *host_va, int add, int rom,
1297                                 const char *name)
1298 {
1299     struct whpx_state *whpx = &whpx_global;
1300     HRESULT hr;
1301
1302     /*
1303     if (add) {
1304         printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
1305                (void*)start_pa, (void*)size, host_va,
1306                (rom ? "ROM" : "RAM"), name);
1307     } else {
1308         printf("WHPX: DEL PA:%p Size:%p, Host:%p,      '%s'\n",
1309                (void*)start_pa, (void*)size, host_va, name);
1310     }
1311     */
1312
1313     if (add) {
1314         hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
1315                                          host_va,
1316                                          start_pa,
1317                                          size,
1318                                          (WHvMapGpaRangeFlagRead |
1319                                           WHvMapGpaRangeFlagExecute |
1320                                           (rom ? 0 : WHvMapGpaRangeFlagWrite)));
1321     } else {
1322         hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
1323                                            start_pa,
1324                                            size);
1325     }
1326
1327     if (FAILED(hr)) {
1328         error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
1329                      " Host:%p, hr=%08lx",
1330                      (add ? "MAP" : "UNMAP"), name,
1331                      (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
1332     }
1333 }
1334
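/*
 * Add or remove the RAM backing a memory region section, trimming the
 * range to host-page alignment first; sub-page remainders are skipped.
 */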
1335 static void whpx_process_section(MemoryRegionSection *section, int add)
1336 {
1337     MemoryRegion *mr = section->mr;
1338     hwaddr start_pa = section->offset_within_address_space;
1339     ram_addr_t size = int128_get64(section->size);
1340     unsigned int delta;
1341     uint64_t host_va;
1342
1343     if (!memory_region_is_ram(mr)) {
1344         return;
1345     }
1346
1347     delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
1348     delta &= ~qemu_real_host_page_mask;
1349     if (delta > size) {
1350         return;
1351     }
1352     start_pa += delta;
1353     size -= delta;
1354     size &= qemu_real_host_page_mask;
1355     if (!size || (start_pa & ~qemu_real_host_page_mask)) {
1356         return;
1357     }
1358
1359     host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
1360             + section->offset_within_region + delta;
1361
1362     whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
1363                         memory_region_is_rom(mr), mr->name);
1364 }
1365
1366 static void whpx_region_add(MemoryListener *listener,
1367                            MemoryRegionSection *section)
1368 {
1369     memory_region_ref(section->mr);
1370     whpx_process_section(section, 1);
1371 }
1372
1373 static void whpx_region_del(MemoryListener *listener,
1374                            MemoryRegionSection *section)
1375 {
1376     whpx_process_section(section, 0);
1377     memory_region_unref(section->mr);
1378 }
1379
1380 static void whpx_transaction_begin(MemoryListener *listener)
1381 {
1382 }
1383
1384 static void whpx_transaction_commit(MemoryListener *listener)
1385 {
1386 }
1387
1388 static void whpx_log_sync(MemoryListener *listener,
1389                          MemoryRegionSection *section)
1390 {
1391     MemoryRegion *mr = section->mr;
1392
1393     if (!memory_region_is_ram(mr)) {
1394         return;
1395     }
1396
1397     memory_region_set_dirty(mr, 0, int128_get64(section->size));
1398 }
1399
1400 static MemoryListener whpx_memory_listener = {
1401     .begin = whpx_transaction_begin,
1402     .commit = whpx_transaction_commit,
1403     .region_add = whpx_region_add,
1404     .region_del = whpx_region_del,
1405     .log_sync = whpx_log_sync,
1406     .priority = 10,
1407 };
1408
1409 static void whpx_memory_init(void)
1410 {
1411     memory_listener_register(&whpx_memory_listener, &address_space_memory);
1412 }
1413
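/* cpu_interrupt_handler hook: record the request and kick the vCPU if needed. */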
1414 static void whpx_handle_interrupt(CPUState *cpu, int mask)
1415 {
1416     cpu->interrupt_request |= mask;
1417
1418     if (!qemu_cpu_is_self(cpu)) {
1419         qemu_cpu_kick(cpu);
1420     }
1421 }
1422
1423 /*
1424  * Load the functions from the given library, using the given handle. If a
1425  * handle is provided, it is used; otherwise the library is opened. The
1426  * handle is updated on return with the newly opened one.
1427  */
1428 static bool load_whp_dispatch_fns(HMODULE *handle,
1429     WHPFunctionList function_list)
1430 {
1431     HMODULE hLib = *handle;
1432
1433     #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
1434     #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
1435     #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
1436         whp_dispatch.function_name = \
1437             (function_name ## _t)GetProcAddress(hLib, #function_name); \
1438
1439     #define WHP_LOAD_FIELD(return_type, function_name, signature) \
1440         whp_dispatch.function_name = \
1441             (function_name ## _t)GetProcAddress(hLib, #function_name); \
1442         if (!whp_dispatch.function_name) { \
1443             error_report("Could not load function %s", #function_name); \
1444             goto error; \
1445         } \
1446
1447     #define WHP_LOAD_LIB(lib_name, handle_lib) \
1448     if (!handle_lib) { \
1449         handle_lib = LoadLibrary(lib_name); \
1450         if (!handle_lib) { \
1451             error_report("Could not load library %s.", lib_name); \
1452             goto error; \
1453         } \
1454     } \
1455
1456     switch (function_list) {
1457     case WINHV_PLATFORM_FNS_DEFAULT:
1458         WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
1459         LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
1460         break;
1461
1462     case WINHV_EMULATION_FNS_DEFAULT:
1463         WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
1464         LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
1465         break;
1466
1467     case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
1468         WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
1469         LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
1470         break;
1471     }
1472
1473     *handle = hLib;
1474     return true;
1475
1476 error:
1477     if (hLib) {
1478         FreeLibrary(hLib);
1479     }
1480
1481     return false;
1482 }
1483
1484 /*
1485  * Partition support
1486  */
1487
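/*
 * Create and configure the WHV partition: verify that the hypervisor is
 * present, set the processor count, enable MSR and CPUID exits, set up
 * the partition and register the memory listener.
 */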
1488 static int whpx_accel_init(MachineState *ms)
1489 {
1490     struct whpx_state *whpx;
1491     int ret;
1492     HRESULT hr;
1493     WHV_CAPABILITY whpx_cap;
1494     UINT32 whpx_cap_size;
1495     WHV_PARTITION_PROPERTY prop;
1496
1497     whpx = &whpx_global;
1498
1499     if (!init_whp_dispatch()) {
1500         ret = -ENOSYS;
1501         goto error;
1502     }
1503
1504     memset(whpx, 0, sizeof(struct whpx_state));
1505     whpx->mem_quota = ms->ram_size;
1506
1507     hr = whp_dispatch.WHvGetCapability(
1508         WHvCapabilityCodeHypervisorPresent, &whpx_cap,
1509         sizeof(whpx_cap), &whpx_cap_size);
1510     if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
1511         error_report("WHPX: No accelerator found, hr=%08lx", hr);
1512         ret = -ENOSPC;
1513         goto error;
1514     }
1515
1516     hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
1517     if (FAILED(hr)) {
1518         error_report("WHPX: Failed to create partition, hr=%08lx", hr);
1519         ret = -EINVAL;
1520         goto error;
1521     }
1522
1523     memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
1524     prop.ProcessorCount = ms->smp.cpus;
1525     hr = whp_dispatch.WHvSetPartitionProperty(
1526         whpx->partition,
1527         WHvPartitionPropertyCodeProcessorCount,
1528         &prop,
1529         sizeof(WHV_PARTITION_PROPERTY));
1530
1531     if (FAILED(hr)) {
1532         error_report("WHPX: Failed to set partition processor count to %d,"
1533                      " hr=%08lx", ms->smp.cpus, hr);
1534         ret = -EINVAL;
1535         goto error;
1536     }
1537
1538     memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
1539     prop.ExtendedVmExits.X64MsrExit = 1;
1540     prop.ExtendedVmExits.X64CpuidExit = 1;
1541     hr = whp_dispatch.WHvSetPartitionProperty(
1542         whpx->partition,
1543         WHvPartitionPropertyCodeExtendedVmExits,
1544         &prop,
1545         sizeof(WHV_PARTITION_PROPERTY));
1546
1547     if (FAILED(hr)) {
1548         error_report("WHPX: Failed to enable partition extended X64MsrExit and"
1549                      " X64CpuidExit hr=%08lx", hr);
1550         ret = -EINVAL;
1551         goto error;
1552     }
1553
1554     UINT32 cpuidExitList[] = {1, 0x80000001};
1555     hr = whp_dispatch.WHvSetPartitionProperty(
1556         whpx->partition,
1557         WHvPartitionPropertyCodeCpuidExitList,
1558         cpuidExitList,
1559         RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
1560
1561     if (FAILED(hr)) {
1562         error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
1563                      hr);
1564         ret = -EINVAL;
1565         goto error;
1566     }
1567
1568     hr = whp_dispatch.WHvSetupPartition(whpx->partition);
1569     if (FAILED(hr)) {
1570         error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
1571         ret = -EINVAL;
1572         goto error;
1573     }
1574
1575     whpx_memory_init();
1576
1577     cpu_interrupt_handler = whpx_handle_interrupt;
1578
1579     printf("Windows Hypervisor Platform accelerator is operational\n");
1580     return 0;
1581
1582   error:
1583
1584     if (whpx->partition != NULL) {
1585         whp_dispatch.WHvDeletePartition(whpx->partition);
1586         whpx->partition = NULL;
1587     }
1588
1589
1590     return ret;
1591 }
1592
1593 int whpx_enabled(void)
1594 {
1595     return whpx_allowed;
1596 }
1597
1598 static void whpx_accel_class_init(ObjectClass *oc, void *data)
1599 {
1600     AccelClass *ac = ACCEL_CLASS(oc);
1601     ac->name = "WHPX";
1602     ac->init_machine = whpx_accel_init;
1603     ac->allowed = &whpx_allowed;
1604 }
1605
1606 static const TypeInfo whpx_accel_type = {
1607     .name = ACCEL_CLASS_NAME("whpx"),
1608     .parent = TYPE_ACCEL,
1609     .class_init = whpx_accel_class_init,
1610 };
1611
1612 static void whpx_type_init(void)
1613 {
1614     type_register_static(&whpx_accel_type);
1615 }
1616
1617 bool init_whp_dispatch(void)
1618 {
1619     if (whp_dispatch_initialized) {
1620         return true;
1621     }
1622
1623     if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
1624         goto error;
1625     }
1626
1627     if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
1628         goto error;
1629     }
1630
1631     assert(load_whp_dispatch_fns(&hWinHvPlatform,
1632         WINHV_PLATFORM_FNS_SUPPLEMENTAL));
1633     whp_dispatch_initialized = true;
1634
1635     return true;
1636 error:
1637     if (hWinHvPlatform) {
1638         FreeLibrary(hWinHvPlatform);
1639     }
1640
1641     if (hWinHvEmulation) {
1642         FreeLibrary(hWinHvEmulation);
1643     }
1644
1645     return false;
1646 }
1647
1648 type_init(whpx_type_init);