]>
Commit | Line | Data |
---|---|---|
549e984e SL |
1 | /* |
2 | * Copyright (c) 2003-2004 Fabrice Bellard | |
3 | * Copyright (c) 2019 Red Hat, Inc. | |
4 | * | |
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
6 | * of this software and associated documentation files (the "Software"), to deal | |
7 | * in the Software without restriction, including without limitation the rights | |
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
9 | * copies of the Software, and to permit persons to whom the Software is | |
10 | * furnished to do so, subject to the following conditions: | |
11 | * | |
12 | * The above copyright notice and this permission notice shall be included in | |
13 | * all copies or substantial portions of the Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
21 | * THE SOFTWARE. | |
22 | */ | |
23 | #include "qemu/osdep.h" | |
24 | #include "qemu/error-report.h" | |
25 | #include "qemu/option.h" | |
26 | #include "qemu/cutils.h" | |
27 | #include "qemu/units.h" | |
28 | #include "qemu-common.h" | |
29 | #include "qapi/error.h" | |
30 | #include "qapi/qmp/qerror.h" | |
31 | #include "qapi/qapi-visit-common.h" | |
32 | #include "qapi/visitor.h" | |
33 | #include "sysemu/qtest.h" | |
34 | #include "sysemu/numa.h" | |
35 | #include "sysemu/replay.h" | |
36 | #include "sysemu/sysemu.h" | |
89a289c7 | 37 | #include "trace.h" |
549e984e SL |
38 | |
39 | #include "hw/i386/x86.h" | |
549e984e SL |
40 | #include "target/i386/cpu.h" |
41 | #include "hw/i386/topology.h" | |
42 | #include "hw/i386/fw_cfg.h" | |
852c27e2 | 43 | #include "hw/intc/i8259.h" |
0cca1a91 | 44 | #include "hw/rtc/mc146818rtc.h" |
549e984e SL |
45 | |
46 | #include "hw/acpi/cpu_hotplug.h" | |
89a289c7 | 47 | #include "hw/irq.h" |
549e984e SL |
48 | #include "hw/nmi.h" |
49 | #include "hw/loader.h" | |
50 | #include "multiboot.h" | |
51 | #include "elf.h" | |
52 | #include "standard-headers/asm-x86/bootparam.h" | |
2becc36a | 53 | #include CONFIG_DEVICES |
89a289c7 | 54 | #include "kvm_i386.h" |
549e984e SL |
55 | |
/*
 * File name of the BIOS image.
 * NOTE(review): presumably the default firmware file; it is not referenced
 * in this part of the file - confirm against its users.
 */
#define BIOS_FILENAME "bios.bin"

/*
 * Physical Address of PVH entry point read from kernel ELF NOTE.
 * Written by read_pvh_start_addr() and read back by load_elfboot().
 */
static size_t pvh_start_addr;
60 | ||
53a5e7bd BM |
61 | inline void init_topo_info(X86CPUTopoInfo *topo_info, |
62 | const X86MachineState *x86ms) | |
63 | { | |
64 | MachineState *ms = MACHINE(x86ms); | |
65 | ||
66 | topo_info->dies_per_pkg = x86ms->smp_dies; | |
67 | topo_info->cores_per_die = ms->smp.cores; | |
68 | topo_info->threads_per_core = ms->smp.threads; | |
69 | } | |
70 | ||
549e984e SL |
71 | /* |
72 | * Calculates initial APIC ID for a specific CPU index | |
73 | * | |
74 | * Currently we need to be able to calculate the APIC ID from the CPU index | |
75 | * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have | |
76 | * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of | |
77 | * all CPUs up to max_cpus. | |
78 | */ | |
703a548a | 79 | uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, |
549e984e SL |
80 | unsigned int cpu_index) |
81 | { | |
f0bb276b | 82 | X86MachineClass *x86mc = X86_MACHINE_GET_CLASS(x86ms); |
53a5e7bd | 83 | X86CPUTopoInfo topo_info; |
549e984e SL |
84 | uint32_t correct_id; |
85 | static bool warned; | |
86 | ||
53a5e7bd BM |
87 | init_topo_info(&topo_info, x86ms); |
88 | ||
dfe7ed0a | 89 | correct_id = x86_apicid_from_cpu_idx(&topo_info, cpu_index); |
f0bb276b | 90 | if (x86mc->compat_apic_id_mode) { |
549e984e SL |
91 | if (cpu_index != correct_id && !warned && !qtest_enabled()) { |
92 | error_report("APIC IDs set in compatibility mode, " | |
93 | "CPU topology won't match the configuration"); | |
94 | warned = true; | |
95 | } | |
96 | return cpu_index; | |
97 | } else { | |
98 | return correct_id; | |
99 | } | |
100 | } | |
101 | ||
703a548a SL |
102 | |
103 | void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) | |
549e984e | 104 | { |
18d588fe | 105 | Object *cpu = object_new(MACHINE(x86ms)->cpu_type); |
549e984e | 106 | |
992861fb | 107 | if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { |
18d588fe MA |
108 | goto out; |
109 | } | |
992861fb | 110 | qdev_realize(DEVICE(cpu), NULL, errp); |
549e984e | 111 | |
18d588fe | 112 | out: |
549e984e | 113 | object_unref(cpu); |
549e984e SL |
114 | } |
115 | ||
703a548a | 116 | void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) |
549e984e SL |
117 | { |
118 | int i; | |
119 | const CPUArchIdList *possible_cpus; | |
703a548a SL |
120 | MachineState *ms = MACHINE(x86ms); |
121 | MachineClass *mc = MACHINE_GET_CLASS(x86ms); | |
549e984e | 122 | |
703a548a | 123 | x86_cpu_set_default_version(default_cpu_version); |
549e984e SL |
124 | |
125 | /* | |
126 | * Calculates the limit to CPU APIC ID values | |
127 | * | |
128 | * Limit for the APIC ID value, so that all | |
703a548a | 129 | * CPU APIC IDs are < x86ms->apic_id_limit. |
549e984e SL |
130 | * |
131 | * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). | |
132 | */ | |
703a548a | 133 | x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, |
f0bb276b | 134 | ms->smp.max_cpus - 1) + 1; |
549e984e SL |
135 | possible_cpus = mc->possible_cpu_arch_ids(ms); |
136 | for (i = 0; i < ms->smp.cpus; i++) { | |
703a548a | 137 | x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); |
549e984e SL |
138 | } |
139 | } | |
0cca1a91 GH |
140 | |
141 | void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count) | |
142 | { | |
143 | if (cpus_count > 0xff) { | |
144 | /* | |
145 | * If the number of CPUs can't be represented in 8 bits, the | |
146 | * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just | |
147 | * to make old BIOSes fail more predictably. | |
148 | */ | |
149 | rtc_set_memory(rtc, 0x5f, 0); | |
150 | } else { | |
151 | rtc_set_memory(rtc, 0x5f, cpus_count - 1); | |
152 | } | |
153 | } | |
154 | ||
155 | static int x86_apic_cmp(const void *a, const void *b) | |
156 | { | |
157 | CPUArchId *apic_a = (CPUArchId *)a; | |
158 | CPUArchId *apic_b = (CPUArchId *)b; | |
159 | ||
160 | return apic_a->arch_id - apic_b->arch_id; | |
161 | } | |
162 | ||
163 | /* | |
164 | * returns pointer to CPUArchId descriptor that matches CPU's apic_id | |
165 | * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no | |
166 | * entry corresponding to CPU's apic_id returns NULL. | |
167 | */ | |
168 | CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) | |
169 | { | |
170 | CPUArchId apic_id, *found_cpu; | |
171 | ||
172 | apic_id.arch_id = id; | |
173 | found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, | |
174 | ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), | |
175 | x86_apic_cmp); | |
176 | if (found_cpu && idx) { | |
177 | *idx = found_cpu - ms->possible_cpus->cpus; | |
178 | } | |
179 | return found_cpu; | |
180 | } | |
181 | ||
182 | void x86_cpu_plug(HotplugHandler *hotplug_dev, | |
183 | DeviceState *dev, Error **errp) | |
184 | { | |
185 | CPUArchId *found_cpu; | |
186 | Error *local_err = NULL; | |
187 | X86CPU *cpu = X86_CPU(dev); | |
188 | X86MachineState *x86ms = X86_MACHINE(hotplug_dev); | |
189 | ||
190 | if (x86ms->acpi_dev) { | |
191 | hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); | |
192 | if (local_err) { | |
193 | goto out; | |
194 | } | |
195 | } | |
196 | ||
197 | /* increment the number of CPUs */ | |
198 | x86ms->boot_cpus++; | |
199 | if (x86ms->rtc) { | |
200 | x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); | |
201 | } | |
202 | if (x86ms->fw_cfg) { | |
203 | fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); | |
204 | } | |
205 | ||
206 | found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); | |
207 | found_cpu->cpu = OBJECT(dev); | |
208 | out: | |
209 | error_propagate(errp, local_err); | |
210 | } | |
211 | ||
212 | void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, | |
213 | DeviceState *dev, Error **errp) | |
214 | { | |
215 | int idx = -1; | |
216 | X86CPU *cpu = X86_CPU(dev); | |
217 | X86MachineState *x86ms = X86_MACHINE(hotplug_dev); | |
218 | ||
219 | if (!x86ms->acpi_dev) { | |
220 | error_setg(errp, "CPU hot unplug not supported without ACPI"); | |
221 | return; | |
222 | } | |
223 | ||
224 | x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); | |
225 | assert(idx != -1); | |
226 | if (idx == 0) { | |
227 | error_setg(errp, "Boot CPU is unpluggable"); | |
228 | return; | |
229 | } | |
230 | ||
231 | hotplug_handler_unplug_request(x86ms->acpi_dev, dev, | |
232 | errp); | |
233 | } | |
234 | ||
235 | void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, | |
236 | DeviceState *dev, Error **errp) | |
237 | { | |
238 | CPUArchId *found_cpu; | |
239 | Error *local_err = NULL; | |
240 | X86CPU *cpu = X86_CPU(dev); | |
241 | X86MachineState *x86ms = X86_MACHINE(hotplug_dev); | |
242 | ||
243 | hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err); | |
244 | if (local_err) { | |
245 | goto out; | |
246 | } | |
247 | ||
248 | found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); | |
249 | found_cpu->cpu = NULL; | |
250 | qdev_unrealize(dev); | |
251 | ||
252 | /* decrement the number of CPUs */ | |
253 | x86ms->boot_cpus--; | |
254 | /* Update the number of CPUs in CMOS */ | |
255 | x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); | |
256 | fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); | |
257 | out: | |
258 | error_propagate(errp, local_err); | |
259 | } | |
260 | ||
261 | void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, | |
262 | DeviceState *dev, Error **errp) | |
263 | { | |
264 | int idx; | |
265 | CPUState *cs; | |
266 | CPUArchId *cpu_slot; | |
267 | X86CPUTopoIDs topo_ids; | |
268 | X86CPU *cpu = X86_CPU(dev); | |
269 | CPUX86State *env = &cpu->env; | |
270 | MachineState *ms = MACHINE(hotplug_dev); | |
271 | X86MachineState *x86ms = X86_MACHINE(hotplug_dev); | |
272 | unsigned int smp_cores = ms->smp.cores; | |
273 | unsigned int smp_threads = ms->smp.threads; | |
274 | X86CPUTopoInfo topo_info; | |
275 | ||
276 | if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { | |
277 | error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", | |
278 | ms->cpu_type); | |
279 | return; | |
280 | } | |
281 | ||
c5be7517 IM |
282 | if (x86ms->acpi_dev) { |
283 | Error *local_err = NULL; | |
284 | ||
285 | hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, | |
286 | &local_err); | |
287 | if (local_err) { | |
288 | error_propagate(errp, local_err); | |
289 | return; | |
290 | } | |
291 | } | |
292 | ||
0cca1a91 GH |
293 | init_topo_info(&topo_info, x86ms); |
294 | ||
295 | env->nr_dies = x86ms->smp_dies; | |
296 | ||
297 | /* | |
298 | * If APIC ID is not set, | |
299 | * set it based on socket/die/core/thread properties. | |
300 | */ | |
301 | if (cpu->apic_id == UNASSIGNED_APIC_ID) { | |
302 | int max_socket = (ms->smp.max_cpus - 1) / | |
303 | smp_threads / smp_cores / x86ms->smp_dies; | |
304 | ||
305 | /* | |
306 | * die-id was optional in QEMU 4.0 and older, so keep it optional | |
307 | * if there's only one die per socket. | |
308 | */ | |
309 | if (cpu->die_id < 0 && x86ms->smp_dies == 1) { | |
310 | cpu->die_id = 0; | |
311 | } | |
312 | ||
313 | if (cpu->socket_id < 0) { | |
314 | error_setg(errp, "CPU socket-id is not set"); | |
315 | return; | |
316 | } else if (cpu->socket_id > max_socket) { | |
317 | error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", | |
318 | cpu->socket_id, max_socket); | |
319 | return; | |
320 | } | |
321 | if (cpu->die_id < 0) { | |
322 | error_setg(errp, "CPU die-id is not set"); | |
323 | return; | |
324 | } else if (cpu->die_id > x86ms->smp_dies - 1) { | |
325 | error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", | |
326 | cpu->die_id, x86ms->smp_dies - 1); | |
327 | return; | |
328 | } | |
329 | if (cpu->core_id < 0) { | |
330 | error_setg(errp, "CPU core-id is not set"); | |
331 | return; | |
332 | } else if (cpu->core_id > (smp_cores - 1)) { | |
333 | error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", | |
334 | cpu->core_id, smp_cores - 1); | |
335 | return; | |
336 | } | |
337 | if (cpu->thread_id < 0) { | |
338 | error_setg(errp, "CPU thread-id is not set"); | |
339 | return; | |
340 | } else if (cpu->thread_id > (smp_threads - 1)) { | |
341 | error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", | |
342 | cpu->thread_id, smp_threads - 1); | |
343 | return; | |
344 | } | |
345 | ||
346 | topo_ids.pkg_id = cpu->socket_id; | |
347 | topo_ids.die_id = cpu->die_id; | |
348 | topo_ids.core_id = cpu->core_id; | |
349 | topo_ids.smt_id = cpu->thread_id; | |
350 | cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); | |
351 | } | |
352 | ||
353 | cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); | |
354 | if (!cpu_slot) { | |
355 | MachineState *ms = MACHINE(x86ms); | |
356 | ||
357 | x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); | |
358 | error_setg(errp, | |
359 | "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" | |
360 | " APIC ID %" PRIu32 ", valid index range 0:%d", | |
361 | topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, | |
362 | cpu->apic_id, ms->possible_cpus->len - 1); | |
363 | return; | |
364 | } | |
365 | ||
366 | if (cpu_slot->cpu) { | |
367 | error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", | |
368 | idx, cpu->apic_id); | |
369 | return; | |
370 | } | |
371 | ||
372 | /* if 'address' properties socket-id/core-id/thread-id are not set, set them | |
373 | * so that machine_query_hotpluggable_cpus would show correct values | |
374 | */ | |
375 | /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() | |
376 | * once -smp refactoring is complete and there will be CPU private | |
377 | * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ | |
378 | x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); | |
379 | if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { | |
380 | error_setg(errp, "property socket-id: %u doesn't match set apic-id:" | |
381 | " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, | |
382 | topo_ids.pkg_id); | |
383 | return; | |
384 | } | |
385 | cpu->socket_id = topo_ids.pkg_id; | |
386 | ||
387 | if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { | |
388 | error_setg(errp, "property die-id: %u doesn't match set apic-id:" | |
389 | " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); | |
390 | return; | |
391 | } | |
392 | cpu->die_id = topo_ids.die_id; | |
393 | ||
394 | if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { | |
395 | error_setg(errp, "property core-id: %u doesn't match set apic-id:" | |
396 | " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, | |
397 | topo_ids.core_id); | |
398 | return; | |
399 | } | |
400 | cpu->core_id = topo_ids.core_id; | |
401 | ||
402 | if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { | |
403 | error_setg(errp, "property thread-id: %u doesn't match set apic-id:" | |
404 | " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, | |
405 | topo_ids.smt_id); | |
406 | return; | |
407 | } | |
408 | cpu->thread_id = topo_ids.smt_id; | |
409 | ||
410 | if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && | |
411 | !kvm_hv_vpindex_settable()) { | |
412 | error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); | |
413 | return; | |
414 | } | |
415 | ||
416 | cs = CPU(cpu); | |
417 | cs->cpu_index = idx; | |
418 | ||
419 | numa_cpu_pre_plug(cpu_slot, dev, errp); | |
420 | } | |
549e984e SL |
421 | |
422 | CpuInstanceProperties | |
423 | x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) | |
424 | { | |
425 | MachineClass *mc = MACHINE_GET_CLASS(ms); | |
426 | const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); | |
427 | ||
428 | assert(cpu_index < possible_cpus->len); | |
429 | return possible_cpus->cpus[cpu_index].props; | |
430 | } | |
431 | ||
432 | int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) | |
433 | { | |
dcf08bc6 | 434 | X86CPUTopoIDs topo_ids; |
f0bb276b | 435 | X86MachineState *x86ms = X86_MACHINE(ms); |
53a5e7bd BM |
436 | X86CPUTopoInfo topo_info; |
437 | ||
438 | init_topo_info(&topo_info, x86ms); | |
549e984e SL |
439 | |
440 | assert(idx < ms->possible_cpus->len); | |
dfe7ed0a BM |
441 | x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, |
442 | &topo_info, &topo_ids); | |
dcf08bc6 | 443 | return topo_ids.pkg_id % ms->numa_state->num_nodes; |
549e984e SL |
444 | } |
445 | ||
446 | const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) | |
447 | { | |
f0bb276b | 448 | X86MachineState *x86ms = X86_MACHINE(ms); |
549e984e | 449 | unsigned int max_cpus = ms->smp.max_cpus; |
53a5e7bd BM |
450 | X86CPUTopoInfo topo_info; |
451 | int i; | |
549e984e SL |
452 | |
453 | if (ms->possible_cpus) { | |
454 | /* | |
455 | * make sure that max_cpus hasn't changed since the first use, i.e. | |
456 | * -smp hasn't been parsed after it | |
457 | */ | |
458 | assert(ms->possible_cpus->len == max_cpus); | |
459 | return ms->possible_cpus; | |
460 | } | |
461 | ||
462 | ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + | |
463 | sizeof(CPUArchId) * max_cpus); | |
464 | ms->possible_cpus->len = max_cpus; | |
53a5e7bd BM |
465 | |
466 | init_topo_info(&topo_info, x86ms); | |
467 | ||
549e984e | 468 | for (i = 0; i < ms->possible_cpus->len; i++) { |
dcf08bc6 | 469 | X86CPUTopoIDs topo_ids; |
549e984e SL |
470 | |
471 | ms->possible_cpus->cpus[i].type = ms->cpu_type; | |
472 | ms->possible_cpus->cpus[i].vcpus_count = 1; | |
dfe7ed0a BM |
473 | ms->possible_cpus->cpus[i].arch_id = |
474 | x86_cpu_apic_id_from_index(x86ms, i); | |
475 | x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, | |
476 | &topo_info, &topo_ids); | |
549e984e | 477 | ms->possible_cpus->cpus[i].props.has_socket_id = true; |
dcf08bc6 | 478 | ms->possible_cpus->cpus[i].props.socket_id = topo_ids.pkg_id; |
f0bb276b | 479 | if (x86ms->smp_dies > 1) { |
549e984e | 480 | ms->possible_cpus->cpus[i].props.has_die_id = true; |
dcf08bc6 | 481 | ms->possible_cpus->cpus[i].props.die_id = topo_ids.die_id; |
549e984e SL |
482 | } |
483 | ms->possible_cpus->cpus[i].props.has_core_id = true; | |
dcf08bc6 | 484 | ms->possible_cpus->cpus[i].props.core_id = topo_ids.core_id; |
549e984e | 485 | ms->possible_cpus->cpus[i].props.has_thread_id = true; |
dcf08bc6 | 486 | ms->possible_cpus->cpus[i].props.thread_id = topo_ids.smt_id; |
549e984e SL |
487 | } |
488 | return ms->possible_cpus; | |
489 | } | |
490 | ||
f0bb276b PB |
491 | static void x86_nmi(NMIState *n, int cpu_index, Error **errp) |
492 | { | |
493 | /* cpu index isn't used */ | |
494 | CPUState *cs; | |
495 | ||
496 | CPU_FOREACH(cs) { | |
497 | X86CPU *cpu = X86_CPU(cs); | |
498 | ||
499 | if (!cpu->apic_state) { | |
500 | cpu_interrupt(cs, CPU_INTERRUPT_NMI); | |
501 | } else { | |
502 | apic_deliver_nmi(cpu->apic_state); | |
503 | } | |
504 | } | |
505 | } | |
506 | ||
549e984e SL |
/*
 * Return the size in bytes of the stream @f, leaving the file position
 * where it was on entry.
 *
 * Returns -1 if the current position cannot be determined, if seeking to
 * the end fails, or if the original position cannot be restored; in the
 * last case the size would be useless to a caller that goes on reading
 * from @f.
 */
static long get_file_size(FILE *f)
{
    long where, size;

    /* XXX: on Unix systems, using fstat() probably makes more sense */

    where = ftell(f);
    if (where < 0) {
        return -1;
    }
    if (fseek(f, 0, SEEK_END) != 0) {
        return -1;
    }
    size = ftell(f);
    if (fseek(f, where, SEEK_SET) != 0) {
        return -1;
    }

    return size;
}
520 | ||
89a289c7 PB |
/* TSC handling */

/*
 * Return the TSC value for the guest.  @env is not used: the value is
 * whatever cpu_get_ticks() returns.
 */
uint64_t cpu_get_tsc(CPUX86State *env)
{
    return cpu_get_ticks();
}
526 | ||
527 | /* IRQ handling */ | |
528 | static void pic_irq_request(void *opaque, int irq, int level) | |
529 | { | |
530 | CPUState *cs = first_cpu; | |
531 | X86CPU *cpu = X86_CPU(cs); | |
532 | ||
533 | trace_x86_pic_interrupt(irq, level); | |
534 | if (cpu->apic_state && !kvm_irqchip_in_kernel()) { | |
535 | CPU_FOREACH(cs) { | |
536 | cpu = X86_CPU(cs); | |
537 | if (apic_accept_pic_intr(cpu->apic_state)) { | |
538 | apic_deliver_pic_intr(cpu->apic_state, level); | |
539 | } | |
540 | } | |
541 | } else { | |
542 | if (level) { | |
543 | cpu_interrupt(cs, CPU_INTERRUPT_HARD); | |
544 | } else { | |
545 | cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); | |
546 | } | |
547 | } | |
548 | } | |
549 | ||
/*
 * Allocate a qemu_irq whose handler is pic_irq_request(), with a NULL
 * opaque pointer and line number 0.
 */
qemu_irq x86_allocate_cpu_irq(void)
{
    return qemu_allocate_irq(pic_irq_request, NULL, 0);
}
554 | ||
555 | int cpu_get_pic_interrupt(CPUX86State *env) | |
556 | { | |
557 | X86CPU *cpu = env_archcpu(env); | |
558 | int intno; | |
559 | ||
560 | if (!kvm_irqchip_in_kernel()) { | |
561 | intno = apic_get_interrupt(cpu->apic_state); | |
562 | if (intno >= 0) { | |
563 | return intno; | |
564 | } | |
565 | /* read the irq from the PIC */ | |
566 | if (!apic_accept_pic_intr(cpu->apic_state)) { | |
567 | return -1; | |
568 | } | |
569 | } | |
570 | ||
571 | intno = pic_read_irq(isa_pic); | |
572 | return intno; | |
573 | } | |
574 | ||
575 | DeviceState *cpu_get_current_apic(void) | |
576 | { | |
577 | if (current_cpu) { | |
578 | X86CPU *cpu = X86_CPU(current_cpu); | |
579 | return cpu->apic_state; | |
580 | } else { | |
581 | return NULL; | |
582 | } | |
583 | } | |
584 | ||
/*
 * qemu_irq handler for a global system interrupt line.
 *
 * @opaque is the GSIState, @n the line number and @level the new level.
 * Lines below ISA_NUM_IRQS are forwarded to the i8259 input with the same
 * number; every line is forwarded to the IOAPIC input with the same number.
 */
void gsi_handler(void *opaque, int n, int level)
{
    GSIState *s = opaque;

    trace_x86_gsi_interrupt(n, level);
    if (n < ISA_NUM_IRQS) {
        /* Under KVM, Kernel will forward to both PIC and IOAPIC */
        qemu_set_irq(s->i8259_irq[n], level);
    }
    qemu_set_irq(s->ioapic_irq[n], level);
}
596 | ||
597 | void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name) | |
598 | { | |
599 | DeviceState *dev; | |
600 | SysBusDevice *d; | |
601 | unsigned int i; | |
602 | ||
14a1bb48 | 603 | assert(parent_name); |
89a289c7 | 604 | if (kvm_ioapic_in_kernel()) { |
3e80f690 | 605 | dev = qdev_new(TYPE_KVM_IOAPIC); |
89a289c7 | 606 | } else { |
3e80f690 | 607 | dev = qdev_new(TYPE_IOAPIC); |
89a289c7 | 608 | } |
14a1bb48 | 609 | object_property_add_child(object_resolve_path(parent_name, NULL), |
d2623129 | 610 | "ioapic", OBJECT(dev)); |
89a289c7 | 611 | d = SYS_BUS_DEVICE(dev); |
3c6ef471 | 612 | sysbus_realize_and_unref(d, &error_fatal); |
89a289c7 PB |
613 | sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); |
614 | ||
615 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { | |
616 | gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); | |
617 | } | |
618 | } | |
619 | ||
549e984e SL |
/*
 * Packed fixed-size header followed by a variable-length payload.
 * NOTE(review): the layout looks like a node of the Linux x86 boot
 * protocol's setup_data list - confirm against bootparam.h.
 */
struct setup_data {
    uint64_t next;  /* presumably the address of the next node - TODO confirm */
    uint32_t type;
    uint32_t len;   /* presumably the size of data[] in bytes - TODO confirm */
    uint8_t data[]; /* flexible array member holding the payload */
} __attribute__((packed));
626 | ||
627 | ||
628 | /* | |
629 | * The entry point into the kernel for PVH boot is different from | |
630 | * the native entry point. The PVH entry is defined by the x86/HVM | |
631 | * direct boot ABI and is available in an ELFNOTE in the kernel binary. | |
632 | * | |
633 | * This function is passed to load_elf() when it is called from | |
634 | * load_elfboot() which then additionally checks for an ELF Note of | |
635 | * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to | |
636 | * parse the PVH entry address from the ELF Note. | |
637 | * | |
638 | * Due to trickery in elf_opts.h, load_elf() is actually available as | |
639 | * load_elf32() or load_elf64() and this routine needs to be able | |
640 | * to deal with being called as 32 or 64 bit. | |
641 | * | |
642 | * The address of the PVH entry point is saved to the 'pvh_start_addr' | |
643 | * global variable. (although the entry point is 32-bit, the kernel | |
644 | * binary can be either 32-bit or 64-bit). | |
645 | */ | |
646 | static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) | |
647 | { | |
648 | size_t *elf_note_data_addr; | |
649 | ||
650 | /* Check if ELF Note header passed in is valid */ | |
651 | if (arg1 == NULL) { | |
652 | return 0; | |
653 | } | |
654 | ||
655 | if (is64) { | |
656 | struct elf64_note *nhdr64 = (struct elf64_note *)arg1; | |
657 | uint64_t nhdr_size64 = sizeof(struct elf64_note); | |
658 | uint64_t phdr_align = *(uint64_t *)arg2; | |
659 | uint64_t nhdr_namesz = nhdr64->n_namesz; | |
660 | ||
661 | elf_note_data_addr = | |
662 | ((void *)nhdr64) + nhdr_size64 + | |
663 | QEMU_ALIGN_UP(nhdr_namesz, phdr_align); | |
664 | } else { | |
665 | struct elf32_note *nhdr32 = (struct elf32_note *)arg1; | |
666 | uint32_t nhdr_size32 = sizeof(struct elf32_note); | |
667 | uint32_t phdr_align = *(uint32_t *)arg2; | |
668 | uint32_t nhdr_namesz = nhdr32->n_namesz; | |
669 | ||
670 | elf_note_data_addr = | |
671 | ((void *)nhdr32) + nhdr_size32 + | |
672 | QEMU_ALIGN_UP(nhdr_namesz, phdr_align); | |
673 | } | |
674 | ||
675 | pvh_start_addr = *elf_note_data_addr; | |
676 | ||
677 | return pvh_start_addr; | |
678 | } | |
679 | ||
680 | static bool load_elfboot(const char *kernel_filename, | |
681 | int kernel_file_size, | |
682 | uint8_t *header, | |
683 | size_t pvh_xen_start_addr, | |
684 | FWCfgState *fw_cfg) | |
685 | { | |
686 | uint32_t flags = 0; | |
687 | uint32_t mh_load_addr = 0; | |
688 | uint32_t elf_kernel_size = 0; | |
689 | uint64_t elf_entry; | |
690 | uint64_t elf_low, elf_high; | |
691 | int kernel_size; | |
692 | ||
693 | if (ldl_p(header) != 0x464c457f) { | |
694 | return false; /* no elfboot */ | |
695 | } | |
696 | ||
697 | bool elf_is64 = header[EI_CLASS] == ELFCLASS64; | |
698 | flags = elf_is64 ? | |
699 | ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; | |
700 | ||
701 | if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ | |
702 | error_report("elfboot unsupported flags = %x", flags); | |
703 | exit(1); | |
704 | } | |
705 | ||
706 | uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; | |
707 | kernel_size = load_elf(kernel_filename, read_pvh_start_addr, | |
708 | NULL, &elf_note_type, &elf_entry, | |
6cdda0ff | 709 | &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, |
549e984e SL |
710 | 0, 0); |
711 | ||
712 | if (kernel_size < 0) { | |
713 | error_report("Error while loading elf kernel"); | |
714 | exit(1); | |
715 | } | |
716 | mh_load_addr = elf_low; | |
717 | elf_kernel_size = elf_high - elf_low; | |
718 | ||
719 | if (pvh_start_addr == 0) { | |
720 | error_report("Error loading uncompressed kernel without PVH ELF Note"); | |
721 | exit(1); | |
722 | } | |
723 | fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); | |
724 | fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); | |
725 | fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); | |
726 | ||
727 | return true; | |
728 | } | |
729 | ||
703a548a SL |
730 | void x86_load_linux(X86MachineState *x86ms, |
731 | FWCfgState *fw_cfg, | |
732 | int acpi_data_size, | |
733 | bool pvh_enabled, | |
734 | bool linuxboot_dma_enabled) | |
549e984e SL |
735 | { |
736 | uint16_t protocol; | |
737 | int setup_size, kernel_size, cmdline_size; | |
738 | int dtb_size, setup_data_offset; | |
739 | uint32_t initrd_max; | |
740 | uint8_t header[8192], *setup, *kernel; | |
741 | hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; | |
742 | FILE *f; | |
743 | char *vmode; | |
703a548a | 744 | MachineState *machine = MACHINE(x86ms); |
549e984e SL |
745 | struct setup_data *setup_data; |
746 | const char *kernel_filename = machine->kernel_filename; | |
747 | const char *initrd_filename = machine->initrd_filename; | |
748 | const char *dtb_filename = machine->dtb; | |
749 | const char *kernel_cmdline = machine->kernel_cmdline; | |
750 | ||
751 | /* Align to 16 bytes as a paranoia measure */ | |
752 | cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; | |
753 | ||
754 | /* load the kernel header */ | |
755 | f = fopen(kernel_filename, "rb"); | |
756 | if (!f) { | |
757 | fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", | |
758 | kernel_filename, strerror(errno)); | |
759 | exit(1); | |
760 | } | |
761 | ||
762 | kernel_size = get_file_size(f); | |
763 | if (!kernel_size || | |
764 | fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != | |
765 | MIN(ARRAY_SIZE(header), kernel_size)) { | |
766 | fprintf(stderr, "qemu: could not load kernel '%s': %s\n", | |
767 | kernel_filename, strerror(errno)); | |
768 | exit(1); | |
769 | } | |
770 | ||
771 | /* kernel protocol version */ | |
772 | if (ldl_p(header + 0x202) == 0x53726448) { | |
773 | protocol = lduw_p(header + 0x206); | |
774 | } else { | |
775 | /* | |
776 | * This could be a multiboot kernel. If it is, let's stop treating it | |
777 | * like a Linux kernel. | |
778 | * Note: some multiboot images could be in the ELF format (the same of | |
779 | * PVH), so we try multiboot first since we check the multiboot magic | |
780 | * header before to load it. | |
781 | */ | |
782 | if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, | |
783 | kernel_cmdline, kernel_size, header)) { | |
784 | return; | |
785 | } | |
786 | /* | |
787 | * Check if the file is an uncompressed kernel file (ELF) and load it, | |
788 | * saving the PVH entry point used by the x86/HVM direct boot ABI. | |
789 | * If load_elfboot() is successful, populate the fw_cfg info. | |
790 | */ | |
703a548a | 791 | if (pvh_enabled && |
549e984e SL |
792 | load_elfboot(kernel_filename, kernel_size, |
793 | header, pvh_start_addr, fw_cfg)) { | |
794 | fclose(f); | |
795 | ||
796 | fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, | |
797 | strlen(kernel_cmdline) + 1); | |
798 | fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); | |
799 | ||
800 | fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); | |
801 | fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, | |
802 | header, sizeof(header)); | |
803 | ||
804 | /* load initrd */ | |
805 | if (initrd_filename) { | |
806 | GMappedFile *mapped_file; | |
807 | gsize initrd_size; | |
808 | gchar *initrd_data; | |
809 | GError *gerr = NULL; | |
810 | ||
811 | mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); | |
812 | if (!mapped_file) { | |
813 | fprintf(stderr, "qemu: error reading initrd %s: %s\n", | |
814 | initrd_filename, gerr->message); | |
815 | exit(1); | |
816 | } | |
f0bb276b | 817 | x86ms->initrd_mapped_file = mapped_file; |
549e984e SL |
818 | |
819 | initrd_data = g_mapped_file_get_contents(mapped_file); | |
820 | initrd_size = g_mapped_file_get_length(mapped_file); | |
703a548a | 821 | initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; |
549e984e SL |
822 | if (initrd_size >= initrd_max) { |
823 | fprintf(stderr, "qemu: initrd is too large, cannot support." | |
824 | "(max: %"PRIu32", need %"PRId64")\n", | |
825 | initrd_max, (uint64_t)initrd_size); | |
826 | exit(1); | |
827 | } | |
828 | ||
829 | initrd_addr = (initrd_max - initrd_size) & ~4095; | |
830 | ||
831 | fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); | |
832 | fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); | |
833 | fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, | |
834 | initrd_size); | |
835 | } | |
836 | ||
837 | option_rom[nb_option_roms].bootindex = 0; | |
838 | option_rom[nb_option_roms].name = "pvh.bin"; | |
839 | nb_option_roms++; | |
840 | ||
841 | return; | |
842 | } | |
843 | protocol = 0; | |
844 | } | |
845 | ||
846 | if (protocol < 0x200 || !(header[0x211] & 0x01)) { | |
847 | /* Low kernel */ | |
848 | real_addr = 0x90000; | |
849 | cmdline_addr = 0x9a000 - cmdline_size; | |
850 | prot_addr = 0x10000; | |
851 | } else if (protocol < 0x202) { | |
852 | /* High but ancient kernel */ | |
853 | real_addr = 0x90000; | |
854 | cmdline_addr = 0x9a000 - cmdline_size; | |
855 | prot_addr = 0x100000; | |
856 | } else { | |
857 | /* High and recent kernel */ | |
858 | real_addr = 0x10000; | |
859 | cmdline_addr = 0x20000; | |
860 | prot_addr = 0x100000; | |
861 | } | |
862 | ||
863 | /* highest address for loading the initrd */ | |
864 | if (protocol >= 0x20c && | |
865 | lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { | |
866 | /* | |
867 | * Linux has supported initrd up to 4 GB for a very long time (2007, | |
868 | * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), | |
869 | * though it only sets initrd_max to 2 GB to "work around bootloader | |
870 | * bugs". Luckily, QEMU firmware(which does something like bootloader) | |
871 | * has supported this. | |
872 | * | |
873 | * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can | |
874 | * be loaded into any address. | |
875 | * | |
876 | * In addition, initrd_max is uint32_t simply because QEMU doesn't | |
877 | * support the 64-bit boot protocol (specifically the ext_ramdisk_image | |
878 | * field). | |
879 | * | |
880 | * Therefore here just limit initrd_max to UINT32_MAX simply as well. | |
881 | */ | |
882 | initrd_max = UINT32_MAX; | |
883 | } else if (protocol >= 0x203) { | |
884 | initrd_max = ldl_p(header + 0x22c); | |
885 | } else { | |
886 | initrd_max = 0x37ffffff; | |
887 | } | |
888 | ||
703a548a SL |
889 | if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { |
890 | initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; | |
549e984e SL |
891 | } |
892 | ||
893 | fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); | |
894 | fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); | |
895 | fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); | |
896 | ||
897 | if (protocol >= 0x202) { | |
898 | stl_p(header + 0x228, cmdline_addr); | |
899 | } else { | |
900 | stw_p(header + 0x20, 0xA33F); | |
901 | stw_p(header + 0x22, cmdline_addr - real_addr); | |
902 | } | |
903 | ||
904 | /* handle vga= parameter */ | |
905 | vmode = strstr(kernel_cmdline, "vga="); | |
906 | if (vmode) { | |
907 | unsigned int video_mode; | |
a88c40f0 | 908 | const char *end; |
549e984e SL |
909 | int ret; |
910 | /* skip "vga=" */ | |
911 | vmode += 4; | |
912 | if (!strncmp(vmode, "normal", 6)) { | |
913 | video_mode = 0xffff; | |
914 | } else if (!strncmp(vmode, "ext", 3)) { | |
915 | video_mode = 0xfffe; | |
916 | } else if (!strncmp(vmode, "ask", 3)) { | |
917 | video_mode = 0xfffd; | |
918 | } else { | |
a88c40f0 PW |
919 | ret = qemu_strtoui(vmode, &end, 0, &video_mode); |
920 | if (ret != 0 || (*end && *end != ' ')) { | |
921 | fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); | |
549e984e SL |
922 | exit(1); |
923 | } | |
924 | } | |
925 | stw_p(header + 0x1fa, video_mode); | |
926 | } | |
927 | ||
928 | /* loader type */ | |
929 | /* | |
930 | * High nybble = B reserved for QEMU; low nybble is revision number. | |
931 | * If this code is substantially changed, you may want to consider | |
932 | * incrementing the revision. | |
933 | */ | |
934 | if (protocol >= 0x200) { | |
935 | header[0x210] = 0xB0; | |
936 | } | |
937 | /* heap */ | |
938 | if (protocol >= 0x201) { | |
939 | header[0x211] |= 0x80; /* CAN_USE_HEAP */ | |
940 | stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); | |
941 | } | |
942 | ||
943 | /* load initrd */ | |
944 | if (initrd_filename) { | |
945 | GMappedFile *mapped_file; | |
946 | gsize initrd_size; | |
947 | gchar *initrd_data; | |
948 | GError *gerr = NULL; | |
949 | ||
950 | if (protocol < 0x200) { | |
951 | fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); | |
952 | exit(1); | |
953 | } | |
954 | ||
955 | mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); | |
956 | if (!mapped_file) { | |
957 | fprintf(stderr, "qemu: error reading initrd %s: %s\n", | |
958 | initrd_filename, gerr->message); | |
959 | exit(1); | |
960 | } | |
f0bb276b | 961 | x86ms->initrd_mapped_file = mapped_file; |
549e984e SL |
962 | |
963 | initrd_data = g_mapped_file_get_contents(mapped_file); | |
964 | initrd_size = g_mapped_file_get_length(mapped_file); | |
965 | if (initrd_size >= initrd_max) { | |
966 | fprintf(stderr, "qemu: initrd is too large, cannot support." | |
967 | "(max: %"PRIu32", need %"PRId64")\n", | |
968 | initrd_max, (uint64_t)initrd_size); | |
969 | exit(1); | |
970 | } | |
971 | ||
972 | initrd_addr = (initrd_max - initrd_size) & ~4095; | |
973 | ||
974 | fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); | |
975 | fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); | |
976 | fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); | |
977 | ||
978 | stl_p(header + 0x218, initrd_addr); | |
979 | stl_p(header + 0x21c, initrd_size); | |
980 | } | |
981 | ||
982 | /* load kernel and setup */ | |
983 | setup_size = header[0x1f1]; | |
984 | if (setup_size == 0) { | |
985 | setup_size = 4; | |
986 | } | |
987 | setup_size = (setup_size + 1) * 512; | |
988 | if (setup_size > kernel_size) { | |
989 | fprintf(stderr, "qemu: invalid kernel header\n"); | |
990 | exit(1); | |
991 | } | |
992 | kernel_size -= setup_size; | |
993 | ||
994 | setup = g_malloc(setup_size); | |
995 | kernel = g_malloc(kernel_size); | |
996 | fseek(f, 0, SEEK_SET); | |
997 | if (fread(setup, 1, setup_size, f) != setup_size) { | |
998 | fprintf(stderr, "fread() failed\n"); | |
999 | exit(1); | |
1000 | } | |
1001 | if (fread(kernel, 1, kernel_size, f) != kernel_size) { | |
1002 | fprintf(stderr, "fread() failed\n"); | |
1003 | exit(1); | |
1004 | } | |
1005 | fclose(f); | |
1006 | ||
1007 | /* append dtb to kernel */ | |
1008 | if (dtb_filename) { | |
1009 | if (protocol < 0x209) { | |
1010 | fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); | |
1011 | exit(1); | |
1012 | } | |
1013 | ||
1014 | dtb_size = get_image_size(dtb_filename); | |
1015 | if (dtb_size <= 0) { | |
1016 | fprintf(stderr, "qemu: error reading dtb %s: %s\n", | |
1017 | dtb_filename, strerror(errno)); | |
1018 | exit(1); | |
1019 | } | |
1020 | ||
1021 | setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); | |
1022 | kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; | |
1023 | kernel = g_realloc(kernel, kernel_size); | |
1024 | ||
1025 | stq_p(header + 0x250, prot_addr + setup_data_offset); | |
1026 | ||
1027 | setup_data = (struct setup_data *)(kernel + setup_data_offset); | |
1028 | setup_data->next = 0; | |
1029 | setup_data->type = cpu_to_le32(SETUP_DTB); | |
1030 | setup_data->len = cpu_to_le32(dtb_size); | |
1031 | ||
1032 | load_image_size(dtb_filename, setup_data->data, dtb_size); | |
1033 | } | |
1034 | ||
1035 | memcpy(setup, header, MIN(sizeof(header), setup_size)); | |
1036 | ||
1037 | fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); | |
1038 | fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); | |
1039 | fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); | |
1040 | ||
1041 | fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); | |
1042 | fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); | |
1043 | fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); | |
1044 | ||
1045 | option_rom[nb_option_roms].bootindex = 0; | |
1046 | option_rom[nb_option_roms].name = "linuxboot.bin"; | |
703a548a | 1047 | if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { |
549e984e SL |
1048 | option_rom[nb_option_roms].name = "linuxboot_dma.bin"; |
1049 | } | |
1050 | nb_option_roms++; | |
1051 | } | |
1052 | ||
1053 | void x86_bios_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) | |
1054 | { | |
1055 | char *filename; | |
1056 | MemoryRegion *bios, *isa_bios; | |
1057 | int bios_size, isa_bios_size; | |
1058 | int ret; | |
1059 | ||
1060 | /* BIOS load */ | |
1061 | if (bios_name == NULL) { | |
1062 | bios_name = BIOS_FILENAME; | |
1063 | } | |
1064 | filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); | |
1065 | if (filename) { | |
1066 | bios_size = get_image_size(filename); | |
1067 | } else { | |
1068 | bios_size = -1; | |
1069 | } | |
1070 | if (bios_size <= 0 || | |
1071 | (bios_size % 65536) != 0) { | |
1072 | goto bios_error; | |
1073 | } | |
1074 | bios = g_malloc(sizeof(*bios)); | |
1075 | memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); | |
1076 | if (!isapc_ram_fw) { | |
1077 | memory_region_set_readonly(bios, true); | |
1078 | } | |
1079 | ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); | |
1080 | if (ret != 0) { | |
1081 | bios_error: | |
1082 | fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); | |
1083 | exit(1); | |
1084 | } | |
1085 | g_free(filename); | |
1086 | ||
1087 | /* map the last 128KB of the BIOS in ISA space */ | |
1088 | isa_bios_size = MIN(bios_size, 128 * KiB); | |
1089 | isa_bios = g_malloc(sizeof(*isa_bios)); | |
1090 | memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, | |
1091 | bios_size - isa_bios_size, isa_bios_size); | |
1092 | memory_region_add_subregion_overlap(rom_memory, | |
1093 | 0x100000 - isa_bios_size, | |
1094 | isa_bios, | |
1095 | 1); | |
1096 | if (!isapc_ram_fw) { | |
1097 | memory_region_set_readonly(isa_bios, true); | |
1098 | } | |
1099 | ||
1100 | /* map all the bios at the top of memory */ | |
1101 | memory_region_add_subregion(rom_memory, | |
1102 | (uint32_t)(-bios_size), | |
1103 | bios); | |
1104 | } | |
f0bb276b | 1105 | |
9927a632 | 1106 | bool x86_machine_is_smm_enabled(const X86MachineState *x86ms) |
ed9e923c PB |
1107 | { |
1108 | bool smm_available = false; | |
1109 | ||
1110 | if (x86ms->smm == ON_OFF_AUTO_OFF) { | |
1111 | return false; | |
1112 | } | |
1113 | ||
1114 | if (tcg_enabled() || qtest_enabled()) { | |
1115 | smm_available = true; | |
1116 | } else if (kvm_enabled()) { | |
1117 | smm_available = kvm_has_smm(); | |
1118 | } | |
1119 | ||
1120 | if (smm_available) { | |
1121 | return true; | |
1122 | } | |
1123 | ||
1124 | if (x86ms->smm == ON_OFF_AUTO_ON) { | |
1125 | error_report("System Management Mode not supported by this hypervisor."); | |
1126 | exit(1); | |
1127 | } | |
1128 | return false; | |
1129 | } | |
1130 | ||
1131 | static void x86_machine_get_smm(Object *obj, Visitor *v, const char *name, | |
1132 | void *opaque, Error **errp) | |
1133 | { | |
1134 | X86MachineState *x86ms = X86_MACHINE(obj); | |
1135 | OnOffAuto smm = x86ms->smm; | |
1136 | ||
1137 | visit_type_OnOffAuto(v, name, &smm, errp); | |
1138 | } | |
1139 | ||
1140 | static void x86_machine_set_smm(Object *obj, Visitor *v, const char *name, | |
1141 | void *opaque, Error **errp) | |
1142 | { | |
1143 | X86MachineState *x86ms = X86_MACHINE(obj); | |
1144 | ||
1145 | visit_type_OnOffAuto(v, name, &x86ms->smm, errp); | |
1146 | } | |
1147 | ||
9927a632 | 1148 | bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms) |
17e89077 GH |
1149 | { |
1150 | if (x86ms->acpi == ON_OFF_AUTO_OFF) { | |
1151 | return false; | |
1152 | } | |
1153 | return true; | |
1154 | } | |
1155 | ||
1156 | static void x86_machine_get_acpi(Object *obj, Visitor *v, const char *name, | |
1157 | void *opaque, Error **errp) | |
1158 | { | |
1159 | X86MachineState *x86ms = X86_MACHINE(obj); | |
1160 | OnOffAuto acpi = x86ms->acpi; | |
1161 | ||
1162 | visit_type_OnOffAuto(v, name, &acpi, errp); | |
1163 | } | |
1164 | ||
1165 | static void x86_machine_set_acpi(Object *obj, Visitor *v, const char *name, | |
1166 | void *opaque, Error **errp) | |
1167 | { | |
1168 | X86MachineState *x86ms = X86_MACHINE(obj); | |
1169 | ||
1170 | visit_type_OnOffAuto(v, name, &x86ms->acpi, errp); | |
1171 | } | |
1172 | ||
f0bb276b PB |
1173 | static void x86_machine_initfn(Object *obj) |
1174 | { | |
1175 | X86MachineState *x86ms = X86_MACHINE(obj); | |
1176 | ||
ed9e923c | 1177 | x86ms->smm = ON_OFF_AUTO_AUTO; |
17e89077 | 1178 | x86ms->acpi = ON_OFF_AUTO_AUTO; |
f0bb276b PB |
1179 | x86ms->smp_dies = 1; |
1180 | } | |
1181 | ||
1182 | static void x86_machine_class_init(ObjectClass *oc, void *data) | |
1183 | { | |
1184 | MachineClass *mc = MACHINE_CLASS(oc); | |
1185 | X86MachineClass *x86mc = X86_MACHINE_CLASS(oc); | |
1186 | NMIClass *nc = NMI_CLASS(oc); | |
1187 | ||
1188 | mc->cpu_index_to_instance_props = x86_cpu_index_to_props; | |
1189 | mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; | |
1190 | mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; | |
1191 | x86mc->compat_apic_id_mode = false; | |
2f34ebf2 | 1192 | x86mc->save_tsc_khz = true; |
f0bb276b PB |
1193 | nc->nmi_monitor_handler = x86_nmi; |
1194 | ||
ed9e923c PB |
1195 | object_class_property_add(oc, X86_MACHINE_SMM, "OnOffAuto", |
1196 | x86_machine_get_smm, x86_machine_set_smm, | |
d2623129 | 1197 | NULL, NULL); |
ed9e923c | 1198 | object_class_property_set_description(oc, X86_MACHINE_SMM, |
7eecec7d | 1199 | "Enable SMM"); |
17e89077 GH |
1200 | |
1201 | object_class_property_add(oc, X86_MACHINE_ACPI, "OnOffAuto", | |
1202 | x86_machine_get_acpi, x86_machine_set_acpi, | |
d2623129 | 1203 | NULL, NULL); |
17e89077 | 1204 | object_class_property_set_description(oc, X86_MACHINE_ACPI, |
7eecec7d | 1205 | "Enable ACPI"); |
f0bb276b PB |
1206 | } |
1207 | ||
1208 | static const TypeInfo x86_machine_info = { | |
1209 | .name = TYPE_X86_MACHINE, | |
1210 | .parent = TYPE_MACHINE, | |
1211 | .abstract = true, | |
1212 | .instance_size = sizeof(X86MachineState), | |
1213 | .instance_init = x86_machine_initfn, | |
1214 | .class_size = sizeof(X86MachineClass), | |
1215 | .class_init = x86_machine_class_init, | |
1216 | .interfaces = (InterfaceInfo[]) { | |
1217 | { TYPE_NMI }, | |
1218 | { } | |
1219 | }, | |
1220 | }; | |
1221 | ||
/* Register the abstract x86 machine type described by x86_machine_info. */
static void x86_machine_register_types(void)
{
    type_register_static(&x86_machine_info);
}

/* Hand the registration function to QEMU's type_init() hook. */
type_init(x86_machine_register_types)