]>
Commit | Line | Data |
---|---|---|
549e984e SL |
1 | /* |
2 | * Copyright (c) 2003-2004 Fabrice Bellard | |
3 | * Copyright (c) 2019 Red Hat, Inc. | |
4 | * | |
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
6 | * of this software and associated documentation files (the "Software"), to deal | |
7 | * in the Software without restriction, including without limitation the rights | |
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
9 | * copies of the Software, and to permit persons to whom the Software is | |
10 | * furnished to do so, subject to the following conditions: | |
11 | * | |
12 | * The above copyright notice and this permission notice shall be included in | |
13 | * all copies or substantial portions of the Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
21 | * THE SOFTWARE. | |
22 | */ | |
23 | #include "qemu/osdep.h" | |
24 | #include "qemu/error-report.h" | |
25 | #include "qemu/option.h" | |
26 | #include "qemu/cutils.h" | |
27 | #include "qemu/units.h" | |
28 | #include "qemu-common.h" | |
29 | #include "qapi/error.h" | |
30 | #include "qapi/qmp/qerror.h" | |
31 | #include "qapi/qapi-visit-common.h" | |
32 | #include "qapi/visitor.h" | |
33 | #include "sysemu/qtest.h" | |
34 | #include "sysemu/numa.h" | |
35 | #include "sysemu/replay.h" | |
36 | #include "sysemu/sysemu.h" | |
37 | ||
38 | #include "hw/i386/x86.h" | |
549e984e SL |
39 | #include "target/i386/cpu.h" |
40 | #include "hw/i386/topology.h" | |
41 | #include "hw/i386/fw_cfg.h" | |
852c27e2 | 42 | #include "hw/intc/i8259.h" |
549e984e SL |
43 | |
44 | #include "hw/acpi/cpu_hotplug.h" | |
45 | #include "hw/nmi.h" | |
46 | #include "hw/loader.h" | |
47 | #include "multiboot.h" | |
48 | #include "elf.h" | |
49 | #include "standard-headers/asm-x86/bootparam.h" | |
50 | ||
/* Firmware image used by x86_bios_rom_init() when bios_name is not set */
#define BIOS_FILENAME "bios.bin"

/*
 * Physical address of the PVH entry point, as read from the kernel ELF note
 * by read_pvh_start_addr(); stays 0 until such a note has been parsed.
 */
static size_t pvh_start_addr;
55 | ||
56 | /* | |
57 | * Calculates initial APIC ID for a specific CPU index | |
58 | * | |
59 | * Currently we need to be able to calculate the APIC ID from the CPU index | |
60 | * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have | |
61 | * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of | |
62 | * all CPUs up to max_cpus. | |
63 | */ | |
703a548a | 64 | uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, |
549e984e SL |
65 | unsigned int cpu_index) |
66 | { | |
703a548a | 67 | MachineState *ms = MACHINE(x86ms); |
f0bb276b | 68 | X86MachineClass *x86mc = X86_MACHINE_GET_CLASS(x86ms); |
549e984e SL |
69 | uint32_t correct_id; |
70 | static bool warned; | |
71 | ||
f0bb276b | 72 | correct_id = x86_apicid_from_cpu_idx(x86ms->smp_dies, ms->smp.cores, |
549e984e | 73 | ms->smp.threads, cpu_index); |
f0bb276b | 74 | if (x86mc->compat_apic_id_mode) { |
549e984e SL |
75 | if (cpu_index != correct_id && !warned && !qtest_enabled()) { |
76 | error_report("APIC IDs set in compatibility mode, " | |
77 | "CPU topology won't match the configuration"); | |
78 | warned = true; | |
79 | } | |
80 | return cpu_index; | |
81 | } else { | |
82 | return correct_id; | |
83 | } | |
84 | } | |
85 | ||
703a548a SL |
86 | |
87 | void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) | |
549e984e SL |
88 | { |
89 | Object *cpu = NULL; | |
90 | Error *local_err = NULL; | |
91 | CPUX86State *env = NULL; | |
92 | ||
703a548a | 93 | cpu = object_new(MACHINE(x86ms)->cpu_type); |
549e984e SL |
94 | |
95 | env = &X86_CPU(cpu)->env; | |
f0bb276b | 96 | env->nr_dies = x86ms->smp_dies; |
549e984e SL |
97 | |
98 | object_property_set_uint(cpu, apic_id, "apic-id", &local_err); | |
99 | object_property_set_bool(cpu, true, "realized", &local_err); | |
100 | ||
101 | object_unref(cpu); | |
102 | error_propagate(errp, local_err); | |
103 | } | |
104 | ||
703a548a | 105 | void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) |
549e984e SL |
106 | { |
107 | int i; | |
108 | const CPUArchIdList *possible_cpus; | |
703a548a SL |
109 | MachineState *ms = MACHINE(x86ms); |
110 | MachineClass *mc = MACHINE_GET_CLASS(x86ms); | |
549e984e | 111 | |
703a548a | 112 | x86_cpu_set_default_version(default_cpu_version); |
549e984e SL |
113 | |
114 | /* | |
115 | * Calculates the limit to CPU APIC ID values | |
116 | * | |
117 | * Limit for the APIC ID value, so that all | |
703a548a | 118 | * CPU APIC IDs are < x86ms->apic_id_limit. |
549e984e SL |
119 | * |
120 | * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). | |
121 | */ | |
703a548a | 122 | x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, |
f0bb276b | 123 | ms->smp.max_cpus - 1) + 1; |
549e984e SL |
124 | possible_cpus = mc->possible_cpu_arch_ids(ms); |
125 | for (i = 0; i < ms->smp.cpus; i++) { | |
703a548a | 126 | x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); |
549e984e SL |
127 | } |
128 | } | |
129 | ||
130 | CpuInstanceProperties | |
131 | x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) | |
132 | { | |
133 | MachineClass *mc = MACHINE_GET_CLASS(ms); | |
134 | const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); | |
135 | ||
136 | assert(cpu_index < possible_cpus->len); | |
137 | return possible_cpus->cpus[cpu_index].props; | |
138 | } | |
139 | ||
140 | int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) | |
141 | { | |
142 | X86CPUTopoInfo topo; | |
f0bb276b | 143 | X86MachineState *x86ms = X86_MACHINE(ms); |
549e984e SL |
144 | |
145 | assert(idx < ms->possible_cpus->len); | |
146 | x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, | |
f0bb276b | 147 | x86ms->smp_dies, ms->smp.cores, |
549e984e SL |
148 | ms->smp.threads, &topo); |
149 | return topo.pkg_id % ms->numa_state->num_nodes; | |
150 | } | |
151 | ||
152 | const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) | |
153 | { | |
f0bb276b | 154 | X86MachineState *x86ms = X86_MACHINE(ms); |
549e984e SL |
155 | int i; |
156 | unsigned int max_cpus = ms->smp.max_cpus; | |
157 | ||
158 | if (ms->possible_cpus) { | |
159 | /* | |
160 | * make sure that max_cpus hasn't changed since the first use, i.e. | |
161 | * -smp hasn't been parsed after it | |
162 | */ | |
163 | assert(ms->possible_cpus->len == max_cpus); | |
164 | return ms->possible_cpus; | |
165 | } | |
166 | ||
167 | ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + | |
168 | sizeof(CPUArchId) * max_cpus); | |
169 | ms->possible_cpus->len = max_cpus; | |
170 | for (i = 0; i < ms->possible_cpus->len; i++) { | |
171 | X86CPUTopoInfo topo; | |
172 | ||
173 | ms->possible_cpus->cpus[i].type = ms->cpu_type; | |
174 | ms->possible_cpus->cpus[i].vcpus_count = 1; | |
175 | ms->possible_cpus->cpus[i].arch_id = | |
703a548a | 176 | x86_cpu_apic_id_from_index(x86ms, i); |
549e984e | 177 | x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, |
f0bb276b | 178 | x86ms->smp_dies, ms->smp.cores, |
549e984e SL |
179 | ms->smp.threads, &topo); |
180 | ms->possible_cpus->cpus[i].props.has_socket_id = true; | |
181 | ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; | |
f0bb276b | 182 | if (x86ms->smp_dies > 1) { |
549e984e SL |
183 | ms->possible_cpus->cpus[i].props.has_die_id = true; |
184 | ms->possible_cpus->cpus[i].props.die_id = topo.die_id; | |
185 | } | |
186 | ms->possible_cpus->cpus[i].props.has_core_id = true; | |
187 | ms->possible_cpus->cpus[i].props.core_id = topo.core_id; | |
188 | ms->possible_cpus->cpus[i].props.has_thread_id = true; | |
189 | ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; | |
190 | } | |
191 | return ms->possible_cpus; | |
192 | } | |
193 | ||
f0bb276b PB |
194 | static void x86_nmi(NMIState *n, int cpu_index, Error **errp) |
195 | { | |
196 | /* cpu index isn't used */ | |
197 | CPUState *cs; | |
198 | ||
199 | CPU_FOREACH(cs) { | |
200 | X86CPU *cpu = X86_CPU(cs); | |
201 | ||
202 | if (!cpu->apic_state) { | |
203 | cpu_interrupt(cs, CPU_INTERRUPT_NMI); | |
204 | } else { | |
205 | apic_deliver_nmi(cpu->apic_state); | |
206 | } | |
207 | } | |
208 | } | |
209 | ||
549e984e SL |
/*
 * Return the size in bytes of the stream @f, leaving its file position where
 * it was on entry.
 *
 * Returns -1 if the current position cannot be read, or if either seek
 * (to the end, or back to the saved position) fails.
 */
static long get_file_size(FILE *f)
{
    long where, size;

    /* XXX: on Unix systems, using fstat() probably makes more sense */

    where = ftell(f);
    if (where < 0) {
        return -1;
    }

    if (fseek(f, 0, SEEK_END) != 0) {
        return -1;
    }
    size = ftell(f);

    /* always try to put the stream back where the caller left it */
    if (fseek(f, where, SEEK_SET) != 0) {
        return -1;
    }

    return size;
}
223 | ||
/*
 * Header of one setup_data node appended after the kernel image; @len bytes
 * of payload follow the header directly.  x86_load_linux() uses it to attach
 * a device tree blob (type SETUP_DTB).
 */
struct setup_data {
    uint64_t next;      /* written as 0 by x86_load_linux() */
    uint32_t type;      /* payload type, stored little-endian */
    uint32_t len;       /* payload length in bytes, stored little-endian */
    uint8_t data[];     /* payload (C99 flexible array member) */
} __attribute__((packed));
230 | ||
231 | ||
232 | /* | |
233 | * The entry point into the kernel for PVH boot is different from | |
234 | * the native entry point. The PVH entry is defined by the x86/HVM | |
235 | * direct boot ABI and is available in an ELFNOTE in the kernel binary. | |
236 | * | |
237 | * This function is passed to load_elf() when it is called from | |
238 | * load_elfboot() which then additionally checks for an ELF Note of | |
239 | * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to | |
240 | * parse the PVH entry address from the ELF Note. | |
241 | * | |
242 | * Due to trickery in elf_opts.h, load_elf() is actually available as | |
243 | * load_elf32() or load_elf64() and this routine needs to be able | |
244 | * to deal with being called as 32 or 64 bit. | |
245 | * | |
246 | * The address of the PVH entry point is saved to the 'pvh_start_addr' | |
247 | * global variable. (although the entry point is 32-bit, the kernel | |
248 | * binary can be either 32-bit or 64-bit). | |
249 | */ | |
250 | static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) | |
251 | { | |
252 | size_t *elf_note_data_addr; | |
253 | ||
254 | /* Check if ELF Note header passed in is valid */ | |
255 | if (arg1 == NULL) { | |
256 | return 0; | |
257 | } | |
258 | ||
259 | if (is64) { | |
260 | struct elf64_note *nhdr64 = (struct elf64_note *)arg1; | |
261 | uint64_t nhdr_size64 = sizeof(struct elf64_note); | |
262 | uint64_t phdr_align = *(uint64_t *)arg2; | |
263 | uint64_t nhdr_namesz = nhdr64->n_namesz; | |
264 | ||
265 | elf_note_data_addr = | |
266 | ((void *)nhdr64) + nhdr_size64 + | |
267 | QEMU_ALIGN_UP(nhdr_namesz, phdr_align); | |
268 | } else { | |
269 | struct elf32_note *nhdr32 = (struct elf32_note *)arg1; | |
270 | uint32_t nhdr_size32 = sizeof(struct elf32_note); | |
271 | uint32_t phdr_align = *(uint32_t *)arg2; | |
272 | uint32_t nhdr_namesz = nhdr32->n_namesz; | |
273 | ||
274 | elf_note_data_addr = | |
275 | ((void *)nhdr32) + nhdr_size32 + | |
276 | QEMU_ALIGN_UP(nhdr_namesz, phdr_align); | |
277 | } | |
278 | ||
279 | pvh_start_addr = *elf_note_data_addr; | |
280 | ||
281 | return pvh_start_addr; | |
282 | } | |
283 | ||
284 | static bool load_elfboot(const char *kernel_filename, | |
285 | int kernel_file_size, | |
286 | uint8_t *header, | |
287 | size_t pvh_xen_start_addr, | |
288 | FWCfgState *fw_cfg) | |
289 | { | |
290 | uint32_t flags = 0; | |
291 | uint32_t mh_load_addr = 0; | |
292 | uint32_t elf_kernel_size = 0; | |
293 | uint64_t elf_entry; | |
294 | uint64_t elf_low, elf_high; | |
295 | int kernel_size; | |
296 | ||
297 | if (ldl_p(header) != 0x464c457f) { | |
298 | return false; /* no elfboot */ | |
299 | } | |
300 | ||
301 | bool elf_is64 = header[EI_CLASS] == ELFCLASS64; | |
302 | flags = elf_is64 ? | |
303 | ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; | |
304 | ||
305 | if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ | |
306 | error_report("elfboot unsupported flags = %x", flags); | |
307 | exit(1); | |
308 | } | |
309 | ||
310 | uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; | |
311 | kernel_size = load_elf(kernel_filename, read_pvh_start_addr, | |
312 | NULL, &elf_note_type, &elf_entry, | |
313 | &elf_low, &elf_high, 0, I386_ELF_MACHINE, | |
314 | 0, 0); | |
315 | ||
316 | if (kernel_size < 0) { | |
317 | error_report("Error while loading elf kernel"); | |
318 | exit(1); | |
319 | } | |
320 | mh_load_addr = elf_low; | |
321 | elf_kernel_size = elf_high - elf_low; | |
322 | ||
323 | if (pvh_start_addr == 0) { | |
324 | error_report("Error loading uncompressed kernel without PVH ELF Note"); | |
325 | exit(1); | |
326 | } | |
327 | fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); | |
328 | fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); | |
329 | fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); | |
330 | ||
331 | return true; | |
332 | } | |
333 | ||
703a548a SL |
334 | void x86_load_linux(X86MachineState *x86ms, |
335 | FWCfgState *fw_cfg, | |
336 | int acpi_data_size, | |
337 | bool pvh_enabled, | |
338 | bool linuxboot_dma_enabled) | |
549e984e SL |
339 | { |
340 | uint16_t protocol; | |
341 | int setup_size, kernel_size, cmdline_size; | |
342 | int dtb_size, setup_data_offset; | |
343 | uint32_t initrd_max; | |
344 | uint8_t header[8192], *setup, *kernel; | |
345 | hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; | |
346 | FILE *f; | |
347 | char *vmode; | |
703a548a | 348 | MachineState *machine = MACHINE(x86ms); |
549e984e SL |
349 | struct setup_data *setup_data; |
350 | const char *kernel_filename = machine->kernel_filename; | |
351 | const char *initrd_filename = machine->initrd_filename; | |
352 | const char *dtb_filename = machine->dtb; | |
353 | const char *kernel_cmdline = machine->kernel_cmdline; | |
354 | ||
355 | /* Align to 16 bytes as a paranoia measure */ | |
356 | cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; | |
357 | ||
358 | /* load the kernel header */ | |
359 | f = fopen(kernel_filename, "rb"); | |
360 | if (!f) { | |
361 | fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", | |
362 | kernel_filename, strerror(errno)); | |
363 | exit(1); | |
364 | } | |
365 | ||
366 | kernel_size = get_file_size(f); | |
367 | if (!kernel_size || | |
368 | fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != | |
369 | MIN(ARRAY_SIZE(header), kernel_size)) { | |
370 | fprintf(stderr, "qemu: could not load kernel '%s': %s\n", | |
371 | kernel_filename, strerror(errno)); | |
372 | exit(1); | |
373 | } | |
374 | ||
375 | /* kernel protocol version */ | |
376 | if (ldl_p(header + 0x202) == 0x53726448) { | |
377 | protocol = lduw_p(header + 0x206); | |
378 | } else { | |
379 | /* | |
380 | * This could be a multiboot kernel. If it is, let's stop treating it | |
381 | * like a Linux kernel. | |
382 | * Note: some multiboot images could be in the ELF format (the same of | |
383 | * PVH), so we try multiboot first since we check the multiboot magic | |
384 | * header before to load it. | |
385 | */ | |
386 | if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, | |
387 | kernel_cmdline, kernel_size, header)) { | |
388 | return; | |
389 | } | |
390 | /* | |
391 | * Check if the file is an uncompressed kernel file (ELF) and load it, | |
392 | * saving the PVH entry point used by the x86/HVM direct boot ABI. | |
393 | * If load_elfboot() is successful, populate the fw_cfg info. | |
394 | */ | |
703a548a | 395 | if (pvh_enabled && |
549e984e SL |
396 | load_elfboot(kernel_filename, kernel_size, |
397 | header, pvh_start_addr, fw_cfg)) { | |
398 | fclose(f); | |
399 | ||
400 | fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, | |
401 | strlen(kernel_cmdline) + 1); | |
402 | fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); | |
403 | ||
404 | fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); | |
405 | fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, | |
406 | header, sizeof(header)); | |
407 | ||
408 | /* load initrd */ | |
409 | if (initrd_filename) { | |
410 | GMappedFile *mapped_file; | |
411 | gsize initrd_size; | |
412 | gchar *initrd_data; | |
413 | GError *gerr = NULL; | |
414 | ||
415 | mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); | |
416 | if (!mapped_file) { | |
417 | fprintf(stderr, "qemu: error reading initrd %s: %s\n", | |
418 | initrd_filename, gerr->message); | |
419 | exit(1); | |
420 | } | |
f0bb276b | 421 | x86ms->initrd_mapped_file = mapped_file; |
549e984e SL |
422 | |
423 | initrd_data = g_mapped_file_get_contents(mapped_file); | |
424 | initrd_size = g_mapped_file_get_length(mapped_file); | |
703a548a | 425 | initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; |
549e984e SL |
426 | if (initrd_size >= initrd_max) { |
427 | fprintf(stderr, "qemu: initrd is too large, cannot support." | |
428 | "(max: %"PRIu32", need %"PRId64")\n", | |
429 | initrd_max, (uint64_t)initrd_size); | |
430 | exit(1); | |
431 | } | |
432 | ||
433 | initrd_addr = (initrd_max - initrd_size) & ~4095; | |
434 | ||
435 | fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); | |
436 | fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); | |
437 | fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, | |
438 | initrd_size); | |
439 | } | |
440 | ||
441 | option_rom[nb_option_roms].bootindex = 0; | |
442 | option_rom[nb_option_roms].name = "pvh.bin"; | |
443 | nb_option_roms++; | |
444 | ||
445 | return; | |
446 | } | |
447 | protocol = 0; | |
448 | } | |
449 | ||
450 | if (protocol < 0x200 || !(header[0x211] & 0x01)) { | |
451 | /* Low kernel */ | |
452 | real_addr = 0x90000; | |
453 | cmdline_addr = 0x9a000 - cmdline_size; | |
454 | prot_addr = 0x10000; | |
455 | } else if (protocol < 0x202) { | |
456 | /* High but ancient kernel */ | |
457 | real_addr = 0x90000; | |
458 | cmdline_addr = 0x9a000 - cmdline_size; | |
459 | prot_addr = 0x100000; | |
460 | } else { | |
461 | /* High and recent kernel */ | |
462 | real_addr = 0x10000; | |
463 | cmdline_addr = 0x20000; | |
464 | prot_addr = 0x100000; | |
465 | } | |
466 | ||
467 | /* highest address for loading the initrd */ | |
468 | if (protocol >= 0x20c && | |
469 | lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { | |
470 | /* | |
471 | * Linux has supported initrd up to 4 GB for a very long time (2007, | |
472 | * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), | |
473 | * though it only sets initrd_max to 2 GB to "work around bootloader | |
474 | * bugs". Luckily, QEMU firmware(which does something like bootloader) | |
475 | * has supported this. | |
476 | * | |
477 | * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can | |
478 | * be loaded into any address. | |
479 | * | |
480 | * In addition, initrd_max is uint32_t simply because QEMU doesn't | |
481 | * support the 64-bit boot protocol (specifically the ext_ramdisk_image | |
482 | * field). | |
483 | * | |
484 | * Therefore here just limit initrd_max to UINT32_MAX simply as well. | |
485 | */ | |
486 | initrd_max = UINT32_MAX; | |
487 | } else if (protocol >= 0x203) { | |
488 | initrd_max = ldl_p(header + 0x22c); | |
489 | } else { | |
490 | initrd_max = 0x37ffffff; | |
491 | } | |
492 | ||
703a548a SL |
493 | if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { |
494 | initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; | |
549e984e SL |
495 | } |
496 | ||
497 | fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); | |
498 | fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); | |
499 | fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); | |
500 | ||
501 | if (protocol >= 0x202) { | |
502 | stl_p(header + 0x228, cmdline_addr); | |
503 | } else { | |
504 | stw_p(header + 0x20, 0xA33F); | |
505 | stw_p(header + 0x22, cmdline_addr - real_addr); | |
506 | } | |
507 | ||
508 | /* handle vga= parameter */ | |
509 | vmode = strstr(kernel_cmdline, "vga="); | |
510 | if (vmode) { | |
511 | unsigned int video_mode; | |
512 | int ret; | |
513 | /* skip "vga=" */ | |
514 | vmode += 4; | |
515 | if (!strncmp(vmode, "normal", 6)) { | |
516 | video_mode = 0xffff; | |
517 | } else if (!strncmp(vmode, "ext", 3)) { | |
518 | video_mode = 0xfffe; | |
519 | } else if (!strncmp(vmode, "ask", 3)) { | |
520 | video_mode = 0xfffd; | |
521 | } else { | |
522 | ret = qemu_strtoui(vmode, NULL, 0, &video_mode); | |
523 | if (ret != 0) { | |
524 | fprintf(stderr, "qemu: can't parse 'vga' parameter: %s\n", | |
525 | strerror(-ret)); | |
526 | exit(1); | |
527 | } | |
528 | } | |
529 | stw_p(header + 0x1fa, video_mode); | |
530 | } | |
531 | ||
532 | /* loader type */ | |
533 | /* | |
534 | * High nybble = B reserved for QEMU; low nybble is revision number. | |
535 | * If this code is substantially changed, you may want to consider | |
536 | * incrementing the revision. | |
537 | */ | |
538 | if (protocol >= 0x200) { | |
539 | header[0x210] = 0xB0; | |
540 | } | |
541 | /* heap */ | |
542 | if (protocol >= 0x201) { | |
543 | header[0x211] |= 0x80; /* CAN_USE_HEAP */ | |
544 | stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); | |
545 | } | |
546 | ||
547 | /* load initrd */ | |
548 | if (initrd_filename) { | |
549 | GMappedFile *mapped_file; | |
550 | gsize initrd_size; | |
551 | gchar *initrd_data; | |
552 | GError *gerr = NULL; | |
553 | ||
554 | if (protocol < 0x200) { | |
555 | fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); | |
556 | exit(1); | |
557 | } | |
558 | ||
559 | mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); | |
560 | if (!mapped_file) { | |
561 | fprintf(stderr, "qemu: error reading initrd %s: %s\n", | |
562 | initrd_filename, gerr->message); | |
563 | exit(1); | |
564 | } | |
f0bb276b | 565 | x86ms->initrd_mapped_file = mapped_file; |
549e984e SL |
566 | |
567 | initrd_data = g_mapped_file_get_contents(mapped_file); | |
568 | initrd_size = g_mapped_file_get_length(mapped_file); | |
569 | if (initrd_size >= initrd_max) { | |
570 | fprintf(stderr, "qemu: initrd is too large, cannot support." | |
571 | "(max: %"PRIu32", need %"PRId64")\n", | |
572 | initrd_max, (uint64_t)initrd_size); | |
573 | exit(1); | |
574 | } | |
575 | ||
576 | initrd_addr = (initrd_max - initrd_size) & ~4095; | |
577 | ||
578 | fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); | |
579 | fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); | |
580 | fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); | |
581 | ||
582 | stl_p(header + 0x218, initrd_addr); | |
583 | stl_p(header + 0x21c, initrd_size); | |
584 | } | |
585 | ||
586 | /* load kernel and setup */ | |
587 | setup_size = header[0x1f1]; | |
588 | if (setup_size == 0) { | |
589 | setup_size = 4; | |
590 | } | |
591 | setup_size = (setup_size + 1) * 512; | |
592 | if (setup_size > kernel_size) { | |
593 | fprintf(stderr, "qemu: invalid kernel header\n"); | |
594 | exit(1); | |
595 | } | |
596 | kernel_size -= setup_size; | |
597 | ||
598 | setup = g_malloc(setup_size); | |
599 | kernel = g_malloc(kernel_size); | |
600 | fseek(f, 0, SEEK_SET); | |
601 | if (fread(setup, 1, setup_size, f) != setup_size) { | |
602 | fprintf(stderr, "fread() failed\n"); | |
603 | exit(1); | |
604 | } | |
605 | if (fread(kernel, 1, kernel_size, f) != kernel_size) { | |
606 | fprintf(stderr, "fread() failed\n"); | |
607 | exit(1); | |
608 | } | |
609 | fclose(f); | |
610 | ||
611 | /* append dtb to kernel */ | |
612 | if (dtb_filename) { | |
613 | if (protocol < 0x209) { | |
614 | fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); | |
615 | exit(1); | |
616 | } | |
617 | ||
618 | dtb_size = get_image_size(dtb_filename); | |
619 | if (dtb_size <= 0) { | |
620 | fprintf(stderr, "qemu: error reading dtb %s: %s\n", | |
621 | dtb_filename, strerror(errno)); | |
622 | exit(1); | |
623 | } | |
624 | ||
625 | setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); | |
626 | kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; | |
627 | kernel = g_realloc(kernel, kernel_size); | |
628 | ||
629 | stq_p(header + 0x250, prot_addr + setup_data_offset); | |
630 | ||
631 | setup_data = (struct setup_data *)(kernel + setup_data_offset); | |
632 | setup_data->next = 0; | |
633 | setup_data->type = cpu_to_le32(SETUP_DTB); | |
634 | setup_data->len = cpu_to_le32(dtb_size); | |
635 | ||
636 | load_image_size(dtb_filename, setup_data->data, dtb_size); | |
637 | } | |
638 | ||
639 | memcpy(setup, header, MIN(sizeof(header), setup_size)); | |
640 | ||
641 | fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); | |
642 | fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); | |
643 | fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); | |
644 | ||
645 | fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); | |
646 | fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); | |
647 | fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); | |
648 | ||
649 | option_rom[nb_option_roms].bootindex = 0; | |
650 | option_rom[nb_option_roms].name = "linuxboot.bin"; | |
703a548a | 651 | if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { |
549e984e SL |
652 | option_rom[nb_option_roms].name = "linuxboot_dma.bin"; |
653 | } | |
654 | nb_option_roms++; | |
655 | } | |
656 | ||
657 | void x86_bios_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) | |
658 | { | |
659 | char *filename; | |
660 | MemoryRegion *bios, *isa_bios; | |
661 | int bios_size, isa_bios_size; | |
662 | int ret; | |
663 | ||
664 | /* BIOS load */ | |
665 | if (bios_name == NULL) { | |
666 | bios_name = BIOS_FILENAME; | |
667 | } | |
668 | filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); | |
669 | if (filename) { | |
670 | bios_size = get_image_size(filename); | |
671 | } else { | |
672 | bios_size = -1; | |
673 | } | |
674 | if (bios_size <= 0 || | |
675 | (bios_size % 65536) != 0) { | |
676 | goto bios_error; | |
677 | } | |
678 | bios = g_malloc(sizeof(*bios)); | |
679 | memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); | |
680 | if (!isapc_ram_fw) { | |
681 | memory_region_set_readonly(bios, true); | |
682 | } | |
683 | ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); | |
684 | if (ret != 0) { | |
685 | bios_error: | |
686 | fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); | |
687 | exit(1); | |
688 | } | |
689 | g_free(filename); | |
690 | ||
691 | /* map the last 128KB of the BIOS in ISA space */ | |
692 | isa_bios_size = MIN(bios_size, 128 * KiB); | |
693 | isa_bios = g_malloc(sizeof(*isa_bios)); | |
694 | memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, | |
695 | bios_size - isa_bios_size, isa_bios_size); | |
696 | memory_region_add_subregion_overlap(rom_memory, | |
697 | 0x100000 - isa_bios_size, | |
698 | isa_bios, | |
699 | 1); | |
700 | if (!isapc_ram_fw) { | |
701 | memory_region_set_readonly(isa_bios, true); | |
702 | } | |
703 | ||
704 | /* map all the bios at the top of memory */ | |
705 | memory_region_add_subregion(rom_memory, | |
706 | (uint32_t)(-bios_size), | |
707 | bios); | |
708 | } | |
f0bb276b PB |
709 | |
710 | static void x86_machine_get_max_ram_below_4g(Object *obj, Visitor *v, | |
711 | const char *name, void *opaque, | |
712 | Error **errp) | |
713 | { | |
714 | X86MachineState *x86ms = X86_MACHINE(obj); | |
715 | uint64_t value = x86ms->max_ram_below_4g; | |
716 | ||
717 | visit_type_size(v, name, &value, errp); | |
718 | } | |
719 | ||
720 | static void x86_machine_set_max_ram_below_4g(Object *obj, Visitor *v, | |
721 | const char *name, void *opaque, | |
722 | Error **errp) | |
723 | { | |
724 | X86MachineState *x86ms = X86_MACHINE(obj); | |
725 | Error *error = NULL; | |
726 | uint64_t value; | |
727 | ||
728 | visit_type_size(v, name, &value, &error); | |
729 | if (error) { | |
730 | error_propagate(errp, error); | |
731 | return; | |
732 | } | |
733 | if (value > 4 * GiB) { | |
734 | error_setg(&error, | |
735 | "Machine option 'max-ram-below-4g=%"PRIu64 | |
736 | "' expects size less than or equal to 4G", value); | |
737 | error_propagate(errp, error); | |
738 | return; | |
739 | } | |
740 | ||
741 | if (value < 1 * MiB) { | |
742 | warn_report("Only %" PRIu64 " bytes of RAM below the 4GiB boundary," | |
743 | "BIOS may not work with less than 1MiB", value); | |
744 | } | |
745 | ||
746 | x86ms->max_ram_below_4g = value; | |
747 | } | |
748 | ||
ed9e923c PB |
749 | bool x86_machine_is_smm_enabled(X86MachineState *x86ms) |
750 | { | |
751 | bool smm_available = false; | |
752 | ||
753 | if (x86ms->smm == ON_OFF_AUTO_OFF) { | |
754 | return false; | |
755 | } | |
756 | ||
757 | if (tcg_enabled() || qtest_enabled()) { | |
758 | smm_available = true; | |
759 | } else if (kvm_enabled()) { | |
760 | smm_available = kvm_has_smm(); | |
761 | } | |
762 | ||
763 | if (smm_available) { | |
764 | return true; | |
765 | } | |
766 | ||
767 | if (x86ms->smm == ON_OFF_AUTO_ON) { | |
768 | error_report("System Management Mode not supported by this hypervisor."); | |
769 | exit(1); | |
770 | } | |
771 | return false; | |
772 | } | |
773 | ||
774 | static void x86_machine_get_smm(Object *obj, Visitor *v, const char *name, | |
775 | void *opaque, Error **errp) | |
776 | { | |
777 | X86MachineState *x86ms = X86_MACHINE(obj); | |
778 | OnOffAuto smm = x86ms->smm; | |
779 | ||
780 | visit_type_OnOffAuto(v, name, &smm, errp); | |
781 | } | |
782 | ||
783 | static void x86_machine_set_smm(Object *obj, Visitor *v, const char *name, | |
784 | void *opaque, Error **errp) | |
785 | { | |
786 | X86MachineState *x86ms = X86_MACHINE(obj); | |
787 | ||
788 | visit_type_OnOffAuto(v, name, &x86ms->smm, errp); | |
789 | } | |
790 | ||
f0bb276b PB |
791 | static void x86_machine_initfn(Object *obj) |
792 | { | |
793 | X86MachineState *x86ms = X86_MACHINE(obj); | |
794 | ||
ed9e923c | 795 | x86ms->smm = ON_OFF_AUTO_AUTO; |
f0bb276b PB |
796 | x86ms->max_ram_below_4g = 0; /* use default */ |
797 | x86ms->smp_dies = 1; | |
798 | } | |
799 | ||
800 | static void x86_machine_class_init(ObjectClass *oc, void *data) | |
801 | { | |
802 | MachineClass *mc = MACHINE_CLASS(oc); | |
803 | X86MachineClass *x86mc = X86_MACHINE_CLASS(oc); | |
804 | NMIClass *nc = NMI_CLASS(oc); | |
805 | ||
806 | mc->cpu_index_to_instance_props = x86_cpu_index_to_props; | |
807 | mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; | |
808 | mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; | |
809 | x86mc->compat_apic_id_mode = false; | |
2f34ebf2 | 810 | x86mc->save_tsc_khz = true; |
f0bb276b PB |
811 | nc->nmi_monitor_handler = x86_nmi; |
812 | ||
813 | object_class_property_add(oc, X86_MACHINE_MAX_RAM_BELOW_4G, "size", | |
814 | x86_machine_get_max_ram_below_4g, x86_machine_set_max_ram_below_4g, | |
815 | NULL, NULL, &error_abort); | |
f0bb276b PB |
816 | object_class_property_set_description(oc, X86_MACHINE_MAX_RAM_BELOW_4G, |
817 | "Maximum ram below the 4G boundary (32bit boundary)", &error_abort); | |
ed9e923c PB |
818 | |
819 | object_class_property_add(oc, X86_MACHINE_SMM, "OnOffAuto", | |
820 | x86_machine_get_smm, x86_machine_set_smm, | |
821 | NULL, NULL, &error_abort); | |
822 | object_class_property_set_description(oc, X86_MACHINE_SMM, | |
823 | "Enable SMM", &error_abort); | |
f0bb276b PB |
824 | } |
825 | ||
826 | static const TypeInfo x86_machine_info = { | |
827 | .name = TYPE_X86_MACHINE, | |
828 | .parent = TYPE_MACHINE, | |
829 | .abstract = true, | |
830 | .instance_size = sizeof(X86MachineState), | |
831 | .instance_init = x86_machine_initfn, | |
832 | .class_size = sizeof(X86MachineClass), | |
833 | .class_init = x86_machine_class_init, | |
834 | .interfaces = (InterfaceInfo[]) { | |
835 | { TYPE_NMI }, | |
836 | { } | |
837 | }, | |
838 | }; | |
839 | ||
840 | static void x86_machine_register_types(void) | |
841 | { | |
842 | type_register_static(&x86_machine_info); | |
843 | } | |
844 | ||
845 | type_init(x86_machine_register_types) |