]>
Commit | Line | Data |
---|---|---|
40b0b3f8 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
692f66f2 HB |
2 | /* |
3 | * crash.c - kernel crash support code. | |
4 | * Copyright (C) 2002-2004 Eric Biederman <[email protected]> | |
692f66f2 HB |
5 | */ |
6 | ||
44e8a5e9 | 7 | #include <linux/buildid.h> |
692f66f2 | 8 | #include <linux/crash_core.h> |
71d2bcec | 9 | #include <linux/init.h> |
692f66f2 HB |
10 | #include <linux/utsname.h> |
11 | #include <linux/vmalloc.h> | |
46d36b1b | 12 | #include <linux/sizes.h> |
6f991cc3 | 13 | #include <linux/kexec.h> |
24726275 ED |
14 | #include <linux/memory.h> |
15 | #include <linux/cpuhotplug.h> | |
692f66f2 HB |
16 | |
17 | #include <asm/page.h> | |
18 | #include <asm/sections.h> | |
19 | ||
a24d22b2 | 20 | #include <crypto/sha1.h> |
0935288c | 21 | |
5fd8fea9 | 22 | #include "kallsyms_internal.h" |
24726275 | 23 | #include "kexec_internal.h" |
5fd8fea9 | 24 | |
6f991cc3 ED |
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

/* vmcoreinfo stuff */
/* Text buffer that vmcoreinfo_append_str() appends formatted entries to. */
unsigned char *vmcoreinfo_data;
/* Number of bytes of vmcoreinfo_data currently in use. */
size_t vmcoreinfo_size;
/* Buffer in which update_vmcoreinfo_note() builds the vmcoreinfo ELF note. */
u32 *vmcoreinfo_note;

/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
static unsigned char *vmcoreinfo_data_safecopy;
35 | ||
692f66f2 HB |
36 | /* |
37 | * parsing the "crashkernel" commandline | |
38 | * | |
39 | * this code is intended to be called from architecture specific code | |
40 | */ | |
41 | ||
42 | ||
43 | /* | |
44 | * This function parses command lines in the format | |
45 | * | |
46 | * crashkernel=ramsize-range:size[,...][@offset] | |
47 | * | |
48 | * The function returns 0 on success and -EINVAL on failure. | |
49 | */ | |
50 | static int __init parse_crashkernel_mem(char *cmdline, | |
51 | unsigned long long system_ram, | |
52 | unsigned long long *crash_size, | |
53 | unsigned long long *crash_base) | |
54 | { | |
55 | char *cur = cmdline, *tmp; | |
46d36b1b TL |
56 | unsigned long long total_mem = system_ram; |
57 | ||
58 | /* | |
59 | * Firmware sometimes reserves some memory regions for its own use, | |
60 | * so the system memory size is less than the actual physical memory | |
61 | * size. Work around this by rounding up the total size to 128M, | |
62 | * which is enough for most test cases. | |
63 | */ | |
64 | total_mem = roundup(total_mem, SZ_128M); | |
692f66f2 HB |
65 | |
66 | /* for each entry of the comma-separated list */ | |
67 | do { | |
68 | unsigned long long start, end = ULLONG_MAX, size; | |
69 | ||
70 | /* get the start of the range */ | |
71 | start = memparse(cur, &tmp); | |
72 | if (cur == tmp) { | |
73 | pr_warn("crashkernel: Memory value expected\n"); | |
74 | return -EINVAL; | |
75 | } | |
76 | cur = tmp; | |
77 | if (*cur != '-') { | |
78 | pr_warn("crashkernel: '-' expected\n"); | |
79 | return -EINVAL; | |
80 | } | |
81 | cur++; | |
82 | ||
83 | /* if no ':' is here, than we read the end */ | |
84 | if (*cur != ':') { | |
85 | end = memparse(cur, &tmp); | |
86 | if (cur == tmp) { | |
87 | pr_warn("crashkernel: Memory value expected\n"); | |
88 | return -EINVAL; | |
89 | } | |
90 | cur = tmp; | |
91 | if (end <= start) { | |
92 | pr_warn("crashkernel: end <= start\n"); | |
93 | return -EINVAL; | |
94 | } | |
95 | } | |
96 | ||
97 | if (*cur != ':') { | |
98 | pr_warn("crashkernel: ':' expected\n"); | |
99 | return -EINVAL; | |
100 | } | |
101 | cur++; | |
102 | ||
103 | size = memparse(cur, &tmp); | |
104 | if (cur == tmp) { | |
105 | pr_warn("Memory value expected\n"); | |
106 | return -EINVAL; | |
107 | } | |
108 | cur = tmp; | |
46d36b1b | 109 | if (size >= total_mem) { |
692f66f2 HB |
110 | pr_warn("crashkernel: invalid size\n"); |
111 | return -EINVAL; | |
112 | } | |
113 | ||
114 | /* match ? */ | |
46d36b1b | 115 | if (total_mem >= start && total_mem < end) { |
692f66f2 HB |
116 | *crash_size = size; |
117 | break; | |
118 | } | |
119 | } while (*cur++ == ','); | |
120 | ||
121 | if (*crash_size > 0) { | |
122 | while (*cur && *cur != ' ' && *cur != '@') | |
123 | cur++; | |
124 | if (*cur == '@') { | |
125 | cur++; | |
126 | *crash_base = memparse(cur, &tmp); | |
127 | if (cur == tmp) { | |
128 | pr_warn("Memory value expected after '@'\n"); | |
129 | return -EINVAL; | |
130 | } | |
131 | } | |
de40ccef DY |
132 | } else |
133 | pr_info("crashkernel size resulted in zero bytes\n"); | |
692f66f2 HB |
134 | |
135 | return 0; | |
136 | } | |
137 | ||
138 | /* | |
139 | * That function parses "simple" (old) crashkernel command lines like | |
140 | * | |
141 | * crashkernel=size[@offset] | |
142 | * | |
143 | * It returns 0 on success and -EINVAL on failure. | |
144 | */ | |
145 | static int __init parse_crashkernel_simple(char *cmdline, | |
146 | unsigned long long *crash_size, | |
147 | unsigned long long *crash_base) | |
148 | { | |
149 | char *cur = cmdline; | |
150 | ||
151 | *crash_size = memparse(cmdline, &cur); | |
152 | if (cmdline == cur) { | |
153 | pr_warn("crashkernel: memory value expected\n"); | |
154 | return -EINVAL; | |
155 | } | |
156 | ||
157 | if (*cur == '@') | |
158 | *crash_base = memparse(cur+1, &cur); | |
159 | else if (*cur != ' ' && *cur != '\0') { | |
160 | pr_warn("crashkernel: unrecognized char: %c\n", *cur); | |
161 | return -EINVAL; | |
162 | } | |
163 | ||
164 | return 0; | |
165 | } | |
166 | ||
167 | #define SUFFIX_HIGH 0 | |
168 | #define SUFFIX_LOW 1 | |
169 | #define SUFFIX_NULL 2 | |
170 | static __initdata char *suffix_tbl[] = { | |
171 | [SUFFIX_HIGH] = ",high", | |
172 | [SUFFIX_LOW] = ",low", | |
173 | [SUFFIX_NULL] = NULL, | |
174 | }; | |
175 | ||
176 | /* | |
177 | * That function parses "suffix" crashkernel command lines like | |
178 | * | |
179 | * crashkernel=size,[high|low] | |
180 | * | |
181 | * It returns 0 on success and -EINVAL on failure. | |
182 | */ | |
183 | static int __init parse_crashkernel_suffix(char *cmdline, | |
184 | unsigned long long *crash_size, | |
185 | const char *suffix) | |
186 | { | |
187 | char *cur = cmdline; | |
188 | ||
189 | *crash_size = memparse(cmdline, &cur); | |
190 | if (cmdline == cur) { | |
191 | pr_warn("crashkernel: memory value expected\n"); | |
192 | return -EINVAL; | |
193 | } | |
194 | ||
195 | /* check with suffix */ | |
196 | if (strncmp(cur, suffix, strlen(suffix))) { | |
197 | pr_warn("crashkernel: unrecognized char: %c\n", *cur); | |
198 | return -EINVAL; | |
199 | } | |
200 | cur += strlen(suffix); | |
201 | if (*cur != ' ' && *cur != '\0') { | |
202 | pr_warn("crashkernel: unrecognized char: %c\n", *cur); | |
203 | return -EINVAL; | |
204 | } | |
205 | ||
206 | return 0; | |
207 | } | |
208 | ||
209 | static __init char *get_last_crashkernel(char *cmdline, | |
210 | const char *name, | |
211 | const char *suffix) | |
212 | { | |
213 | char *p = cmdline, *ck_cmdline = NULL; | |
214 | ||
215 | /* find crashkernel and use the last one if there are more */ | |
216 | p = strstr(p, name); | |
217 | while (p) { | |
218 | char *end_p = strchr(p, ' '); | |
219 | char *q; | |
220 | ||
221 | if (!end_p) | |
222 | end_p = p + strlen(p); | |
223 | ||
224 | if (!suffix) { | |
225 | int i; | |
226 | ||
227 | /* skip the one with any known suffix */ | |
228 | for (i = 0; suffix_tbl[i]; i++) { | |
229 | q = end_p - strlen(suffix_tbl[i]); | |
230 | if (!strncmp(q, suffix_tbl[i], | |
231 | strlen(suffix_tbl[i]))) | |
232 | goto next; | |
233 | } | |
234 | ck_cmdline = p; | |
235 | } else { | |
236 | q = end_p - strlen(suffix); | |
237 | if (!strncmp(q, suffix, strlen(suffix))) | |
238 | ck_cmdline = p; | |
239 | } | |
240 | next: | |
241 | p = strstr(p+1, name); | |
242 | } | |
243 | ||
692f66f2 HB |
244 | return ck_cmdline; |
245 | } | |
246 | ||
247 | static int __init __parse_crashkernel(char *cmdline, | |
248 | unsigned long long system_ram, | |
249 | unsigned long long *crash_size, | |
250 | unsigned long long *crash_base, | |
251 | const char *name, | |
252 | const char *suffix) | |
253 | { | |
254 | char *first_colon, *first_space; | |
255 | char *ck_cmdline; | |
256 | ||
257 | BUG_ON(!crash_size || !crash_base); | |
258 | *crash_size = 0; | |
259 | *crash_base = 0; | |
260 | ||
261 | ck_cmdline = get_last_crashkernel(cmdline, name, suffix); | |
692f66f2 | 262 | if (!ck_cmdline) |
2e5920bb | 263 | return -ENOENT; |
692f66f2 HB |
264 | |
265 | ck_cmdline += strlen(name); | |
266 | ||
267 | if (suffix) | |
268 | return parse_crashkernel_suffix(ck_cmdline, crash_size, | |
269 | suffix); | |
270 | /* | |
271 | * if the commandline contains a ':', then that's the extended | |
272 | * syntax -- if not, it must be the classic syntax | |
273 | */ | |
274 | first_colon = strchr(ck_cmdline, ':'); | |
275 | first_space = strchr(ck_cmdline, ' '); | |
276 | if (first_colon && (!first_space || first_colon < first_space)) | |
277 | return parse_crashkernel_mem(ck_cmdline, system_ram, | |
278 | crash_size, crash_base); | |
279 | ||
280 | return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); | |
281 | } | |
282 | ||
283 | /* | |
284 | * That function is the entry point for command line parsing and should be | |
285 | * called from the arch-specific code. | |
286 | */ | |
287 | int __init parse_crashkernel(char *cmdline, | |
288 | unsigned long long system_ram, | |
289 | unsigned long long *crash_size, | |
290 | unsigned long long *crash_base) | |
291 | { | |
292 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | |
293 | "crashkernel=", NULL); | |
294 | } | |
295 | ||
296 | int __init parse_crashkernel_high(char *cmdline, | |
297 | unsigned long long system_ram, | |
298 | unsigned long long *crash_size, | |
299 | unsigned long long *crash_base) | |
300 | { | |
301 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | |
302 | "crashkernel=", suffix_tbl[SUFFIX_HIGH]); | |
303 | } | |
304 | ||
305 | int __init parse_crashkernel_low(char *cmdline, | |
306 | unsigned long long system_ram, | |
307 | unsigned long long *crash_size, | |
308 | unsigned long long *crash_base) | |
309 | { | |
310 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | |
311 | "crashkernel=", suffix_tbl[SUFFIX_LOW]); | |
312 | } | |
313 | ||
71d2bcec PR |
314 | /* |
315 | * Add a dummy early_param handler to mark crashkernel= as a known command line | |
316 | * parameter and suppress incorrect warnings in init/main.c. | |
317 | */ | |
318 | static int __init parse_crashkernel_dummy(char *arg) | |
319 | { | |
320 | return 0; | |
321 | } | |
322 | early_param("crashkernel", parse_crashkernel_dummy); | |
323 | ||
6f991cc3 ED |
324 | int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, |
325 | void **addr, unsigned long *sz) | |
326 | { | |
327 | Elf64_Ehdr *ehdr; | |
328 | Elf64_Phdr *phdr; | |
329 | unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz; | |
330 | unsigned char *buf; | |
331 | unsigned int cpu, i; | |
332 | unsigned long long notes_addr; | |
333 | unsigned long mstart, mend; | |
334 | ||
335 | /* extra phdr for vmcoreinfo ELF note */ | |
336 | nr_phdr = nr_cpus + 1; | |
337 | nr_phdr += mem->nr_ranges; | |
338 | ||
339 | /* | |
340 | * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping | |
341 | * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64). | |
342 | * I think this is required by tools like gdb. So same physical | |
343 | * memory will be mapped in two ELF headers. One will contain kernel | |
344 | * text virtual addresses and other will have __va(physical) addresses. | |
345 | */ | |
346 | ||
347 | nr_phdr++; | |
348 | elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr); | |
349 | elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN); | |
350 | ||
351 | buf = vzalloc(elf_sz); | |
352 | if (!buf) | |
353 | return -ENOMEM; | |
354 | ||
355 | ehdr = (Elf64_Ehdr *)buf; | |
356 | phdr = (Elf64_Phdr *)(ehdr + 1); | |
357 | memcpy(ehdr->e_ident, ELFMAG, SELFMAG); | |
358 | ehdr->e_ident[EI_CLASS] = ELFCLASS64; | |
359 | ehdr->e_ident[EI_DATA] = ELFDATA2LSB; | |
360 | ehdr->e_ident[EI_VERSION] = EV_CURRENT; | |
361 | ehdr->e_ident[EI_OSABI] = ELF_OSABI; | |
362 | memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); | |
363 | ehdr->e_type = ET_CORE; | |
364 | ehdr->e_machine = ELF_ARCH; | |
365 | ehdr->e_version = EV_CURRENT; | |
366 | ehdr->e_phoff = sizeof(Elf64_Ehdr); | |
367 | ehdr->e_ehsize = sizeof(Elf64_Ehdr); | |
368 | ehdr->e_phentsize = sizeof(Elf64_Phdr); | |
369 | ||
a396d0f8 ED |
370 | /* Prepare one phdr of type PT_NOTE for each possible CPU */ |
371 | for_each_possible_cpu(cpu) { | |
6f991cc3 ED |
372 | phdr->p_type = PT_NOTE; |
373 | notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); | |
374 | phdr->p_offset = phdr->p_paddr = notes_addr; | |
375 | phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t); | |
376 | (ehdr->e_phnum)++; | |
377 | phdr++; | |
378 | } | |
379 | ||
380 | /* Prepare one PT_NOTE header for vmcoreinfo */ | |
381 | phdr->p_type = PT_NOTE; | |
382 | phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note(); | |
383 | phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE; | |
384 | (ehdr->e_phnum)++; | |
385 | phdr++; | |
386 | ||
387 | /* Prepare PT_LOAD type program header for kernel text region */ | |
388 | if (need_kernel_map) { | |
389 | phdr->p_type = PT_LOAD; | |
390 | phdr->p_flags = PF_R|PF_W|PF_X; | |
391 | phdr->p_vaddr = (unsigned long) _text; | |
392 | phdr->p_filesz = phdr->p_memsz = _end - _text; | |
393 | phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); | |
394 | ehdr->e_phnum++; | |
395 | phdr++; | |
396 | } | |
397 | ||
398 | /* Go through all the ranges in mem->ranges[] and prepare phdr */ | |
399 | for (i = 0; i < mem->nr_ranges; i++) { | |
400 | mstart = mem->ranges[i].start; | |
401 | mend = mem->ranges[i].end; | |
402 | ||
403 | phdr->p_type = PT_LOAD; | |
404 | phdr->p_flags = PF_R|PF_W|PF_X; | |
405 | phdr->p_offset = mstart; | |
406 | ||
407 | phdr->p_paddr = mstart; | |
408 | phdr->p_vaddr = (unsigned long) __va(mstart); | |
409 | phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; | |
410 | phdr->p_align = 0; | |
411 | ehdr->e_phnum++; | |
412 | pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", | |
413 | phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, | |
414 | ehdr->e_phnum, phdr->p_offset); | |
415 | phdr++; | |
416 | } | |
417 | ||
418 | *addr = buf; | |
419 | *sz = elf_sz; | |
420 | return 0; | |
421 | } | |
422 | ||
423 | int crash_exclude_mem_range(struct crash_mem *mem, | |
424 | unsigned long long mstart, unsigned long long mend) | |
425 | { | |
426 | int i, j; | |
427 | unsigned long long start, end, p_start, p_end; | |
428 | struct range temp_range = {0, 0}; | |
429 | ||
430 | for (i = 0; i < mem->nr_ranges; i++) { | |
431 | start = mem->ranges[i].start; | |
432 | end = mem->ranges[i].end; | |
433 | p_start = mstart; | |
434 | p_end = mend; | |
435 | ||
436 | if (mstart > end || mend < start) | |
437 | continue; | |
438 | ||
439 | /* Truncate any area outside of range */ | |
440 | if (mstart < start) | |
441 | p_start = start; | |
442 | if (mend > end) | |
443 | p_end = end; | |
444 | ||
445 | /* Found completely overlapping range */ | |
446 | if (p_start == start && p_end == end) { | |
447 | mem->ranges[i].start = 0; | |
448 | mem->ranges[i].end = 0; | |
449 | if (i < mem->nr_ranges - 1) { | |
450 | /* Shift rest of the ranges to left */ | |
451 | for (j = i; j < mem->nr_ranges - 1; j++) { | |
452 | mem->ranges[j].start = | |
453 | mem->ranges[j+1].start; | |
454 | mem->ranges[j].end = | |
455 | mem->ranges[j+1].end; | |
456 | } | |
457 | ||
458 | /* | |
459 | * Continue to check if there are another overlapping ranges | |
460 | * from the current position because of shifting the above | |
461 | * mem ranges. | |
462 | */ | |
463 | i--; | |
464 | mem->nr_ranges--; | |
465 | continue; | |
466 | } | |
467 | mem->nr_ranges--; | |
468 | return 0; | |
469 | } | |
470 | ||
471 | if (p_start > start && p_end < end) { | |
472 | /* Split original range */ | |
473 | mem->ranges[i].end = p_start - 1; | |
474 | temp_range.start = p_end + 1; | |
475 | temp_range.end = end; | |
476 | } else if (p_start != start) | |
477 | mem->ranges[i].end = p_start - 1; | |
478 | else | |
479 | mem->ranges[i].start = p_end + 1; | |
480 | break; | |
481 | } | |
482 | ||
483 | /* If a split happened, add the split to array */ | |
484 | if (!temp_range.end) | |
485 | return 0; | |
486 | ||
487 | /* Split happened */ | |
488 | if (i == mem->max_nr_ranges - 1) | |
489 | return -ENOMEM; | |
490 | ||
491 | /* Location where new range should go */ | |
492 | j = i + 1; | |
493 | if (j < mem->nr_ranges) { | |
494 | /* Move over all ranges one slot towards the end */ | |
495 | for (i = mem->nr_ranges - 1; i >= j; i--) | |
496 | mem->ranges[i + 1] = mem->ranges[i]; | |
497 | } | |
498 | ||
499 | mem->ranges[j].start = temp_range.start; | |
500 | mem->ranges[j].end = temp_range.end; | |
501 | mem->nr_ranges++; | |
502 | return 0; | |
503 | } | |
504 | ||
51dbd925 HB |
505 | Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, |
506 | void *data, size_t data_len) | |
692f66f2 | 507 | { |
51dbd925 HB |
508 | struct elf_note *note = (struct elf_note *)buf; |
509 | ||
510 | note->n_namesz = strlen(name) + 1; | |
511 | note->n_descsz = data_len; | |
512 | note->n_type = type; | |
513 | buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word)); | |
514 | memcpy(buf, name, note->n_namesz); | |
515 | buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word)); | |
516 | memcpy(buf, data, data_len); | |
517 | buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word)); | |
692f66f2 HB |
518 | |
519 | return buf; | |
520 | } | |
521 | ||
51dbd925 | 522 | void final_note(Elf_Word *buf) |
692f66f2 | 523 | { |
51dbd925 | 524 | memset(buf, 0, sizeof(struct elf_note)); |
692f66f2 HB |
525 | } |
526 | ||
527 | static void update_vmcoreinfo_note(void) | |
528 | { | |
529 | u32 *buf = vmcoreinfo_note; | |
530 | ||
531 | if (!vmcoreinfo_size) | |
532 | return; | |
533 | buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, | |
534 | vmcoreinfo_size); | |
535 | final_note(buf); | |
536 | } | |
537 | ||
1229384f XP |
538 | void crash_update_vmcoreinfo_safecopy(void *ptr) |
539 | { | |
540 | if (ptr) | |
541 | memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size); | |
542 | ||
543 | vmcoreinfo_data_safecopy = ptr; | |
544 | } | |
545 | ||
692f66f2 HB |
546 | void crash_save_vmcoreinfo(void) |
547 | { | |
203e9e41 XP |
548 | if (!vmcoreinfo_note) |
549 | return; | |
550 | ||
1229384f XP |
551 | /* Use the safe copy to generate vmcoreinfo note if have */ |
552 | if (vmcoreinfo_data_safecopy) | |
553 | vmcoreinfo_data = vmcoreinfo_data_safecopy; | |
554 | ||
91bc9aaf | 555 | vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds()); |
692f66f2 HB |
556 | update_vmcoreinfo_note(); |
557 | } | |
558 | ||
559 | void vmcoreinfo_append_str(const char *fmt, ...) | |
560 | { | |
561 | va_list args; | |
562 | char buf[0x50]; | |
563 | size_t r; | |
564 | ||
565 | va_start(args, fmt); | |
566 | r = vscnprintf(buf, sizeof(buf), fmt, args); | |
567 | va_end(args); | |
568 | ||
5203f499 | 569 | r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size); |
692f66f2 HB |
570 | |
571 | memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); | |
572 | ||
573 | vmcoreinfo_size += r; | |
08fc35f3 SB |
574 | |
575 | WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES, | |
576 | "vmcoreinfo data exceeds allocated size, truncating"); | |
692f66f2 HB |
577 | } |
578 | ||
579 | /* | |
580 | * provide an empty default implementation here -- architecture | |
581 | * code may override this | |
582 | */ | |
583 | void __weak arch_crash_save_vmcoreinfo(void) | |
584 | {} | |
585 | ||
586 | phys_addr_t __weak paddr_vmcoreinfo_note(void) | |
587 | { | |
203e9e41 | 588 | return __pa(vmcoreinfo_note); |
692f66f2 | 589 | } |
43d4cb47 | 590 | EXPORT_SYMBOL(paddr_vmcoreinfo_note); |
692f66f2 HB |
591 | |
/*
 * Allocate the vmcoreinfo data and note buffers and populate them at boot.
 *
 * The VMCOREINFO_*() invocations below record symbols, structure sizes,
 * member offsets, array lengths and constants (the macro definitions are not
 * visible in this file).  Their order is kept as-is since it presumably
 * determines the order of the entries in the resulting data.
 *
 * Returns 0 on success, -ENOMEM if either buffer cannot be allocated.
 */
static int __init crash_save_vmcoreinfo_init(void)
{
	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
	if (!vmcoreinfo_data) {
		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
		return -ENOMEM;
	}

	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
						GFP_KERNEL | __GFP_ZERO);
	if (!vmcoreinfo_note) {
		/* undo the first allocation so both pointers stay NULL */
		free_page((unsigned long)vmcoreinfo_data);
		vmcoreinfo_data = NULL;
		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
		return -ENOMEM;
	}

	/* kernel identification */
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_BUILD_ID();
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	/* core symbols */
	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_OFFSET(uts_namespace, name);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

	/* memory model specific symbols */
#ifndef CONFIG_NUMA
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL_ARRAY(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
	VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
	VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
#endif
	/* structure sizes and member offsets of the mm data structures */
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _refcount);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(page, compound_head);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLATMEM
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER + 1);
	log_buf_vmcoreinfo_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	/* constants: counters, page flag numbers and mapcount markers */
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_swapbacked);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
#define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy)
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
	/*
	 * NOTE(review): PAGE_OFFLINE_MAPCOUNT_VALUE is only recorded when
	 * CONFIG_HUGETLB_PAGE is set, although nothing visible here ties
	 * PG_offline to hugetlb -- confirm whether that is intended.
	 */
#ifdef CONFIG_HUGETLB_PAGE
	VMCOREINFO_NUMBER(PG_hugetlb);
#define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline)
	VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
#endif

	/* kallsyms tables */
#ifdef CONFIG_KALLSYMS
	VMCOREINFO_SYMBOL(kallsyms_names);
	VMCOREINFO_SYMBOL(kallsyms_num_syms);
	VMCOREINFO_SYMBOL(kallsyms_token_table);
	VMCOREINFO_SYMBOL(kallsyms_token_index);
#ifdef CONFIG_KALLSYMS_BASE_RELATIVE
	VMCOREINFO_SYMBOL(kallsyms_offsets);
	VMCOREINFO_SYMBOL(kallsyms_relative_base);
#else
	VMCOREINFO_SYMBOL(kallsyms_addresses);
#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */
#endif /* CONFIG_KALLSYMS */

	/* let the architecture add its entries, then build the ELF note */
	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

subsys_initcall(crash_save_vmcoreinfo_init);
6f991cc3 ED |
704 | |
705 | static int __init crash_notes_memory_init(void) | |
706 | { | |
707 | /* Allocate memory for saving cpu registers. */ | |
708 | size_t size, align; | |
709 | ||
710 | /* | |
711 | * crash_notes could be allocated across 2 vmalloc pages when percpu | |
712 | * is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc | |
713 | * pages are also on 2 continuous physical pages. In this case the | |
714 | * 2nd part of crash_notes in 2nd page could be lost since only the | |
715 | * starting address and size of crash_notes are exported through sysfs. | |
716 | * Here round up the size of crash_notes to the nearest power of two | |
717 | * and pass it to __alloc_percpu as align value. This can make sure | |
718 | * crash_notes is allocated inside one physical page. | |
719 | */ | |
720 | size = sizeof(note_buf_t); | |
721 | align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE); | |
722 | ||
723 | /* | |
724 | * Break compile if size is bigger than PAGE_SIZE since crash_notes | |
725 | * definitely will be in 2 pages with that. | |
726 | */ | |
727 | BUILD_BUG_ON(size > PAGE_SIZE); | |
728 | ||
729 | crash_notes = __alloc_percpu(size, align); | |
730 | if (!crash_notes) { | |
731 | pr_warn("Memory allocation for saving cpu register states failed\n"); | |
732 | return -ENOMEM; | |
733 | } | |
734 | return 0; | |
735 | } | |
736 | subsys_initcall(crash_notes_memory_init); | |
24726275 ED |
737 | |
738 | #ifdef CONFIG_CRASH_HOTPLUG | |
739 | #undef pr_fmt | |
740 | #define pr_fmt(fmt) "crash hp: " fmt | |
a72bbec7 ED |
741 | |
742 | /* | |
743 | * This routine utilized when the crash_hotplug sysfs node is read. | |
744 | * It reflects the kernel's ability/permission to update the crash | |
745 | * elfcorehdr directly. | |
746 | */ | |
747 | int crash_check_update_elfcorehdr(void) | |
748 | { | |
749 | int rc = 0; | |
750 | ||
751 | /* Obtain lock while reading crash information */ | |
752 | if (!kexec_trylock()) { | |
753 | pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); | |
754 | return 0; | |
755 | } | |
756 | if (kexec_crash_image) { | |
757 | if (kexec_crash_image->file_mode) | |
758 | rc = 1; | |
759 | else | |
760 | rc = kexec_crash_image->update_elfcorehdr; | |
761 | } | |
762 | /* Release lock now that update complete */ | |
763 | kexec_unlock(); | |
764 | ||
765 | return rc; | |
766 | } | |
767 | ||
24726275 ED |
768 | /* |
769 | * To accurately reflect hot un/plug changes of cpu and memory resources | |
770 | * (including onling and offlining of those resources), the elfcorehdr | |
771 | * (which is passed to the crash kernel via the elfcorehdr= parameter) | |
772 | * must be updated with the new list of CPUs and memories. | |
773 | * | |
774 | * In order to make changes to elfcorehdr, two conditions are needed: | |
775 | * First, the segment containing the elfcorehdr must be large enough | |
776 | * to permit a growing number of resources; the elfcorehdr memory size | |
777 | * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES. | |
778 | * Second, purgatory must explicitly exclude the elfcorehdr from the | |
779 | * list of segments it checks (since the elfcorehdr changes and thus | |
780 | * would require an update to purgatory itself to update the digest). | |
781 | */ | |
782 | static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu) | |
783 | { | |
784 | struct kimage *image; | |
785 | ||
786 | /* Obtain lock while changing crash information */ | |
787 | if (!kexec_trylock()) { | |
788 | pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); | |
789 | return; | |
790 | } | |
791 | ||
792 | /* Check kdump is not loaded */ | |
793 | if (!kexec_crash_image) | |
794 | goto out; | |
795 | ||
796 | image = kexec_crash_image; | |
797 | ||
a72bbec7 ED |
798 | /* Check that updating elfcorehdr is permitted */ |
799 | if (!(image->file_mode || image->update_elfcorehdr)) | |
800 | goto out; | |
801 | ||
24726275 ED |
802 | if (hp_action == KEXEC_CRASH_HP_ADD_CPU || |
803 | hp_action == KEXEC_CRASH_HP_REMOVE_CPU) | |
804 | pr_debug("hp_action %u, cpu %u\n", hp_action, cpu); | |
805 | else | |
806 | pr_debug("hp_action %u\n", hp_action); | |
807 | ||
808 | /* | |
809 | * The elfcorehdr_index is set to -1 when the struct kimage | |
810 | * is allocated. Find the segment containing the elfcorehdr, | |
811 | * if not already found. | |
812 | */ | |
813 | if (image->elfcorehdr_index < 0) { | |
814 | unsigned long mem; | |
815 | unsigned char *ptr; | |
816 | unsigned int n; | |
817 | ||
818 | for (n = 0; n < image->nr_segments; n++) { | |
819 | mem = image->segment[n].mem; | |
820 | ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT)); | |
821 | if (ptr) { | |
822 | /* The segment containing elfcorehdr */ | |
823 | if (memcmp(ptr, ELFMAG, SELFMAG) == 0) | |
824 | image->elfcorehdr_index = (int)n; | |
825 | kunmap_local(ptr); | |
826 | } | |
827 | } | |
828 | } | |
829 | ||
830 | if (image->elfcorehdr_index < 0) { | |
831 | pr_err("unable to locate elfcorehdr segment"); | |
832 | goto out; | |
833 | } | |
834 | ||
835 | /* Needed in order for the segments to be updated */ | |
836 | arch_kexec_unprotect_crashkres(); | |
837 | ||
838 | /* Differentiate between normal load and hotplug update */ | |
839 | image->hp_action = hp_action; | |
840 | ||
841 | /* Now invoke arch-specific update handler */ | |
842 | arch_crash_handle_hotplug_event(image); | |
843 | ||
844 | /* No longer handling a hotplug event */ | |
845 | image->hp_action = KEXEC_CRASH_HP_NONE; | |
846 | image->elfcorehdr_updated = true; | |
847 | ||
848 | /* Change back to read-only */ | |
849 | arch_kexec_protect_crashkres(); | |
850 | ||
851 | /* Errors in the callback is not a reason to rollback state */ | |
852 | out: | |
853 | /* Release lock now that update complete */ | |
854 | kexec_unlock(); | |
855 | } | |
856 | ||
857 | static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v) | |
858 | { | |
859 | switch (val) { | |
860 | case MEM_ONLINE: | |
861 | crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, | |
862 | KEXEC_CRASH_HP_INVALID_CPU); | |
863 | break; | |
864 | ||
865 | case MEM_OFFLINE: | |
866 | crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, | |
867 | KEXEC_CRASH_HP_INVALID_CPU); | |
868 | break; | |
869 | } | |
870 | return NOTIFY_OK; | |
871 | } | |
872 | ||
873 | static struct notifier_block crash_memhp_nb = { | |
874 | .notifier_call = crash_memhp_notifier, | |
875 | .priority = 0 | |
876 | }; | |
877 | ||
878 | static int crash_cpuhp_online(unsigned int cpu) | |
879 | { | |
880 | crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu); | |
881 | return 0; | |
882 | } | |
883 | ||
884 | static int crash_cpuhp_offline(unsigned int cpu) | |
885 | { | |
886 | crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu); | |
887 | return 0; | |
888 | } | |
889 | ||
890 | static int __init crash_hotplug_init(void) | |
891 | { | |
892 | int result = 0; | |
893 | ||
894 | if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) | |
895 | register_memory_notifier(&crash_memhp_nb); | |
896 | ||
897 | if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { | |
898 | result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN, | |
899 | "crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline); | |
900 | } | |
901 | ||
902 | return result; | |
903 | } | |
904 | ||
905 | subsys_initcall(crash_hotplug_init); | |
906 | #endif |