// SPDX-License-Identifier: GPL-2.0-only
/*
 * powerpc code to implement the kexec_file_load syscall
 *
 * Copyright (C) 2004  IBM Corp.
 * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
 * Copyright (C) 2020  IBM Corporation
 *
 * Based on kexec-tools' kexec-ppc64.c, fs2dt.c.
 * Heavily modified for the kernel by
 * Hari Bathini, IBM Corporation.
 */

#define pr_fmt(fmt) "kexec ranges: " fmt

#include <linux/sort.h>
#include <linux/kexec.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/memblock.h>
#include <linux/crash_core.h>
#include <asm/sections.h>
#include <asm/kexec_ranges.h>
#include <asm/crashdump-ppc64.h>

#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
31 * get_max_nr_ranges - Get the max no. of ranges crash_mem structure
32 * could hold, given the size allocated for it.
33 * @size: Allocation size of crash_mem structure.
35 * Returns the maximum no. of ranges.
37 static inline unsigned int get_max_nr_ranges(size_t size)
39 return ((size - sizeof(struct crash_mem)) /
40 sizeof(struct range));
44 * get_mem_rngs_size - Get the allocated size of mem_rngs based on
45 * max_nr_ranges and chunk size.
46 * @mem_rngs: Memory ranges.
48 * Returns the maximum size of @mem_rngs.
50 static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs)
57 size = (sizeof(struct crash_mem) +
58 (mem_rngs->max_nr_ranges * sizeof(struct range)));
61 * Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ.
62 * So, align to get the actual length.
64 return ALIGN(size, MEM_RANGE_CHUNK_SZ);
68 * __add_mem_range - add a memory range to memory ranges list.
69 * @mem_ranges: Range list to add the memory range to.
70 * @base: Base address of the range to add.
71 * @size: Size of the memory range to add.
73 * (Re)allocates memory, if needed.
75 * Returns 0 on success, negative errno on error.
77 static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
79 struct crash_mem *mem_rngs = *mem_ranges;
81 if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) {
82 mem_rngs = realloc_mem_ranges(mem_ranges);
87 mem_rngs->ranges[mem_rngs->nr_ranges].start = base;
88 mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1;
89 pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n",
90 base, base + size - 1, mem_rngs->nr_ranges);
91 mem_rngs->nr_ranges++;
96 * __merge_memory_ranges - Merges the given memory ranges list.
97 * @mem_rngs: Range list to merge.
99 * Assumes a sorted range list.
103 static void __merge_memory_ranges(struct crash_mem *mem_rngs)
105 struct range *ranges;
112 ranges = &(mem_rngs->ranges[0]);
113 for (i = 1; i < mem_rngs->nr_ranges; i++) {
114 if (ranges[i].start <= (ranges[i-1].end + 1))
115 ranges[idx].end = ranges[i].end;
121 ranges[idx] = ranges[i];
124 mem_rngs->nr_ranges = idx + 1;
127 /* cmp_func_t callback to sort ranges with sort() */
128 static int rngcmp(const void *_x, const void *_y)
130 const struct range *x = _x, *y = _y;
132 if (x->start > y->start)
134 if (x->start < y->start)
140 * sort_memory_ranges - Sorts the given memory ranges list.
141 * @mem_rngs: Range list to sort.
142 * @merge: If true, merge the list after sorting.
146 void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge)
153 /* Sort the ranges in-place */
154 sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges,
155 sizeof(mem_rngs->ranges[0]), rngcmp, NULL);
158 __merge_memory_ranges(mem_rngs);
160 /* For debugging purpose */
161 pr_debug("Memory ranges:\n");
162 for (i = 0; i < mem_rngs->nr_ranges; i++) {
163 pr_debug("\t[%03d][%#016llx - %#016llx]\n", i,
164 mem_rngs->ranges[i].start,
165 mem_rngs->ranges[i].end);
170 * realloc_mem_ranges - reallocate mem_ranges with size incremented
171 * by MEM_RANGE_CHUNK_SZ. Frees up the old memory,
172 * if memory allocation fails.
173 * @mem_ranges: Memory ranges to reallocate.
175 * Returns pointer to reallocated memory on success, NULL otherwise.
177 struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges)
179 struct crash_mem *mem_rngs = *mem_ranges;
180 unsigned int nr_ranges;
183 size = get_mem_rngs_size(mem_rngs);
184 nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0;
186 size += MEM_RANGE_CHUNK_SZ;
187 mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL);
194 mem_rngs->nr_ranges = nr_ranges;
195 mem_rngs->max_nr_ranges = get_max_nr_ranges(size);
196 *mem_ranges = mem_rngs;
202 * add_mem_range - Updates existing memory range, if there is an overlap.
203 * Else, adds a new memory range.
204 * @mem_ranges: Range list to add the memory range to.
205 * @base: Base address of the range to add.
206 * @size: Size of the memory range to add.
208 * (Re)allocates memory, if needed.
210 * Returns 0 on success, negative errno on error.
212 int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
214 struct crash_mem *mem_rngs = *mem_ranges;
215 u64 mstart, mend, end;
221 end = base + size - 1;
223 if (!mem_rngs || !(mem_rngs->nr_ranges))
224 return __add_mem_range(mem_ranges, base, size);
226 for (i = 0; i < mem_rngs->nr_ranges; i++) {
227 mstart = mem_rngs->ranges[i].start;
228 mend = mem_rngs->ranges[i].end;
229 if (base < mend && end > mstart) {
231 mem_rngs->ranges[i].start = base;
233 mem_rngs->ranges[i].end = end;
238 return __add_mem_range(mem_ranges, base, size);
#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */

#ifdef CONFIG_KEXEC_FILE
245 * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
246 * @mem_ranges: Range list to add the memory range(s) to.
248 * Returns 0 on success, negative errno on error.
250 static int add_tce_mem_ranges(struct crash_mem **mem_ranges)
252 struct device_node *dn = NULL;
255 for_each_node_by_type(dn, "pci") {
259 ret = of_property_read_u64(dn, "linux,tce-base", &base);
260 ret |= of_property_read_u32(dn, "linux,tce-size", &size);
263 * It is ok to have pci nodes without tce. So, ignore
264 * property does not exist error.
266 if (ret == -EINVAL) {
273 ret = add_mem_range(mem_ranges, base, size);
283 * add_initrd_mem_range - Adds initrd range to the given memory ranges list,
284 * if the initrd was retained.
285 * @mem_ranges: Range list to add the memory range to.
287 * Returns 0 on success, negative errno on error.
289 static int add_initrd_mem_range(struct crash_mem **mem_ranges)
294 /* This range means something, only if initrd was retained */
295 if (!strstr(saved_command_line, "retain_initrd"))
298 ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base);
299 ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end);
301 ret = add_mem_range(mem_ranges, base, end - base + 1);
/**
 * add_htab_mem_range - Adds htab range to the given memory ranges list,
 *                      if it exists
 * @mem_ranges:         Range list to add the memory range to.
 *
 * Only meaningful with CONFIG_PPC_64S_HASH_MMU; otherwise this is a no-op.
 *
 * Returns 0 on success, negative errno on error.
 */
static int add_htab_mem_range(struct crash_mem **mem_ranges)
{

#ifdef CONFIG_PPC_64S_HASH_MMU
	/* No hash page table set up by this kernel: nothing to add */
	if (!htab_address)
		return 0;

	return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes);
#else
	return 0;
#endif
}

327 * add_kernel_mem_range - Adds kernel text region to the given
328 * memory ranges list.
329 * @mem_ranges: Range list to add the memory range to.
331 * Returns 0 on success, negative errno on error.
333 static int add_kernel_mem_range(struct crash_mem **mem_ranges)
335 return add_mem_range(mem_ranges, 0, __pa(_end));
#endif /* CONFIG_KEXEC_FILE */

#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
341 * add_rtas_mem_range - Adds RTAS region to the given memory ranges list.
342 * @mem_ranges: Range list to add the memory range to.
344 * Returns 0 on success, negative errno on error.
346 static int add_rtas_mem_range(struct crash_mem **mem_ranges)
348 struct device_node *dn;
352 dn = of_find_node_by_path("/rtas");
356 ret = of_property_read_u32(dn, "linux,rtas-base", &base);
357 ret |= of_property_read_u32(dn, "rtas-size", &size);
359 ret = add_mem_range(mem_ranges, base, size);
366 * add_opal_mem_range - Adds OPAL region to the given memory ranges list.
367 * @mem_ranges: Range list to add the memory range to.
369 * Returns 0 on success, negative errno on error.
371 static int add_opal_mem_range(struct crash_mem **mem_ranges)
373 struct device_node *dn;
377 dn = of_find_node_by_path("/ibm,opal");
381 ret = of_property_read_u64(dn, "opal-base-address", &base);
382 ret |= of_property_read_u64(dn, "opal-runtime-size", &size);
384 ret = add_mem_range(mem_ranges, base, size);
#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */

#ifdef CONFIG_KEXEC_FILE
393 * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w
394 * to the given memory ranges list.
395 * @mem_ranges: Range list to add the memory ranges to.
397 * Returns 0 on success, negative errno on error.
399 static int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
401 int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;
402 struct device_node *root = of_find_node_by_path("/");
405 prop = of_get_property(root, "reserved-ranges", &len);
406 n_mem_addr_cells = of_n_addr_cells(root);
407 n_mem_size_cells = of_n_size_cells(root);
412 cells = n_mem_addr_cells + n_mem_size_cells;
414 /* Each reserved range is an (address,size) pair */
415 for (i = 0; i < (len / (sizeof(u32) * cells)); i++) {
418 base = of_read_number(prop + (i * cells), n_mem_addr_cells);
419 size = of_read_number(prop + (i * cells) + n_mem_addr_cells,
422 ret = add_mem_range(mem_ranges, base, size);
/**
 * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
 *                              memory regions that should be added to the
 *                              memory reserve map to ensure the region is
 *                              protected from any mischief.
 * @mem_ranges:                 Range list to add the memory ranges to.
 *
 * Collects the RTAS, tce-table and firmware "reserved-ranges" regions,
 * stopping at the first failure.
 *
 * Returns 0 on success, negative errno on error.
 */
int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
{
	int ret;

	ret = add_rtas_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_tce_mem_ranges(mem_ranges);
	if (ret)
		goto out;

	ret = add_reserved_mem_ranges(mem_ranges);
out:
	if (ret)
		pr_err("Failed to setup reserved memory ranges\n");
	return ret;
}

459 * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
460 * regions like opal/rtas, tce-table, initrd,
461 * kernel, htab which should be avoided while
462 * setting up kexec load segments.
463 * @mem_ranges: Range list to add the memory ranges to.
465 * Returns 0 on success, negative errno on error.
467 int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
471 ret = add_tce_mem_ranges(mem_ranges);
475 ret = add_initrd_mem_range(mem_ranges);
479 ret = add_htab_mem_range(mem_ranges);
483 ret = add_kernel_mem_range(mem_ranges);
487 ret = add_rtas_mem_range(mem_ranges);
491 ret = add_opal_mem_range(mem_ranges);
495 ret = add_reserved_mem_ranges(mem_ranges);
499 /* exclude memory ranges should be sorted for easy lookup */
500 sort_memory_ranges(*mem_ranges, true);
503 pr_err("Failed to setup exclude memory ranges\n");
#ifdef CONFIG_CRASH_DUMP
509 * get_usable_memory_ranges - Get usable memory ranges. This list includes
510 * regions like crashkernel, opal/rtas & tce-table,
511 * that kdump kernel could use.
512 * @mem_ranges: Range list to add the memory ranges to.
514 * Returns 0 on success, negative errno on error.
516 int get_usable_memory_ranges(struct crash_mem **mem_ranges)
521 * Early boot failure observed on guests when low memory (first memory
522 * block?) is not added to usable memory. So, add [0, crashk_res.end]
523 * instead of [crashk_res.start, crashk_res.end] to workaround it.
524 * Also, crashed kernel's memory must be added to reserve map to
525 * avoid kdump kernel from using it.
527 ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
531 ret = add_rtas_mem_range(mem_ranges);
535 ret = add_opal_mem_range(mem_ranges);
539 ret = add_tce_mem_ranges(mem_ranges);
542 pr_err("Failed to setup usable memory ranges\n");
#endif /* CONFIG_CRASH_DUMP */
#endif /* CONFIG_KEXEC_FILE */

#ifdef CONFIG_CRASH_DUMP
550 * get_crash_memory_ranges - Get crash memory ranges. This list includes
551 * first/crashing kernel's memory regions that
552 * would be exported via an elfcore.
553 * @mem_ranges: Range list to add the memory ranges to.
555 * Returns 0 on success, negative errno on error.
557 int get_crash_memory_ranges(struct crash_mem **mem_ranges)
559 phys_addr_t base, end;
560 struct crash_mem *tmem;
564 for_each_mem_range(i, &base, &end) {
565 u64 size = end - base;
567 /* Skip backup memory region, which needs a separate entry */
568 if (base == BACKUP_SRC_START) {
569 if (size > BACKUP_SRC_SIZE) {
570 base = BACKUP_SRC_END + 1;
571 size -= BACKUP_SRC_SIZE;
576 ret = add_mem_range(mem_ranges, base, size);
580 /* Try merging adjacent ranges before reallocation attempt */
581 if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
582 sort_memory_ranges(*mem_ranges, true);
585 /* Reallocate memory ranges if there is no space to split ranges */
587 if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
588 tmem = realloc_mem_ranges(mem_ranges);
593 /* Exclude crashkernel region */
594 ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
599 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
600 * regions are exported to save their context at the time of
601 * crash, they should actually be backed up just like the
602 * first 64K bytes of memory.
604 ret = add_rtas_mem_range(mem_ranges);
608 ret = add_opal_mem_range(mem_ranges);
612 /* create a separate program header for the backup region */
613 ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
617 sort_memory_ranges(*mem_ranges, false);
620 pr_err("Failed to setup crash memory ranges\n");
625 * remove_mem_range - Removes the given memory range from the range list.
626 * @mem_ranges: Range list to remove the memory range to.
627 * @base: Base address of the range to remove.
628 * @size: Size of the memory range to remove.
630 * (Re)allocates memory, if needed.
632 * Returns 0 on success, negative errno on error.
634 int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
640 struct crash_mem *mem_rngs = *mem_ranges;
646 * Memory range are stored as start and end address, use
647 * the same format to do remove operation.
649 end = base + size - 1;
651 for (i = 0; i < mem_rngs->nr_ranges; i++) {
652 mstart = mem_rngs->ranges[i].start;
653 mend = mem_rngs->ranges[i].end;
656 * Memory range to remove is not part of this range entry
657 * in the memory range list
659 if (!(base >= mstart && end <= mend))
663 * Memory range to remove is equivalent to this entry in the
664 * memory range list. Remove the range entry from the list.
666 if (base == mstart && end == mend) {
667 for (; i < mem_rngs->nr_ranges - 1; i++) {
668 mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start;
669 mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end;
671 mem_rngs->nr_ranges--;
675 * Start address of the memory range to remove and the
676 * current memory range entry in the list is same. Just
677 * move the start address of the current memory range
678 * entry in the list to end + 1.
680 else if (base == mstart) {
681 mem_rngs->ranges[i].start = end + 1;
685 * End address of the memory range to remove and the
686 * current memory range entry in the list is same.
687 * Just move the end address of the current memory
688 * range entry in the list to base - 1.
690 else if (end == mend) {
691 mem_rngs->ranges[i].end = base - 1;
695 * Memory range to remove is not at the edge of current
696 * memory range entry. Split the current memory entry into
700 mem_rngs->ranges[i].end = base - 1;
701 size = mem_rngs->ranges[i].end - end;
702 ret = add_mem_range(mem_ranges, end + 1, size);
#endif /* CONFIG_CRASH_DUMP */