Git Repo - linux.git/blame - kernel/power/snapshot.c
Linux 6.14-rc3
55716d26 1// SPDX-License-Identifier: GPL-2.0-only
25761b6e 2/*
96bc7aec 3 * linux/kernel/power/snapshot.c
25761b6e 4 *
8357376d 5 * This file provides system snapshot/restore functionality for swsusp.
25761b6e 6 *
a2531293 7 * Copyright (C) 1998-2005 Pavel Machek <[email protected]>
8357376d 8 * Copyright (C) 2006 Rafael J. Wysocki <[email protected]>
9 */
10
7a7b99bf 11#define pr_fmt(fmt) "PM: hibernation: " fmt
64ec72a1 12
f577eb30 13#include <linux/version.h>
14#include <linux/module.h>
15#include <linux/mm.h>
16#include <linux/suspend.h>
25761b6e 17#include <linux/delay.h>
25761b6e 18#include <linux/bitops.h>
25761b6e 19#include <linux/spinlock.h>
25761b6e 20#include <linux/kernel.h>
21#include <linux/pm.h>
22#include <linux/device.h>
74dfd666 23#include <linux/init.h>
57c8a661 24#include <linux/memblock.h>
38b8d208 25#include <linux/nmi.h>
26#include <linux/syscalls.h>
27#include <linux/console.h>
28#include <linux/highmem.h>
846705de 29#include <linux/list.h>
5a0e3ad6 30#include <linux/slab.h>
52f5684c 31#include <linux/compiler.h>
db597605 32#include <linux/ktime.h>
61f6d09a 33#include <linux/set_memory.h>
25761b6e 34
7c0f6ba6 35#include <linux/uaccess.h>
25761b6e 36#include <asm/mmu_context.h>
37#include <asm/tlbflush.h>
38#include <asm/io.h>
39
40#include "power.h"
41
49368a47 42#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY)
43static bool hibernate_restore_protection;
44static bool hibernate_restore_protection_active;
45
46void enable_restore_image_protection(void)
47{
48 hibernate_restore_protection = true;
49}
50
51static inline void hibernate_restore_protection_begin(void)
52{
53 hibernate_restore_protection_active = hibernate_restore_protection;
54}
55
56static inline void hibernate_restore_protection_end(void)
57{
58 hibernate_restore_protection_active = false;
59}
60
f4311756 61static inline int __must_check hibernate_restore_protect_page(void *page_address)
62{
63 if (hibernate_restore_protection_active)
64 return set_memory_ro((unsigned long)page_address, 1);
65 return 0;
66}
67
f4311756 68static inline int hibernate_restore_unprotect_page(void *page_address)
69{
70 if (hibernate_restore_protection_active)
71 return set_memory_rw((unsigned long)page_address, 1);
72 return 0;
73}
74#else
75static inline void hibernate_restore_protection_begin(void) {}
76static inline void hibernate_restore_protection_end(void) {}
77static inline int __must_check hibernate_restore_protect_page(void *page_address) {return 0; }
78static inline int hibernate_restore_unprotect_page(void *page_address) {return 0; }
49368a47 79#endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */
4c0b6c10 80
81
82/*
83 * The calls to set_direct_map_*() should not fail because remapping a page
84 * here means that we only update protection bits in an existing PTE.
 85 * It is still worth having a warning here if something changes and this
 86 * is no longer the case.
87 */
88static inline void hibernate_map_page(struct page *page)
89{
90 if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
91 int ret = set_direct_map_default_noflush(page);
92
93 if (ret)
94 pr_warn_once("Failed to remap page\n");
95 } else {
96 debug_pagealloc_map_pages(page, 1);
97 }
98}
99
100static inline void hibernate_unmap_page(struct page *page)
101{
102 if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
103 unsigned long addr = (unsigned long)page_address(page);
104 int ret = set_direct_map_invalid_noflush(page);
105
106 if (ret)
107 pr_warn_once("Failed to remap page\n");
108
109 flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
110 } else {
111 debug_pagealloc_unmap_pages(page, 1);
112 }
113}
114
115static int swsusp_page_is_free(struct page *);
116static void swsusp_set_page_forbidden(struct page *);
117static void swsusp_unset_page_forbidden(struct page *);
118
119/*
120 * Number of bytes to reserve for memory allocations made by device drivers
121 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
122 * cause image creation to fail (tunable via /sys/power/reserved_size).
123 */
124unsigned long reserved_size;
125
126void __init hibernate_reserved_size_init(void)
127{
128 reserved_size = SPARE_PAGES * PAGE_SIZE;
129}
130
131/*
132 * Preferred image size in bytes (tunable via /sys/power/image_size).
133 * When it is set to N, swsusp will do its best to ensure the image
134 * size will not exceed N bytes, but if that is impossible, it will
135 * try to create the smallest image possible.
fe419535 136 */
137unsigned long image_size;
138
139void __init hibernate_image_size_init(void)
140{
ca79b0c2 141 image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE;
ac5c24ec 142}
fe419535 143
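/*
 * Editor's note (illustrative, not part of the kernel source): the default
 * above makes image_size 2/5 of RAM. For example, with 16 GiB of RAM and
 * 4 KiB pages, totalram_pages() is about 4194304, so image_size defaults to
 * ((4194304 * 2) / 5) * 4096 bytes, roughly 6.4 GiB.
 */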
144/*
145 * List of PBEs needed for restoring the pages that were allocated before
146 * the suspend and included in the suspend image, but have also been
147 * allocated by the "resume" kernel, so their contents cannot be written
148 * directly to their "original" page frames.
149 */
150struct pbe *restore_pblist;
151
152/* struct linked_page is used to build chains of pages */
153
154#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *))
155
156struct linked_page {
157 struct linked_page *next;
158 char data[LINKED_PAGE_DATA_SIZE];
159} __packed;
160
161/*
162 * List of "safe" pages (ie. pages that were not used by the image kernel
163 * before hibernation) that may be used as temporary storage for image kernel
164 * memory contents.
165 */
166static struct linked_page *safe_pages_list;
167
8357376d 168/* Pointer to an auxiliary buffer (1 page) */
940864dd 169static void *buffer;
7088a5c0 170
171#define PG_ANY 0
172#define PG_SAFE 1
173#define PG_UNSAFE_CLEAR 1
174#define PG_UNSAFE_KEEP 0
175
940864dd 176static unsigned int allocated_unsafe_pages;
f6143aa6 177
178/**
179 * get_image_page - Allocate a page for a hibernation image.
180 * @gfp_mask: GFP mask for the allocation.
181 * @safe_needed: Get pages that were not used before hibernation (restore only)
182 *
183 * During image restoration, for storing the PBE list and the image data, we can
184 * only use memory pages that do not conflict with the pages used before
185 * hibernation. The "unsafe" pages have PageNosaveFree set and we count them
186 * using allocated_unsafe_pages.
187 *
188 * Each allocated image page is marked as PageNosave and PageNosaveFree so that
189 * swsusp_free() can release it.
190 */
8357376d 191static void *get_image_page(gfp_t gfp_mask, int safe_needed)
192{
193 void *res;
194
195 res = (void *)get_zeroed_page(gfp_mask);
196 if (safe_needed)
7be98234 197 while (res && swsusp_page_is_free(virt_to_page(res))) {
f6143aa6 198 /* The page is unsafe, mark it for swsusp_free() */
7be98234 199 swsusp_set_page_forbidden(virt_to_page(res));
940864dd 200 allocated_unsafe_pages++;
201 res = (void *)get_zeroed_page(gfp_mask);
202 }
203 if (res) {
204 swsusp_set_page_forbidden(virt_to_page(res));
205 swsusp_set_page_free(virt_to_page(res));
206 }
207 return res;
208}
209
210static void *__get_safe_page(gfp_t gfp_mask)
211{
212 if (safe_pages_list) {
213 void *ret = safe_pages_list;
214
215 safe_pages_list = safe_pages_list->next;
216 memset(ret, 0, PAGE_SIZE);
217 return ret;
218 }
219 return get_image_page(gfp_mask, PG_SAFE);
220}
221
222unsigned long get_safe_page(gfp_t gfp_mask)
223{
9c744481 224 return (unsigned long)__get_safe_page(gfp_mask);
225}
226
227static struct page *alloc_image_page(gfp_t gfp_mask)
228{
229 struct page *page;
230
231 page = alloc_page(gfp_mask);
232 if (page) {
233 swsusp_set_page_forbidden(page);
234 swsusp_set_page_free(page);
235 }
236 return page;
237}
238
239static void recycle_safe_page(void *page_address)
240{
241 struct linked_page *lp = page_address;
242
243 lp->next = safe_pages_list;
244 safe_pages_list = lp;
245}
246
f6143aa6 247/**
248 * free_image_page - Free a page allocated for hibernation image.
249 * @addr: Address of the page to free.
250 * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page.
251 *
252 * The page to free should have been allocated by get_image_page() (page flags
253 * set by it are affected).
f6143aa6 254 */
255static inline void free_image_page(void *addr, int clear_nosave_free)
256{
257 struct page *page;
258
259 BUG_ON(!virt_addr_valid(addr));
260
261 page = virt_to_page(addr);
262
7be98234 263 swsusp_unset_page_forbidden(page);
f6143aa6 264 if (clear_nosave_free)
7be98234 265 swsusp_unset_page_free(page);
266
267 __free_page(page);
268}
269
270static inline void free_list_of_pages(struct linked_page *list,
271 int clear_page_nosave)
272{
273 while (list) {
274 struct linked_page *lp = list->next;
275
276 free_image_page(list, clear_page_nosave);
277 list = lp;
278 }
279}
280
281/*
282 * struct chain_allocator is used for allocating small objects out of
283 * a linked list of pages called 'the chain'.
284 *
 285 * The chain grows each time there is no room for a new object in
286 * the current page. The allocated objects cannot be freed individually.
287 * It is only possible to free them all at once, by freeing the entire
288 * chain.
289 *
290 * NOTE: The chain allocator may be inefficient if the allocated objects
291 * are not much smaller than PAGE_SIZE.
292 */
293struct chain_allocator {
294 struct linked_page *chain; /* the chain */
295 unsigned int used_space; /* total size of objects allocated out
ef96f639 296 of the current page */
297 gfp_t gfp_mask; /* mask for allocating pages */
298 int safe_needed; /* if set, only "safe" pages are allocated */
299};
300
301static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask,
302 int safe_needed)
303{
304 ca->chain = NULL;
305 ca->used_space = LINKED_PAGE_DATA_SIZE;
306 ca->gfp_mask = gfp_mask;
307 ca->safe_needed = safe_needed;
308}
309
310static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
311{
312 void *ret;
313
314 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
315 struct linked_page *lp;
316
317 lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) :
318 get_image_page(ca->gfp_mask, PG_ANY);
319 if (!lp)
320 return NULL;
321
322 lp->next = ca->chain;
323 ca->chain = lp;
324 ca->used_space = 0;
325 }
326 ret = ca->chain->data + ca->used_space;
327 ca->used_space += size;
328 return ret;
329}
330
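/*
 * Editor's sketch (not part of the kernel source): typical use of the chain
 * allocator within this file, mirroring how the memory bitmap code carves
 * small objects out of whole pages. The function name is illustrative only.
 */
static void *chain_alloc_example(struct chain_allocator *ca)
{
	void *obj;

	/* PG_ANY here; restore paths pass PG_SAFE so only "safe" pages are used */
	chain_init(ca, GFP_KERNEL, PG_ANY);
	obj = chain_alloc(ca, 64);	/* any size well below PAGE_SIZE */
	/*
	 * Objects cannot be freed individually; the whole chain is released
	 * later, e.g. via free_list_of_pages(ca->chain, PG_UNSAFE_CLEAR).
	 */
	return obj;
}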
e5a3b0c5 331/*
ef96f639 332 * Data types related to memory bitmaps.
b788db79 333 *
e4b2897a 334 * Memory bitmap is a structure consisting of many linked lists of
ef96f639 335 * objects. The main list's elements are of type struct zone_bitmap
6be2408a 336 * and each of them corresponds to one zone. For each zone bitmap
337 * object there is a list of objects of type struct bm_block that
 338 * represent each block of the bitmap in which information is stored.
b788db79 339 *
340 * struct memory_bitmap contains a pointer to the main list of zone
341 * bitmap objects, a struct bm_position used for browsing the bitmap,
342 * and a pointer to the list of pages used for allocating all of the
343 * zone bitmap objects and bitmap block objects.
b788db79 344 *
345 * NOTE: It has to be possible to lay out the bitmap in memory
346 * using only allocations of order 0. Additionally, the bitmap is
347 * designed to work with arbitrary number of zones (this is over the
348 * top for now, but let's avoid making unnecessary assumptions ;-).
b788db79 349 *
350 * struct zone_bitmap contains a pointer to a list of bitmap block
351 * objects and a pointer to the bitmap block object that has been
352 * most recently used for setting bits. Additionally, it contains the
353 * PFNs that correspond to the start and end of the represented zone.
b788db79 354 *
355 * struct bm_block contains a pointer to the memory page in which
356 * information is stored (in the form of a block of bitmap)
357 * It also contains the pfns that correspond to the start and end of
358 * the represented memory area.
f469f02d 359 *
360 * The memory bitmap is organized as a radix tree to guarantee fast random
361 * access to the bits. There is one radix tree for each zone (as returned
362 * from create_mem_extents).
f469f02d 363 *
364 * One radix tree is represented by one struct mem_zone_bm_rtree. There are
365 * two linked lists for the nodes of the tree, one for the inner nodes and
 366 * one for the leaf nodes. The linked leaf nodes are used for fast linear
367 * access of the memory bitmap.
f469f02d 368 *
ef96f639 369 * The struct rtree_node represents one node of the radix tree.
370 */
371
372#define BM_END_OF_MAP (~0UL)
373
8de03073 374#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE)
375#define BM_BLOCK_SHIFT (PAGE_SHIFT + 3)
376#define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1)
b788db79 377
378/*
379 * struct rtree_node is a wrapper struct to link the nodes
380 * of the rtree together for easy linear iteration over
381 * bits and easy freeing
382 */
383struct rtree_node {
384 struct list_head list;
385 unsigned long *data;
386};
387
388/*
389 * struct mem_zone_bm_rtree represents a bitmap used for one
390 * populated memory zone.
391 */
392struct mem_zone_bm_rtree {
393 struct list_head list; /* Link Zones together */
394 struct list_head nodes; /* Radix Tree inner nodes */
395 struct list_head leaves; /* Radix Tree leaves */
396 unsigned long start_pfn; /* Zone start page frame */
397 unsigned long end_pfn; /* Zone end page frame + 1 */
398 struct rtree_node *rtree; /* Radix Tree Root */
399 int levels; /* Number of Radix Tree Levels */
400 unsigned int blocks; /* Number of Bitmap Blocks */
401};
402
847aea98 403/* struct bm_position is used for browsing memory bitmaps */
404
405struct bm_position {
406 struct mem_zone_bm_rtree *zone;
407 struct rtree_node *node;
408 unsigned long node_pfn;
005e8ddd 409 unsigned long cur_pfn;
3a20cb17 410 int node_bit;
411};
412
413struct memory_bitmap {
f469f02d 414 struct list_head zones;
b788db79 415 struct linked_page *p_list; /* list of pages used to store zone
416 bitmap objects and bitmap block
417 objects */
418 struct bm_position cur; /* most recently used bit position */
419};
420
421/* Functions that operate on memory bitmaps */
422
423#define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long))
424#if BITS_PER_LONG == 32
425#define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2)
426#else
427#define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3)
428#endif
429#define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1)
430
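/*
 * Editor's note (illustrative, not part of the kernel source): with 4 KiB
 * pages, each leaf page holds BM_BITS_PER_BLOCK = 4096 * 8 = 32768 bits,
 * i.e. one leaf covers 32768 page frames (128 MiB), and BM_BLOCK_SHIFT =
 * 12 + 3 = 15. On 64-bit, BM_ENTRIES_PER_LEVEL = 4096 / 8 = 512, so each
 * inner level multiplies the coverage by 512: a single first-level node
 * already spans 512 leaves, about 64 GiB of physical address space per zone.
 */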
431/**
432 * alloc_rtree_node - Allocate a new node and add it to the radix tree.
433 * @gfp_mask: GFP mask for the allocation.
434 * @safe_needed: Get pages not used before hibernation (restore only)
435 * @ca: Pointer to a linked list of pages ("a chain") to allocate from
436 * @list: Radix Tree node to add.
f469f02d 437 *
438 * This function is used to allocate inner nodes as well as the
 439 * leaf nodes of the radix tree. It also adds the node to the
440 * corresponding linked list passed in by the *list parameter.
441 */
442static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed,
443 struct chain_allocator *ca,
444 struct list_head *list)
445{
446 struct rtree_node *node;
447
448 node = chain_alloc(ca, sizeof(struct rtree_node));
449 if (!node)
450 return NULL;
451
452 node->data = get_image_page(gfp_mask, safe_needed);
453 if (!node->data)
454 return NULL;
455
456 list_add_tail(&node->list, list);
457
458 return node;
459}
460
461/**
 462 * add_rtree_block - Add a new leaf node to the radix tree.
f469f02d 463 *
 464 * The leaf nodes need to be allocated in order to keep the leaves
465 * linked list in order. This is guaranteed by the zone->blocks
466 * counter.
467 */
468static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask,
469 int safe_needed, struct chain_allocator *ca)
470{
471 struct rtree_node *node, *block, **dst;
472 unsigned int levels_needed, block_nr;
473 int i;
474
475 block_nr = zone->blocks;
476 levels_needed = 0;
477
478 /* How many levels do we need for this block nr? */
479 while (block_nr) {
480 levels_needed += 1;
481 block_nr >>= BM_RTREE_LEVEL_SHIFT;
482 }
483
484 /* Make sure the rtree has enough levels */
485 for (i = zone->levels; i < levels_needed; i++) {
486 node = alloc_rtree_node(gfp_mask, safe_needed, ca,
487 &zone->nodes);
488 if (!node)
489 return -ENOMEM;
490
491 node->data[0] = (unsigned long)zone->rtree;
492 zone->rtree = node;
493 zone->levels += 1;
494 }
495
496 /* Allocate new block */
497 block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves);
498 if (!block)
499 return -ENOMEM;
500
501 /* Now walk the rtree to insert the block */
502 node = zone->rtree;
503 dst = &zone->rtree;
504 block_nr = zone->blocks;
505 for (i = zone->levels; i > 0; i--) {
506 int index;
507
508 if (!node) {
509 node = alloc_rtree_node(gfp_mask, safe_needed, ca,
510 &zone->nodes);
511 if (!node)
512 return -ENOMEM;
513 *dst = node;
514 }
515
516 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
517 index &= BM_RTREE_LEVEL_MASK;
518 dst = (struct rtree_node **)&((*dst)->data[index]);
519 node = *dst;
520 }
521
522 zone->blocks += 1;
523 *dst = block;
524
525 return 0;
526}
527
528static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
529 int clear_nosave_free);
530
531/**
532 * create_zone_bm_rtree - Create a radix tree for one zone.
f469f02d 533 *
 534 * Allocates the mem_zone_bm_rtree structure and initializes it.
 535 * This function also allocates and builds the radix tree for the
536 * zone.
f469f02d 537 */
538static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask,
539 int safe_needed,
540 struct chain_allocator *ca,
541 unsigned long start,
542 unsigned long end)
543{
544 struct mem_zone_bm_rtree *zone;
545 unsigned int i, nr_blocks;
546 unsigned long pages;
547
548 pages = end - start;
549 zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree));
550 if (!zone)
551 return NULL;
552
553 INIT_LIST_HEAD(&zone->nodes);
554 INIT_LIST_HEAD(&zone->leaves);
555 zone->start_pfn = start;
556 zone->end_pfn = end;
557 nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
558
559 for (i = 0; i < nr_blocks; i++) {
560 if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) {
561 free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR);
562 return NULL;
563 }
564 }
565
566 return zone;
567}
568
569/**
570 * free_zone_bm_rtree - Free the memory of the radix tree.
f469f02d 571 *
572 * Free all node pages of the radix tree. The mem_zone_bm_rtree
573 * structure itself is not freed here nor are the rtree_node
574 * structs.
575 */
576static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
577 int clear_nosave_free)
578{
579 struct rtree_node *node;
580
581 list_for_each_entry(node, &zone->nodes, list)
582 free_image_page(node->data, clear_nosave_free);
583
584 list_for_each_entry(node, &zone->leaves, list)
585 free_image_page(node->data, clear_nosave_free);
586}
587
588static void memory_bm_position_reset(struct memory_bitmap *bm)
589{
590 bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
591 list);
592 bm->cur.node = list_entry(bm->cur.zone->leaves.next,
593 struct rtree_node, list);
594 bm->cur.node_pfn = 0;
005e8ddd 595 bm->cur.cur_pfn = BM_END_OF_MAP;
3a20cb17 596 bm->cur.node_bit = 0;
597}
598
599static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
600
601struct mem_extent {
602 struct list_head hook;
603 unsigned long start;
604 unsigned long end;
605};
606
b788db79 607/**
608 * free_mem_extents - Free a list of memory extents.
609 * @list: List of extents to free.
b788db79 610 */
611static void free_mem_extents(struct list_head *list)
612{
613 struct mem_extent *ext, *aux;
b788db79 614
615 list_for_each_entry_safe(ext, aux, list, hook) {
616 list_del(&ext->hook);
617 kfree(ext);
618 }
619}
620
621/**
622 * create_mem_extents - Create a list of memory extents.
623 * @list: List to put the extents into.
624 * @gfp_mask: Mask to use for memory allocations.
625 *
626 * The extents represent contiguous ranges of PFNs.
627 */
628static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
b788db79 629{
846705de 630 struct zone *zone;
b788db79 631
846705de 632 INIT_LIST_HEAD(list);
b788db79 633
ee99c71c 634 for_each_populated_zone(zone) {
635 unsigned long zone_start, zone_end;
636 struct mem_extent *ext, *cur, *aux;
637
846705de 638 zone_start = zone->zone_start_pfn;
c33bc315 639 zone_end = zone_end_pfn(zone);
640
641 list_for_each_entry(ext, list, hook)
642 if (zone_start <= ext->end)
643 break;
b788db79 644
645 if (&ext->hook == list || zone_end < ext->start) {
646 /* New extent is necessary */
647 struct mem_extent *new_ext;
648
649 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
650 if (!new_ext) {
651 free_mem_extents(list);
652 return -ENOMEM;
653 }
654 new_ext->start = zone_start;
655 new_ext->end = zone_end;
656 list_add_tail(&new_ext->hook, &ext->hook);
657 continue;
658 }
659
660 /* Merge this zone's range of PFNs with the existing one */
661 if (zone_start < ext->start)
662 ext->start = zone_start;
663 if (zone_end > ext->end)
664 ext->end = zone_end;
665
666 /* More merging may be possible */
667 cur = ext;
668 list_for_each_entry_safe_continue(cur, aux, list, hook) {
669 if (zone_end < cur->start)
670 break;
671 if (zone_end < cur->end)
672 ext->end = cur->end;
673 list_del(&cur->hook);
674 kfree(cur);
675 }
b788db79 676 }
677
678 return 0;
679}
680
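/*
 * Editor's note (illustrative, not part of the kernel source): if two
 * populated zones span PFNs [0, 0x100000) and [0x80000, 0x200000), the
 * second zone overlaps the extent created for the first, so
 * create_mem_extents() merges them into a single extent [0, 0x200000)
 * instead of adding a new one.
 */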
681/**
682 * memory_bm_create - Allocate memory for a memory bitmap.
683 */
684static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
685 int safe_needed)
686{
687 struct chain_allocator ca;
688 struct list_head mem_extents;
689 struct mem_extent *ext;
690 int error;
691
692 chain_init(&ca, gfp_mask, safe_needed);
f469f02d 693 INIT_LIST_HEAD(&bm->zones);
b788db79 694
695 error = create_mem_extents(&mem_extents, gfp_mask);
696 if (error)
697 return error;
b788db79 698
846705de 699 list_for_each_entry(ext, &mem_extents, hook) {
f469f02d 700 struct mem_zone_bm_rtree *zone;
701
702 zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca,
703 ext->start, ext->end);
704 if (!zone) {
705 error = -ENOMEM;
f469f02d 706 goto Error;
9047eb62 707 }
f469f02d 708 list_add_tail(&zone->list, &bm->zones);
b788db79 709 }
846705de 710
711 bm->p_list = ca.chain;
712 memory_bm_position_reset(bm);
713 Exit:
714 free_mem_extents(&mem_extents);
715 return error;
b788db79 716
846705de 717 Error:
718 bm->p_list = ca.chain;
719 memory_bm_free(bm, PG_UNSAFE_CLEAR);
846705de 720 goto Exit;
721}
722
723/**
724 * memory_bm_free - Free memory occupied by the memory bitmap.
725 * @bm: Memory bitmap.
726 */
727static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
728{
f469f02d 729 struct mem_zone_bm_rtree *zone;
b788db79 730
731 list_for_each_entry(zone, &bm->zones, list)
732 free_zone_bm_rtree(zone, clear_nosave_free);
733
b788db79 734 free_list_of_pages(bm->p_list, clear_nosave_free);
846705de 735
f469f02d 736 INIT_LIST_HEAD(&bm->zones);
737}
738
739/**
ef96f639 740 * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap.
07a33823 741 *
742 * Find the bit in memory bitmap @bm that corresponds to the given PFN.
743 * The cur.zone, cur.block and cur.node_pfn members of @bm are updated.
744 *
745 * Walk the radix tree to find the page containing the bit that represents @pfn
746 * and return the position of the bit in @addr and @bit_nr.
07a33823 747 */
748static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
749 void **addr, unsigned int *bit_nr)
750{
751 struct mem_zone_bm_rtree *curr, *zone;
752 struct rtree_node *node;
753 int i, block_nr;
754
755 zone = bm->cur.zone;
756
757 if (pfn >= zone->start_pfn && pfn < zone->end_pfn)
758 goto zone_found;
759
07a33823
JR
760 zone = NULL;
761
762 /* Find the right zone */
763 list_for_each_entry(curr, &bm->zones, list) {
764 if (pfn >= curr->start_pfn && pfn < curr->end_pfn) {
765 zone = curr;
766 break;
767 }
768 }
769
770 if (!zone)
771 return -EFAULT;
772
3a20cb17 773zone_found:
07a33823 774 /*
775 * We have found the zone. Now walk the radix tree to find the leaf node
776 * for our PFN.
07a33823 777 */
778
779 /*
7b7b8a2c 780 * If the zone we wish to scan is the current zone and the
781 * pfn falls into the current node then we do not need to walk
782 * the tree.
783 */
3a20cb17 784 node = bm->cur.node;
785 if (zone == bm->cur.zone &&
786 ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
787 goto node_found;
788
789 node = zone->rtree;
790 block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT;
791
792 for (i = zone->levels; i > 0; i--) {
793 int index;
794
795 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
796 index &= BM_RTREE_LEVEL_MASK;
797 BUG_ON(node->data[index] == 0);
798 node = (struct rtree_node *)node->data[index];
799 }
800
801node_found:
802 /* Update last position */
803 bm->cur.zone = zone;
804 bm->cur.node = node;
805 bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
005e8ddd 806 bm->cur.cur_pfn = pfn;
3a20cb17 807
808 /* Set return values */
809 *addr = node->data;
810 *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK;
811
812 return 0;
813}
814
815static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
816{
817 void *addr;
818 unsigned int bit;
a82f7119 819 int error;
74dfd666 820
821 error = memory_bm_find_bit(bm, pfn, &addr, &bit);
822 BUG_ON(error);
823 set_bit(bit, addr);
824}
825
826static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
827{
828 void *addr;
829 unsigned int bit;
830 int error;
831
832 error = memory_bm_find_bit(bm, pfn, &addr, &bit);
833 if (!error)
834 set_bit(bit, addr);
835
836 return error;
837}
838
839static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
840{
841 void *addr;
842 unsigned int bit;
a82f7119 843 int error;
74dfd666 844
845 error = memory_bm_find_bit(bm, pfn, &addr, &bit);
846 BUG_ON(error);
847 clear_bit(bit, addr);
848}
849
850static void memory_bm_clear_current(struct memory_bitmap *bm)
851{
852 int bit;
853
854 bit = max(bm->cur.node_bit - 1, 0);
855 clear_bit(bit, bm->cur.node->data);
856}
857
858static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
859{
860 return bm->cur.cur_pfn;
861}
862
863static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
864{
865 void *addr;
866 unsigned int bit;
9047eb62 867 int error;
74dfd666 868
869 error = memory_bm_find_bit(bm, pfn, &addr, &bit);
870 BUG_ON(error);
9047eb62 871 return test_bit(bit, addr);
872}
873
874static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
875{
876 void *addr;
877 unsigned int bit;
07a33823 878
9047eb62 879 return !memory_bm_find_bit(bm, pfn, &addr, &bit);
880}
881
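/*
 * Editor's sketch (not part of the kernel source): the create/use/free
 * lifecycle of a memory bitmap as this file uses it; names are illustrative.
 */
static int bitmap_lifecycle_example(unsigned long pfn)
{
	static struct memory_bitmap bm;
	int error;

	error = memory_bm_create(&bm, GFP_KERNEL, PG_ANY);
	if (error)
		return error;
	memory_bm_set_bit(&bm, pfn);	/* BUG()s if pfn belongs to no zone */
	WARN_ON(!memory_bm_test_bit(&bm, pfn));
	memory_bm_clear_bit(&bm, pfn);
	memory_bm_free(&bm, PG_UNSAFE_CLEAR);
	return 0;
}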
3a20cb17 882/*
ef96f639 883 * rtree_next_node - Jump to the next leaf node.
3a20cb17 884 *
885 * Set the position to the beginning of the next node in the
886 * memory bitmap. This is either the next node in the current
887 * zone's radix tree or the first node in the radix tree of the
888 * next zone.
3a20cb17 889 *
ef96f639 890 * Return true if there is a next node, false otherwise.
891 */
892static bool rtree_next_node(struct memory_bitmap *bm)
893{
894 if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
895 bm->cur.node = list_entry(bm->cur.node->list.next,
896 struct rtree_node, list);
897 bm->cur.node_pfn += BM_BITS_PER_BLOCK;
898 bm->cur.node_bit = 0;
0f7d83e8 899 touch_softlockup_watchdog();
900 return true;
901 }
902
903 /* No more nodes, goto next zone */
904 if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
905 bm->cur.zone = list_entry(bm->cur.zone->list.next,
3a20cb17 906 struct mem_zone_bm_rtree, list);
907 bm->cur.node = list_entry(bm->cur.zone->leaves.next,
908 struct rtree_node, list);
909 bm->cur.node_pfn = 0;
910 bm->cur.node_bit = 0;
911 return true;
912 }
913
914 /* No more zones */
915 return false;
916}
917
9047eb62 918/**
467df4cf 919 * memory_bm_next_pfn - Find the next set bit in a memory bitmap.
ef96f639 920 * @bm: Memory bitmap.
3a20cb17 921 *
ef96f639
RW
922 * Starting from the last returned position this function searches for the next
923 * set bit in @bm and returns the PFN represented by it. If no more bits are
924 * set, BM_END_OF_MAP is returned.
9047eb62 925 *
926 * It is required to run memory_bm_position_reset() before the first call to
927 * this function for the given memory bitmap.
3a20cb17 928 */
9047eb62 929static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
930{
931 unsigned long bits, pfn, pages;
932 int bit;
933
934 do {
935 pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn;
936 bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK);
937 bit = find_next_bit(bm->cur.node->data, bits,
938 bm->cur.node_bit);
939 if (bit < bits) {
940 pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
941 bm->cur.node_bit = bit + 1;
005e8ddd 942 bm->cur.cur_pfn = pfn;
943 return pfn;
944 }
945 } while (rtree_next_node(bm));
946
005e8ddd 947 bm->cur.cur_pfn = BM_END_OF_MAP;
948 return BM_END_OF_MAP;
949}
950
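/*
 * Editor's sketch (not part of the kernel source): the canonical way this
 * file walks a bitmap, as used for instance by clear_or_poison_free_pages()
 * further down. The function name is illustrative only.
 */
static void walk_bitmap_example(struct memory_bitmap *bm)
{
	unsigned long pfn;

	memory_bm_position_reset(bm);
	for (pfn = memory_bm_next_pfn(bm); pfn != BM_END_OF_MAP;
	     pfn = memory_bm_next_pfn(bm)) {
		if (!pfn_valid(pfn))
			continue;
		/* operate on pfn_to_page(pfn) here */
	}
}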
951/*
952 * This structure represents a range of page frames the contents of which
953 * should not be saved during hibernation.
74dfd666 954 */
955struct nosave_region {
956 struct list_head list;
957 unsigned long start_pfn;
958 unsigned long end_pfn;
959};
960
961static LIST_HEAD(nosave_regions);
962
963static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone)
964{
965 struct rtree_node *node;
966
967 list_for_each_entry(node, &zone->nodes, list)
968 recycle_safe_page(node->data);
969
970 list_for_each_entry(node, &zone->leaves, list)
971 recycle_safe_page(node->data);
972}
973
974static void memory_bm_recycle(struct memory_bitmap *bm)
975{
976 struct mem_zone_bm_rtree *zone;
977 struct linked_page *p_list;
978
979 list_for_each_entry(zone, &bm->zones, list)
980 recycle_zone_bm_rtree(zone);
981
982 p_list = bm->p_list;
983 while (p_list) {
984 struct linked_page *lp = p_list;
985
986 p_list = lp->next;
987 recycle_safe_page(lp);
988 }
989}
990
74dfd666 991/**
992 * register_nosave_region - Register a region of unsaveable memory.
993 *
994 * Register a range of page frames the contents of which should not be saved
995 * during hibernation (to be used in the early initialization code).
74dfd666 996 */
33569ef3 997void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
998{
999 struct nosave_region *region;
1000
1001 if (start_pfn >= end_pfn)
1002 return;
1003
1004 if (!list_empty(&nosave_regions)) {
1005 /* Try to extend the previous region (they should be sorted) */
1006 region = list_entry(nosave_regions.prev,
1007 struct nosave_region, list);
1008 if (region->end_pfn == start_pfn) {
1009 region->end_pfn = end_pfn;
1010 goto Report;
1011 }
1012 }
33569ef3 1013 /* This allocation cannot fail */
c6f23979 1014 region = memblock_alloc_or_panic(sizeof(struct nosave_region),
33569ef3 1015 SMP_CACHE_BYTES);
1016 region->start_pfn = start_pfn;
1017 region->end_pfn = end_pfn;
1018 list_add_tail(&region->list, &nosave_regions);
1019 Report:
64ec72a1 1020 pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n",
1021 (unsigned long long) start_pfn << PAGE_SHIFT,
1022 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
1023}
1024
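/*
 * Editor's note (illustrative, not part of the kernel source): callers are
 * early architecture/firmware setup code, since this is __init and allocates
 * from memblock. A call might look roughly like:
 *
 *	register_nosave_region(PFN_DOWN(start_phys), PFN_UP(end_phys));
 *
 * so that mark_nosave_pages() later excludes those page frames from the image.
 */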
1025/*
1026 * Set bits in this map correspond to the page frames the contents of which
1027 * should not be saved during the suspend.
1028 */
1029static struct memory_bitmap *forbidden_pages_map;
1030
1031/* Set bits in this map correspond to free page frames. */
1032static struct memory_bitmap *free_pages_map;
1033
1034/*
1035 * Each page frame allocated for creating the image is marked by setting the
1036 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
1037 */
1038
1039void swsusp_set_page_free(struct page *page)
1040{
1041 if (free_pages_map)
1042 memory_bm_set_bit(free_pages_map, page_to_pfn(page));
1043}
1044
1045static int swsusp_page_is_free(struct page *page)
1046{
1047 return free_pages_map ?
1048 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
1049}
1050
1051void swsusp_unset_page_free(struct page *page)
1052{
1053 if (free_pages_map)
1054 memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
1055}
1056
1057static void swsusp_set_page_forbidden(struct page *page)
1058{
1059 if (forbidden_pages_map)
1060 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
1061}
1062
1063int swsusp_page_is_forbidden(struct page *page)
1064{
1065 return forbidden_pages_map ?
1066 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
1067}
1068
1069static void swsusp_unset_page_forbidden(struct page *page)
1070{
1071 if (forbidden_pages_map)
1072 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
1073}
1074
1075/**
1076 * mark_nosave_pages - Mark pages that should not be saved.
1077 * @bm: Memory bitmap.
1078 *
1079 * Set the bits in @bm that correspond to the page frames the contents of which
1080 * should not be saved.
74dfd666 1081 */
1082static void mark_nosave_pages(struct memory_bitmap *bm)
1083{
1084 struct nosave_region *region;
1085
1086 if (list_empty(&nosave_regions))
1087 return;
1088
1089 list_for_each_entry(region, &nosave_regions, list) {
1090 unsigned long pfn;
1091
64ec72a1 1092 pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n",
1093 (unsigned long long) region->start_pfn << PAGE_SHIFT,
1094 ((unsigned long long) region->end_pfn << PAGE_SHIFT)
1095 - 1);
1096
1097 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
1098 if (pfn_valid(pfn)) {
1099 /*
1100 * It is safe to ignore the result of
1101 * mem_bm_set_bit_check() here, since we won't
1102 * touch the PFNs for which the error is
1103 * returned anyway.
1104 */
1105 mem_bm_set_bit_check(bm, pfn);
1106 }
74dfd666
RW
1107 }
1108}
1109
1110/**
ef96f639
RW
1111 * create_basic_memory_bitmaps - Create bitmaps to hold basic page information.
1112 *
1113 * Create bitmaps needed for marking page frames that should not be saved and
1114 * free page frames. The forbidden_pages_map and free_pages_map pointers are
1115 * only modified if everything goes well, because we don't want the bits to be
1116 * touched before both bitmaps are set up.
74dfd666 1117 */
1118int create_basic_memory_bitmaps(void)
1119{
1120 struct memory_bitmap *bm1, *bm2;
a1ca8295 1121 int error;
74dfd666 1122
1123 if (forbidden_pages_map && free_pages_map)
1124 return 0;
1125 else
1126 BUG_ON(forbidden_pages_map || free_pages_map);
74dfd666 1127
0709db60 1128 bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
1129 if (!bm1)
1130 return -ENOMEM;
1131
0709db60 1132 error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
1133 if (error)
1134 goto Free_first_object;
1135
0709db60 1136 bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
1137 if (!bm2)
1138 goto Free_first_bitmap;
1139
0709db60 1140 error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
1141 if (error)
1142 goto Free_second_object;
1143
1144 forbidden_pages_map = bm1;
1145 free_pages_map = bm2;
1146 mark_nosave_pages(forbidden_pages_map);
1147
64ec72a1 1148 pr_debug("Basic memory bitmaps created\n");
1149
1150 return 0;
1151
1152 Free_second_object:
1153 kfree(bm2);
1154 Free_first_bitmap:
480f0de6 1155 memory_bm_free(bm1, PG_UNSAFE_CLEAR);
1156 Free_first_object:
1157 kfree(bm1);
1158 return -ENOMEM;
1159}
1160
1161/**
1162 * free_basic_memory_bitmaps - Free memory bitmaps holding basic information.
1163 *
1164 * Free memory bitmaps allocated by create_basic_memory_bitmaps(). The
1165 * auxiliary pointers are necessary so that the bitmaps themselves are not
1166 * referred to while they are being freed.
74dfd666 1167 */
1168void free_basic_memory_bitmaps(void)
1169{
1170 struct memory_bitmap *bm1, *bm2;
1171
1172 if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
1173 return;
1174
1175 bm1 = forbidden_pages_map;
1176 bm2 = free_pages_map;
1177 forbidden_pages_map = NULL;
1178 free_pages_map = NULL;
1179 memory_bm_free(bm1, PG_UNSAFE_CLEAR);
1180 kfree(bm1);
1181 memory_bm_free(bm2, PG_UNSAFE_CLEAR);
1182 kfree(bm2);
1183
64ec72a1 1184 pr_debug("Basic memory bitmaps freed\n");
1185}
1186
1187static void clear_or_poison_free_page(struct page *page)
1188{
1189 if (page_poisoning_enabled_static())
1190 __kernel_poison_pages(page, 1);
1191 else if (want_init_on_free())
1192 clear_highpage(page);
1193}
1194
1195void clear_or_poison_free_pages(void)
1ad1410f 1196{
1197 struct memory_bitmap *bm = free_pages_map;
1198 unsigned long pfn;
1199
1200 if (WARN_ON(!(free_pages_map)))
1201 return;
1202
03b6c9a3 1203 if (page_poisoning_enabled() || want_init_on_free()) {
18451f9f 1204 memory_bm_position_reset(bm);
1ad1410f 1205 pfn = memory_bm_next_pfn(bm);
1206 while (pfn != BM_END_OF_MAP) {
1207 if (pfn_valid(pfn))
03b6c9a3 1208 clear_or_poison_free_page(pfn_to_page(pfn));
1209
1210 pfn = memory_bm_next_pfn(bm);
1211 }
1212 memory_bm_position_reset(bm);
1213 pr_info("free pages cleared after restore\n");
1ad1410f 1214 }
1215}
1216
b788db79 1217/**
1218 * snapshot_additional_pages - Estimate the number of extra pages needed.
1219 * @zone: Memory zone to carry out the computation for.
1220 *
1221 * Estimate the number of additional pages needed for setting up a hibernation
1222 * image data structures for @zone (usually, the returned value is greater than
1223 * the exact number).
b788db79 1224 */
1225unsigned int snapshot_additional_pages(struct zone *zone)
1226{
f469f02d 1227 unsigned int rtree, nodes;
b788db79 1228
1229 rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
1230 rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node),
1231 LINKED_PAGE_DATA_SIZE);
1232 while (nodes > 1) {
1233 nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL);
1234 rtree += nodes;
1235 }
1236
9047eb62 1237 return 2 * rtree;
1238}
1239
1240/*
1241 * Touch the watchdog for every WD_PAGE_COUNT pages.
1242 */
1243#define WD_PAGE_COUNT (128*1024)
1244
1245static void mark_free_pages(struct zone *zone)
1246{
1247 unsigned long pfn, max_zone_pfn, page_count = WD_PAGE_COUNT;
1248 unsigned long flags;
1249 unsigned int order, t;
1250 struct page *page;
1251
1252 if (zone_is_empty(zone))
1253 return;
1254
1255 spin_lock_irqsave(&zone->lock, flags);
1256
1257 max_zone_pfn = zone_end_pfn(zone);
1258 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1259 if (pfn_valid(pfn)) {
1260 page = pfn_to_page(pfn);
1261
1262 if (!--page_count) {
1263 touch_nmi_watchdog();
1264 page_count = WD_PAGE_COUNT;
1265 }
1266
1267 if (page_zone(page) != zone)
1268 continue;
1269
1270 if (!swsusp_page_is_forbidden(page))
1271 swsusp_unset_page_free(page);
1272 }
1273
1274 for_each_migratetype_order(order, t) {
1275 list_for_each_entry(page,
1276 &zone->free_area[order].free_list[t], buddy_list) {
1277 unsigned long i;
1278
1279 pfn = page_to_pfn(page);
1280 for (i = 0; i < (1UL << order); i++) {
1281 if (!--page_count) {
1282 touch_nmi_watchdog();
1283 page_count = WD_PAGE_COUNT;
1284 }
1285 swsusp_set_page_free(pfn_to_page(pfn + i));
1286 }
1287 }
1288 }
1289 spin_unlock_irqrestore(&zone->lock, flags);
1290}
1291
1292#ifdef CONFIG_HIGHMEM
1293/**
1294 * count_free_highmem_pages - Compute the total number of free highmem pages.
1295 *
1296 * The returned number is system-wide.
8357376d 1297 */
1298static unsigned int count_free_highmem_pages(void)
1299{
1300 struct zone *zone;
1301 unsigned int cnt = 0;
1302
1303 for_each_populated_zone(zone)
1304 if (is_highmem(zone))
d23ad423 1305 cnt += zone_page_state(zone, NR_FREE_PAGES);
1306
1307 return cnt;
1308}
1309
1310/**
1311 * saveable_highmem_page - Check if a highmem page is saveable.
1312 *
1313 * Determine whether a highmem page should be included in a hibernation image.
8357376d 1314 *
1315 * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
1316 * and it isn't part of a free chunk of pages.
8357376d 1317 */
846705de 1318static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
1319{
1320 struct page *page;
1321
1322 if (!pfn_valid(pfn))
1323 return NULL;
1324
1325 page = pfn_to_online_page(pfn);
1326 if (!page || page_zone(page) != zone)
846705de 1327 return NULL;
1328
1329 BUG_ON(!PageHighMem(page));
1330
1331 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
1332 return NULL;
1333
1334 if (PageReserved(page) || PageOffline(page))
1335 return NULL;
1336
1337 if (page_is_guard(page))
1338 return NULL;
1339
1340 return page;
1341}
1342
1343/**
ef96f639 1344 * count_highmem_pages - Compute the total number of saveable highmem pages.
8357376d 1345 */
fe419535 1346static unsigned int count_highmem_pages(void)
1347{
1348 struct zone *zone;
1349 unsigned int n = 0;
1350
98e73dc5 1351 for_each_populated_zone(zone) {
1352 unsigned long pfn, max_zone_pfn;
1353
1354 if (!is_highmem(zone))
1355 continue;
1356
1357 mark_free_pages(zone);
c33bc315 1358 max_zone_pfn = zone_end_pfn(zone);
8357376d 1359 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
846705de 1360 if (saveable_highmem_page(zone, pfn))
1361 n++;
1362 }
1363 return n;
1364}
1365#endif /* CONFIG_HIGHMEM */
1366
25761b6e 1367/**
1368 * saveable_page - Check if the given page is saveable.
1369 *
1370 * Determine whether a non-highmem page should be included in a hibernation
1371 * image.
25761b6e 1372 *
1373 * We should save the page if it isn't Nosave, and is not in the range
1374 * of pages statically defined as 'unsaveable', and it isn't part of
1375 * a free chunk of pages.
25761b6e 1376 */
846705de 1377static struct page *saveable_page(struct zone *zone, unsigned long pfn)
25761b6e 1378{
de491861 1379 struct page *page;
1380
1381 if (!pfn_valid(pfn))
ae83c5ee 1382 return NULL;
25761b6e 1383
1384 page = pfn_to_online_page(pfn);
1385 if (!page || page_zone(page) != zone)
846705de 1386 return NULL;
ae83c5ee 1387
1388 BUG_ON(PageHighMem(page));
1389
7be98234 1390 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
ae83c5ee 1391 return NULL;
8357376d 1392
1393 if (PageOffline(page))
1394 return NULL;
1395
1396 if (PageReserved(page)
1397 && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
ae83c5ee 1398 return NULL;
25761b6e 1399
1400 if (page_is_guard(page))
1401 return NULL;
1402
ae83c5ee 1403 return page;
1404}
1405
8357376d 1406/**
ef96f639 1407 * count_data_pages - Compute the total number of saveable non-highmem pages.
8357376d 1408 */
fe419535 1409static unsigned int count_data_pages(void)
1410{
1411 struct zone *zone;
ae83c5ee 1412 unsigned long pfn, max_zone_pfn;
dc19d507 1413 unsigned int n = 0;
25761b6e 1414
98e73dc5 1415 for_each_populated_zone(zone) {
25761b6e
RW
1416 if (is_highmem(zone))
1417 continue;
8357376d 1418
25761b6e 1419 mark_free_pages(zone);
c33bc315 1420 max_zone_pfn = zone_end_pfn(zone);
ae83c5ee 1421 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
846705de 1422 if (saveable_page(zone, pfn))
8357376d 1423 n++;
25761b6e 1424 }
a0f49651 1425 return n;
1426}
1427
1428/*
1429 * This is needed, because copy_page and memcpy are not usable for copying
1430 * task structs. Returns true if the page was filled with only zeros,
1431 * otherwise false.
8357376d 1432 */
005e8ddd 1433static inline bool do_copy_page(long *dst, long *src)
f623f0db
RW
1436 int n;
1437
1438 for (n = PAGE_SIZE / sizeof(long); n; n--) {
1439 z |= *src;
f623f0db 1440 *dst++ = *src++;
1441 }
1442 return !z;
1443}
1444
8a235efa 1445/**
1446 * safe_copy_page - Copy a page in a safe way.
1447 *
1448 * Check if the page we are going to copy is marked as present in the kernel
 1449 * page tables. This is always the case if CONFIG_DEBUG_PAGEALLOC or
1450 * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
1451 * always returns 'true'. Returns true if the page was entirely composed of
1452 * zeros, otherwise it will return false.
8a235efa 1453 */
005e8ddd 1454static bool safe_copy_page(void *dst, struct page *s_page)
8a235efa 1455{
1456 bool zeros_only;
1457
8a235efa 1458 if (kernel_page_present(s_page)) {
005e8ddd 1459 zeros_only = do_copy_page(dst, page_address(s_page));
8a235efa 1460 } else {
2abf962a 1461 hibernate_map_page(s_page);
005e8ddd 1462 zeros_only = do_copy_page(dst, page_address(s_page));
2abf962a 1463 hibernate_unmap_page(s_page);
8a235efa 1464 }
005e8ddd 1465 return zeros_only;
1466}
1467
8357376d 1468#ifdef CONFIG_HIGHMEM
efd5a852 1469static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn)
1470{
1471 return is_highmem(zone) ?
846705de 1472 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
1473}
1474
005e8ddd 1475static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
1476{
1477 struct page *s_page, *d_page;
1478 void *src, *dst;
005e8ddd 1479 bool zeros_only;
1480
1481 s_page = pfn_to_page(src_pfn);
1482 d_page = pfn_to_page(dst_pfn);
1483 if (PageHighMem(s_page)) {
1484 src = kmap_local_page(s_page);
1485 dst = kmap_local_page(d_page);
005e8ddd 1486 zeros_only = do_copy_page(dst, src);
1487 kunmap_local(dst);
1488 kunmap_local(src);
8357376d 1489 } else {
8357376d 1490 if (PageHighMem(d_page)) {
1491 /*
1492 * The page pointed to by src may contain some kernel
1493 * data modified by kmap_atomic()
1494 */
005e8ddd 1495 zeros_only = safe_copy_page(buffer, s_page);
489c693b 1496 dst = kmap_local_page(d_page);
3ecb01df 1497 copy_page(dst, buffer);
489c693b 1498 kunmap_local(dst);
8357376d 1499 } else {
005e8ddd 1500 zeros_only = safe_copy_page(page_address(d_page), s_page);
8357376d
RW
1501 }
1502 }
005e8ddd 1503 return zeros_only;
1504}
1505#else
846705de 1506#define page_is_saveable(zone, pfn) saveable_page(zone, pfn)
8357376d 1507
005e8ddd 1508static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
8357376d 1509{
005e8ddd 1510 return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
8a235efa 1511 pfn_to_page(src_pfn));
1512}
1513#endif /* CONFIG_HIGHMEM */
1514
1515/*
1516 * Copy data pages will copy all pages into pages pulled from the copy_bm.
1517 * If a page was entirely filled with zeros it will be marked in the zero_bm.
1518 *
1519 * Returns the number of pages copied.
1520 */
1521static unsigned long copy_data_pages(struct memory_bitmap *copy_bm,
1522 struct memory_bitmap *orig_bm,
1523 struct memory_bitmap *zero_bm)
25761b6e 1524{
005e8ddd 1525 unsigned long copied_pages = 0;
25761b6e 1526 struct zone *zone;
005e8ddd 1527 unsigned long pfn, copy_pfn;
25761b6e 1528
98e73dc5 1529 for_each_populated_zone(zone) {
1530 unsigned long max_zone_pfn;
1531
25761b6e 1532 mark_free_pages(zone);
c33bc315 1533 max_zone_pfn = zone_end_pfn(zone);
b788db79 1534 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
8357376d 1535 if (page_is_saveable(zone, pfn))
b788db79 1536 memory_bm_set_bit(orig_bm, pfn);
25761b6e 1537 }
1538 memory_bm_position_reset(orig_bm);
1539 memory_bm_position_reset(copy_bm);
005e8ddd 1540 copy_pfn = memory_bm_next_pfn(copy_bm);
df7c4872 1541 for(;;) {
b788db79 1542 pfn = memory_bm_next_pfn(orig_bm);
1543 if (unlikely(pfn == BM_END_OF_MAP))
1544 break;
1545 if (copy_data_page(copy_pfn, pfn)) {
1546 memory_bm_set_bit(zero_bm, pfn);
1547 /* Use this copy_pfn for a page that is not full of zeros */
1548 continue;
1549 }
1550 copied_pages++;
1551 copy_pfn = memory_bm_next_pfn(copy_bm);
df7c4872 1552 }
005e8ddd 1553 return copied_pages;
1554}
1555
1556/* Total number of image pages */
1557static unsigned int nr_copy_pages;
1558/* Number of pages needed for saving the original pfns of the image pages */
1559static unsigned int nr_meta_pages;
1560/* Number of zero pages */
1561static unsigned int nr_zero_pages;
1562
1563/*
1564 * Numbers of normal and highmem page frames allocated for hibernation image
1565 * before suspending devices.
1566 */
0bae5fd3 1567static unsigned int alloc_normal, alloc_highmem;
1568/*
1569 * Memory bitmap used for marking saveable pages (during hibernation) or
1570 * hibernation image pages (during restore)
1571 */
1572static struct memory_bitmap orig_bm;
1573/*
1574 * Memory bitmap used during hibernation for marking allocated page frames that
1575 * will contain copies of saveable pages. During restore it is initially used
1576 * for marking hibernation image pages, but then the set bits from it are
1577 * duplicated in @orig_bm and it is released. On highmem systems it is next
1578 * used for marking "safe" highmem pages, but it has to be reinitialized for
1579 * this purpose.
1580 */
1581static struct memory_bitmap copy_bm;
8357376d 1582
1583/* Memory bitmap which tracks which saveable pages were zero filled. */
1584static struct memory_bitmap zero_bm;
1585
25761b6e 1586/**
ef96f639 1587 * swsusp_free - Free pages allocated for hibernation image.
cd560bb2 1588 *
6be2408a 1589 * Image pages are allocated before snapshot creation, so they need to be
ef96f639 1590 * released after resume.
25761b6e 1591 */
1592void swsusp_free(void)
1593{
fdd64ed5 1594 unsigned long fb_pfn, fr_pfn;
6efde38f 1595
1596 if (!forbidden_pages_map || !free_pages_map)
1597 goto out;
1598
1599 memory_bm_position_reset(forbidden_pages_map);
1600 memory_bm_position_reset(free_pages_map);
1601
1602loop:
1603 fr_pfn = memory_bm_next_pfn(free_pages_map);
1604 fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
1605
1606 /*
1607 * Find the next bit set in both bitmaps. This is guaranteed to
1608 * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP.
1609 */
1610 do {
1611 if (fb_pfn < fr_pfn)
1612 fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
1613 if (fr_pfn < fb_pfn)
1614 fr_pfn = memory_bm_next_pfn(free_pages_map);
1615 } while (fb_pfn != fr_pfn);
1616
1617 if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
1618 struct page *page = pfn_to_page(fr_pfn);
1619
1620 memory_bm_clear_current(forbidden_pages_map);
1621 memory_bm_clear_current(free_pages_map);
4c0b6c10 1622 hibernate_restore_unprotect_page(page_address(page));
1623 __free_page(page);
1624 goto loop;
25761b6e 1625 }
1626
1627out:
1628 nr_copy_pages = 0;
1629 nr_meta_pages = 0;
005e8ddd 1630 nr_zero_pages = 0;
75534b50 1631 restore_pblist = NULL;
6e1819d6 1632 buffer = NULL;
1633 alloc_normal = 0;
1634 alloc_highmem = 0;
4c0b6c10 1635 hibernate_restore_protection_end();
1636}
1637
1638/* Helper functions used for the shrinking of memory. */
1639
1640#define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN)
1641
fe419535 1642/**
ef96f639 1643 * preallocate_image_pages - Allocate a number of pages for hibernation image.
1644 * @nr_pages: Number of page frames to allocate.
1645 * @mask: GFP flags to use for the allocation.
fe419535 1646 *
1647 * Return value: Number of page frames actually allocated
1648 */
1649static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
1650{
1651 unsigned long nr_alloc = 0;
1652
1653 while (nr_pages > 0) {
1654 struct page *page;
1655
1656 page = alloc_image_page(mask);
1657 if (!page)
4bb33435 1658 break;
1659 memory_bm_set_bit(&copy_bm, page_to_pfn(page));
1660 if (PageHighMem(page))
1661 alloc_highmem++;
1662 else
1663 alloc_normal++;
1664 nr_pages--;
1665 nr_alloc++;
1666 }
1667
1668 return nr_alloc;
1669}
1670
1671static unsigned long preallocate_image_memory(unsigned long nr_pages,
1672 unsigned long avail_normal)
4bb33435 1673{
1674 unsigned long alloc;
1675
1676 if (avail_normal <= alloc_normal)
1677 return 0;
1678
1679 alloc = avail_normal - alloc_normal;
1680 if (nr_pages < alloc)
1681 alloc = nr_pages;
1682
1683 return preallocate_image_pages(alloc, GFP_IMAGE);
1684}
1685
1686#ifdef CONFIG_HIGHMEM
1687static unsigned long preallocate_image_highmem(unsigned long nr_pages)
1688{
1689 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
1690}
1691
1692/**
ef96f639 1693 * __fraction - Compute (an approximation of) x * (multiplier / base).
fe419535 1694 */
1695static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
1696{
809ed78a 1697 return div64_u64(x * multiplier, base);
4bb33435 1698}
fe419535 1699
4bb33435 1700static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1701 unsigned long highmem,
1702 unsigned long total)
fe419535 1703{
1704 unsigned long alloc = __fraction(nr_pages, highmem, total);
1705
1706 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
fe419535 1707}
1708#else /* CONFIG_HIGHMEM */
1709static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
1710{
1711 return 0;
1712}
1713
1714static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
efd5a852
RW
1715 unsigned long highmem,
1716 unsigned long total)
1717{
1718 return 0;
1719}
1720#endif /* CONFIG_HIGHMEM */
fe419535 1721
4bb33435 1722/**
ef96f639 1723 * free_unnecessary_pages - Release preallocated pages not needed for the image.
64a473cb 1724 */
a64fc82c 1725static unsigned long free_unnecessary_pages(void)
64a473cb 1726{
a64fc82c 1727 unsigned long save, to_free_normal, to_free_highmem, free;
64a473cb 1728
1729 save = count_data_pages();
1730 if (alloc_normal >= save) {
1731 to_free_normal = alloc_normal - save;
1732 save = 0;
1733 } else {
1734 to_free_normal = 0;
1735 save -= alloc_normal;
1736 }
1737 save += count_highmem_pages();
1738 if (alloc_highmem >= save) {
1739 to_free_highmem = alloc_highmem - save;
1740 } else {
1741 to_free_highmem = 0;
1742 save -= alloc_highmem;
1743 if (to_free_normal > save)
1744 to_free_normal -= save;
1745 else
1746 to_free_normal = 0;
64a473cb 1747 }
a64fc82c 1748 free = to_free_normal + to_free_highmem;
1749
1750 memory_bm_position_reset(&copy_bm);
1751
a9c9b442 1752 while (to_free_normal > 0 || to_free_highmem > 0) {
1753 unsigned long pfn = memory_bm_next_pfn(&copy_bm);
1754 struct page *page = pfn_to_page(pfn);
1755
1756 if (PageHighMem(page)) {
1757 if (!to_free_highmem)
1758 continue;
1759 to_free_highmem--;
1760 alloc_highmem--;
1761 } else {
1762 if (!to_free_normal)
1763 continue;
1764 to_free_normal--;
1765 alloc_normal--;
1766 }
1767 memory_bm_clear_bit(&copy_bm, pfn);
1768 swsusp_unset_page_forbidden(page);
1769 swsusp_unset_page_free(page);
1770 __free_page(page);
1771 }
a64fc82c
WK
1772
1773 return free;
64a473cb
RW
1774}
1775
ef4aede3 1776/**
ef96f639 1777 * minimum_image_size - Estimate the minimum acceptable size of an image.
ef4aede3
RW
1778 * @saveable: Number of saveable pages in the system.
1779 *
1780 * We want to avoid attempting to free too much memory too hard, so estimate the
1781 * minimum acceptable size of a hibernation image to use as the lower limit for
1782 * preallocating memory.
1783 *
1784 * We assume that the minimum image size should be proportional to
1785 *
1786 * [number of saveable pages] - [number of pages that can be freed in theory]
1787 *
1788 * where the second term is the sum of (1) reclaimable slab pages, (2) active
bdbc98ab 1789 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages.
ef4aede3
RW
1790 */
1791static unsigned long minimum_image_size(unsigned long saveable)
1792{
1793 unsigned long size;
1794
d42f3245 1795 size = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B)
599d0c95
MG
1796 + global_node_page_state(NR_ACTIVE_ANON)
1797 + global_node_page_state(NR_INACTIVE_ANON)
1798 + global_node_page_state(NR_ACTIVE_FILE)
bdbc98ab 1799 + global_node_page_state(NR_INACTIVE_FILE);
ef4aede3
RW
1800
1801 return saveable <= size ? 0 : saveable - size;
1802}
1803
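A quick numeric illustration of the estimate above (values assumed): with about 800,000 saveable pages and roughly 500,000 pages of reclaimable slab plus anonymous and file LRU pages,

	minimum_image_size = 800,000 - 500,000 = 300,000 pages

and preallocation will not try to push the image below that. If the reclaimable total is at least as large as the saveable count, the function returns 0 and imposes no lower limit.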
64a473cb 1804/**
ef96f639 1805 * hibernate_preallocate_memory - Preallocate memory for hibernation image.
4bb33435
RW
1806 *
1807 * To create a hibernation image it is necessary to make a copy of every page
1808 * frame in use. We also need a number of page frames to be free during
1809 * hibernation for allocations made while saving the image and for device
1810 * drivers, in case they need to allocate memory from their hibernation
ddeb6487 1811 * callbacks (these two numbers are given by PAGES_FOR_IO, which is a rough
b0c609ab 1812 * estimate, and by reserved_size divided by PAGE_SIZE, which is tunable through
ddeb6487
RW
 1813 * /sys/power/reserved_size, respectively). To make this happen, we compute the
1814 * total number of available page frames and allocate at least
4bb33435 1815 *
6e5d7300 1816 * ([page frames total] - PAGES_FOR_IO - [metadata pages]) / 2
1817 * - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
4bb33435
RW
1818 *
1819 * of them, which corresponds to the maximum size of a hibernation image.
1820 *
1821 * If image_size is set below the number following from the above formula,
1822 * the preallocation of memory is continued until the total number of saveable
ef4aede3
RW
1823 * pages in the system is below the requested image size or the minimum
1824 * acceptable image size returned by minimum_image_size(), whichever is greater.
4bb33435 1825 */
64a473cb 1826int hibernate_preallocate_memory(void)
fe419535 1827{
fe419535 1828 struct zone *zone;
4bb33435 1829 unsigned long saveable, size, max_size, count, highmem, pages = 0;
6715045d 1830 unsigned long alloc, save_highmem, pages_highmem, avail_normal;
db597605 1831 ktime_t start, stop;
64a473cb 1832 int error;
fe419535 1833
7a7b99bf 1834 pr_info("Preallocating image memory\n");
db597605 1835 start = ktime_get();
fe419535 1836
64a473cb 1837 error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
7a7b99bf
LS
1838 if (error) {
1839 pr_err("Cannot allocate original bitmap\n");
64a473cb 1840 goto err_out;
7a7b99bf 1841 }
64a473cb
RW
1842
1843 error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
7a7b99bf
LS
1844 if (error) {
1845 pr_err("Cannot allocate copy bitmap\n");
64a473cb 1846 goto err_out;
7a7b99bf 1847 }
64a473cb 1848
005e8ddd
BG
1849 error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
1850 if (error) {
1851 pr_err("Cannot allocate zero bitmap\n");
1852 goto err_out;
1853 }
1854
64a473cb
RW
1855 alloc_normal = 0;
1856 alloc_highmem = 0;
005e8ddd 1857 nr_zero_pages = 0;
64a473cb 1858
4bb33435 1859 /* Count the number of saveable data pages. */
64a473cb 1860 save_highmem = count_highmem_pages();
4bb33435 1861 saveable = count_data_pages();
fe419535 1862
4bb33435
RW
1863 /*
1864 * Compute the total number of page frames we can use (count) and the
1865 * number of pages needed for image metadata (size).
1866 */
1867 count = saveable;
64a473cb
RW
1868 saveable += save_highmem;
1869 highmem = save_highmem;
4bb33435
RW
1870 size = 0;
1871 for_each_populated_zone(zone) {
1872 size += snapshot_additional_pages(zone);
1873 if (is_highmem(zone))
1874 highmem += zone_page_state(zone, NR_FREE_PAGES);
1875 else
1876 count += zone_page_state(zone, NR_FREE_PAGES);
1877 }
6715045d 1878 avail_normal = count;
4bb33435
RW
1879 count += highmem;
1880 count -= totalreserve_pages;
1881
1882 /* Compute the maximum number of saveable pages to leave in memory. */
ddeb6487
RW
1883 max_size = (count - (size + PAGES_FOR_IO)) / 2
1884 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
266f1a25 1885 /* Compute the desired number of image pages specified by image_size. */
4bb33435
RW
1886 size = DIV_ROUND_UP(image_size, PAGE_SIZE);
1887 if (size > max_size)
1888 size = max_size;
1889 /*
266f1a25
RW
1890 * If the desired number of image pages is at least as large as the
1891 * current number of saveable pages in memory, allocate page frames for
1892 * the image and we're done.
4bb33435 1893 */
64a473cb
RW
1894 if (size >= saveable) {
1895 pages = preallocate_image_highmem(save_highmem);
6715045d 1896 pages += preallocate_image_memory(saveable - pages, avail_normal);
4bb33435 1897 goto out;
64a473cb 1898 }
4bb33435 1899
ef4aede3
RW
1900 /* Estimate the minimum size of the image. */
1901 pages = minimum_image_size(saveable);
6715045d
RW
1902 /*
1903 * To avoid excessive pressure on the normal zone, leave room in it to
1904 * accommodate an image of the minimum size (unless it's already too
1905 * small, in which case don't preallocate pages from it at all).
1906 */
1907 if (avail_normal > pages)
1908 avail_normal -= pages;
1909 else
1910 avail_normal = 0;
ef4aede3
RW
1911 if (size < pages)
1912 size = min_t(unsigned long, pages, max_size);
1913
4bb33435
RW
1914 /*
 1915 * Let the memory management subsystem know that we're going to need to
 1916 * allocate a large number of page frames and make it free some memory.
1917 * NOTE: If this is not done, performance will be hurt badly in some
1918 * test cases.
1919 */
1920 shrink_all_memory(saveable - size);
1921
1922 /*
1923 * The number of saveable pages in memory was too high, so apply some
1924 * pressure to decrease it. First, make room for the largest possible
1925 * image and fail if that doesn't work. Next, try to decrease the size
ef4aede3
RW
1926 * of the image as much as indicated by 'size' using allocations from
1927 * highmem and non-highmem zones separately.
4bb33435
RW
1928 */
1929 pages_highmem = preallocate_image_highmem(highmem / 2);
fd432b9f
AL
1930 alloc = count - max_size;
1931 if (alloc > pages_highmem)
1932 alloc -= pages_highmem;
1933 else
1934 alloc = 0;
6715045d
RW
1935 pages = preallocate_image_memory(alloc, avail_normal);
1936 if (pages < alloc) {
1937 /* We have exhausted non-highmem pages, try highmem. */
1938 alloc -= pages;
1939 pages += pages_highmem;
1940 pages_highmem = preallocate_image_highmem(alloc);
7a7b99bf
LS
1941 if (pages_highmem < alloc) {
1942 pr_err("Image allocation is %lu pages short\n",
1943 alloc - pages_highmem);
6715045d 1944 goto err_out;
7a7b99bf 1945 }
6715045d
RW
1946 pages += pages_highmem;
1947 /*
1948 * size is the desired number of saveable pages to leave in
1949 * memory, so try to preallocate (all memory - size) pages.
1950 */
1951 alloc = (count - pages) - size;
1952 pages += preallocate_image_highmem(alloc);
1953 } else {
1954 /*
1955 * There are approximately max_size saveable pages at this point
1956 * and we want to reduce this number down to size.
1957 */
1958 alloc = max_size - size;
1959 size = preallocate_highmem_fraction(alloc, highmem, count);
1960 pages_highmem += size;
1961 alloc -= size;
1962 size = preallocate_image_memory(alloc, avail_normal);
1963 pages_highmem += preallocate_image_highmem(alloc - size);
1964 pages += pages_highmem + size;
1965 }
4bb33435 1966
64a473cb
RW
1967 /*
1968 * We only need as many page frames for the image as there are saveable
 1969 * pages in memory, but we have allocated more. Release the excess
 1970 * ones now.
1971 */
a64fc82c 1972 pages -= free_unnecessary_pages();
4bb33435
RW
1973
1974 out:
db597605 1975 stop = ktime_get();
5c0e9de0 1976 pr_info("Allocated %lu pages for snapshot\n", pages);
db597605 1977 swsusp_show_speed(start, stop, pages, "Allocated");
fe419535
RW
1978
1979 return 0;
64a473cb
RW
1980
1981 err_out:
64a473cb
RW
1982 swsusp_free();
1983 return -ENOMEM;
fe419535
RW
1984}
1985
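As a rough worked example of the max_size formula above (all numbers assumed): on a machine with about 1,000,000 usable page frames, 2,000 pages of image metadata, PAGES_FOR_IO amounting to 1,024 pages and reserved_size corresponding to 512 pages,

	max_size = (1,000,000 - (2,000 + 1,024)) / 2 - 2 * 512
	         = 498,488 - 1,024
	         = 497,464 page frames

which is a little under 1.9 GiB with 4 KiB pages; the code first makes room for an image of that maximum size and fails if it cannot.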
8357376d
RW
1986#ifdef CONFIG_HIGHMEM
1987/**
ef96f639
RW
1988 * count_pages_for_highmem - Count non-highmem pages needed for copying highmem.
1989 *
1990 * Compute the number of non-highmem pages that will be necessary for creating
1991 * copies of highmem pages.
1992 */
8357376d
RW
1993static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1994{
64a473cb 1995 unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
8357376d
RW
1996
1997 if (free_highmem >= nr_highmem)
1998 nr_highmem = 0;
1999 else
2000 nr_highmem -= free_highmem;
2001
2002 return nr_highmem;
2003}
2004#else
efd5a852 2005static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
8357376d 2006#endif /* CONFIG_HIGHMEM */
25761b6e
RW
2007
2008/**
ef96f639 2009 * enough_free_mem - Check if there is enough free memory for the image.
25761b6e 2010 */
8357376d 2011static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
25761b6e 2012{
e5e2fa78 2013 struct zone *zone;
64a473cb 2014 unsigned int free = alloc_normal;
e5e2fa78 2015
98e73dc5 2016 for_each_populated_zone(zone)
8357376d 2017 if (!is_highmem(zone))
d23ad423 2018 free += zone_page_state(zone, NR_FREE_PAGES);
940864dd 2019
8357376d 2020 nr_pages += count_pages_for_highmem(nr_highmem);
64ec72a1
JP
2021 pr_debug("Normal pages needed: %u + %u, available pages: %u\n",
2022 nr_pages, PAGES_FOR_IO, free);
940864dd 2023
64a473cb 2024 return free > nr_pages + PAGES_FOR_IO;
25761b6e
RW
2025}
2026
8357376d
RW
2027#ifdef CONFIG_HIGHMEM
2028/**
ef96f639
RW
2029 * get_highmem_buffer - Allocate a buffer for highmem pages.
2030 *
2031 * If there are some highmem pages in the hibernation image, we may need a
2032 * buffer to copy them and/or load their data.
8357376d 2033 */
8357376d
RW
2034static inline int get_highmem_buffer(int safe_needed)
2035{
453f85d4 2036 buffer = get_image_page(GFP_ATOMIC, safe_needed);
8357376d
RW
2037 return buffer ? 0 : -ENOMEM;
2038}
2039
2040/**
467df4cf 2041 * alloc_highmem_pages - Allocate some highmem pages for the image.
ef96f639
RW
2042 *
2043 * Try to allocate as many pages as needed, but if the number of free highmem
2044 * pages is less than that, allocate them all.
8357376d 2045 */
efd5a852
RW
2046static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
2047 unsigned int nr_highmem)
8357376d
RW
2048{
2049 unsigned int to_alloc = count_free_highmem_pages();
2050
2051 if (to_alloc > nr_highmem)
2052 to_alloc = nr_highmem;
2053
2054 nr_highmem -= to_alloc;
2055 while (to_alloc-- > 0) {
2056 struct page *page;
2057
d0164adc 2058 page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
8357376d
RW
2059 memory_bm_set_bit(bm, page_to_pfn(page));
2060 }
2061 return nr_highmem;
2062}
2063#else
2064static inline int get_highmem_buffer(int safe_needed) { return 0; }
2065
efd5a852
RW
2066static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
2067 unsigned int n) { return 0; }
8357376d
RW
2068#endif /* CONFIG_HIGHMEM */
2069
2070/**
ef96f639 2071 * swsusp_alloc - Allocate memory for hibernation image.
8357376d 2072 *
ef96f639
RW
2073 * We first try to allocate as many highmem pages as there are
2074 * saveable highmem pages in the system. If that fails, we allocate
2075 * non-highmem pages for the copies of the remaining highmem ones.
8357376d 2076 *
ef96f639
RW
2077 * In this approach it is likely that the copies of highmem pages will
2078 * also be located in the high memory, because of the way in which
2079 * copy_data_pages() works.
8357376d 2080 */
eba74c29 2081static int swsusp_alloc(struct memory_bitmap *copy_bm,
efd5a852 2082 unsigned int nr_pages, unsigned int nr_highmem)
054bd4c1 2083{
8357376d 2084 if (nr_highmem > 0) {
2e725a06 2085 if (get_highmem_buffer(PG_ANY))
64a473cb
RW
2086 goto err_out;
2087 if (nr_highmem > alloc_highmem) {
2088 nr_highmem -= alloc_highmem;
2089 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
2090 }
8357376d 2091 }
64a473cb
RW
2092 if (nr_pages > alloc_normal) {
2093 nr_pages -= alloc_normal;
2094 while (nr_pages-- > 0) {
2095 struct page *page;
2096
453f85d4 2097 page = alloc_image_page(GFP_ATOMIC);
64a473cb
RW
2098 if (!page)
2099 goto err_out;
2100 memory_bm_set_bit(copy_bm, page_to_pfn(page));
2101 }
25761b6e 2102 }
64a473cb 2103
b788db79 2104 return 0;
25761b6e 2105
64a473cb 2106 err_out:
b788db79 2107 swsusp_free();
2e725a06 2108 return -ENOMEM;
25761b6e
RW
2109}
2110
722a9f92 2111asmlinkage __visible int swsusp_save(void)
25761b6e 2112{
8357376d 2113 unsigned int nr_pages, nr_highmem;
25761b6e 2114
7a7b99bf 2115 pr_info("Creating image:\n");
25761b6e 2116
9f8f2172 2117 drain_local_pages(NULL);
a0f49651 2118 nr_pages = count_data_pages();
8357376d 2119 nr_highmem = count_highmem_pages();
64ec72a1 2120 pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
25761b6e 2121
8357376d 2122 if (!enough_free_mem(nr_pages, nr_highmem)) {
64ec72a1 2123 pr_err("Not enough free memory\n");
25761b6e
RW
2124 return -ENOMEM;
2125 }
2126
eba74c29 2127 if (swsusp_alloc(&copy_bm, nr_pages, nr_highmem)) {
64ec72a1 2128 pr_err("Memory allocation failed\n");
a0f49651 2129 return -ENOMEM;
8357376d 2130 }
25761b6e 2131
ef96f639
RW
2132 /*
 2133 * While allocating the suspend pagedir, new cold pages may appear.
25761b6e
RW
2134 * Kill them.
2135 */
9f8f2172 2136 drain_local_pages(NULL);
005e8ddd 2137 nr_copy_pages = copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
25761b6e
RW
2138
2139 /*
2140 * End of critical section. From now on, we can write to memory,
2141 * but we should not touch disk. This specially means we must _not_
2142 * touch swap space! Except we must write out our image of course.
2143 */
8357376d 2144 nr_pages += nr_highmem;
005e8ddd
BG
2145 /* We don't actually copy the zero pages */
2146 nr_zero_pages = nr_pages - nr_copy_pages;
8357376d 2147 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
a0f49651 2148
005e8ddd 2149 pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
8357376d 2150
25761b6e
RW
2151 return 0;
2152}
f577eb30 2153
d307c4a8
RW
2154#ifndef CONFIG_ARCH_HIBERNATION_HEADER
2155static int init_header_complete(struct swsusp_info *info)
f577eb30 2156{
d307c4a8 2157 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
f577eb30 2158 info->version_code = LINUX_VERSION_CODE;
d307c4a8
RW
2159 return 0;
2160}
2161
02d7f400 2162static const char *check_image_kernel(struct swsusp_info *info)
d307c4a8
RW
2163{
2164 if (info->version_code != LINUX_VERSION_CODE)
2165 return "kernel version";
 2166 if (strcmp(info->uts.sysname, init_utsname()->sysname))
 2167 return "system type";
 2168 if (strcmp(info->uts.release, init_utsname()->release))
 2169 return "kernel release";
 2170 if (strcmp(info->uts.version, init_utsname()->version))
 2171 return "version";
 2172 if (strcmp(info->uts.machine, init_utsname()->machine))
 2173 return "machine";
2174 return NULL;
2175}
2176#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
2177
af508b34
RW
2178unsigned long snapshot_get_image_size(void)
2179{
2180 return nr_copy_pages + nr_meta_pages + 1;
2181}
2182
d307c4a8
RW
2183static int init_header(struct swsusp_info *info)
2184{
2185 memset(info, 0, sizeof(struct swsusp_info));
0ed5fd13 2186 info->num_physpages = get_num_physpages();
f577eb30 2187 info->image_pages = nr_copy_pages;
af508b34 2188 info->pages = snapshot_get_image_size();
6e1819d6
RW
2189 info->size = info->pages;
2190 info->size <<= PAGE_SHIFT;
d307c4a8 2191 return init_header_complete(info);
f577eb30
RW
2192}
2193
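Putting the header fields together (numbers assumed, zero pages ignored for simplicity): the image stream consists of one header page, then nr_meta_pages of packed PFNs, then the data pages. With 4 KiB pages and 8-byte longs each metadata page holds 512 PFNs, so an image with 100,000 data pages needs

	nr_meta_pages = DIV_ROUND_UP(100000 * 8, 4096) = 196
	info->pages   = 100000 + 196 + 1 = 100197
	info->size    = 100197 << PAGE_SHIFT  (about 391 MiB)

as computed by snapshot_get_image_size() and init_header() above.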
005e8ddd
BG
2194#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
2195#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
2196
f577eb30 2197/**
ef96f639
RW
2198 * pack_pfns - Prepare PFNs for saving.
2199 * @bm: Memory bitmap.
2200 * @buf: Memory buffer to store the PFNs in.
005e8ddd 2201 * @zero_bm: Memory bitmap containing PFNs of zero pages.
ef96f639
RW
2202 *
2203 * PFNs corresponding to set bits in @bm are stored in the area of memory
005e8ddd
BG
2204 * pointed to by @buf (1 page at a time). Pages which were filled with only
2205 * zeros will have the highest bit set in the packed format to distinguish
2206 * them from PFNs which will be contained in the image file.
f577eb30 2207 */
005e8ddd
BG
2208static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
2209 struct memory_bitmap *zero_bm)
f577eb30
RW
2210{
2211 int j;
2212
b788db79 2213 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
940864dd
RW
2214 buf[j] = memory_bm_next_pfn(bm);
2215 if (unlikely(buf[j] == BM_END_OF_MAP))
b788db79 2216 break;
005e8ddd
BG
2217 if (memory_bm_test_bit(zero_bm, buf[j]))
2218 buf[j] |= ENCODED_PFN_ZERO_FLAG;
f577eb30 2219 }
f577eb30
RW
2220}
2221
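To make the packed format concrete (PFN chosen for illustration): on a 64-bit kernel ENCODED_PFN_ZERO_FLAG is bit 63, so a page at PFN 0x1234 that contained only zeros is stored in the metadata as

	0x1234 | ENCODED_PFN_ZERO_FLAG == 0x8000000000001234

and unpack_orig_pfns() below undoes the encoding with

	zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);	/* true */
	decoded_pfn = buf[j] & ENCODED_PFN_MASK;	/* 0x1234 */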
2222/**
ef96f639
RW
2223 * snapshot_read_next - Get the address to read the next image page from.
2224 * @handle: Snapshot handle to be used for the reading.
f577eb30 2225 *
ef96f639
RW
2226 * On the first call, @handle should point to a zeroed snapshot_handle
2227 * structure. The structure gets populated then and a pointer to it should be
2228 * passed to this function every next time.
f577eb30 2229 *
ef96f639
RW
2230 * On success, the function returns a positive number. Then, the caller
2231 * is allowed to read up to the returned number of bytes from the memory
2232 * location computed by the data_of() macro.
f577eb30 2233 *
ef96f639
RW
2234 * The function returns 0 to indicate the end of the data stream condition,
2235 * and negative numbers are returned on errors. If that happens, the structure
2236 * pointed to by @handle is not updated and should not be used any more.
f577eb30 2237 */
d3c1b24c 2238int snapshot_read_next(struct snapshot_handle *handle)
f577eb30 2239{
fb13a28b 2240 if (handle->cur > nr_meta_pages + nr_copy_pages)
f577eb30 2241 return 0;
b788db79 2242
f577eb30
RW
2243 if (!buffer) {
2244 /* This makes the buffer be freed by swsusp_free() */
8357376d 2245 buffer = get_image_page(GFP_ATOMIC, PG_ANY);
f577eb30
RW
2246 if (!buffer)
2247 return -ENOMEM;
2248 }
d3c1b24c 2249 if (!handle->cur) {
d307c4a8
RW
2250 int error;
2251
2252 error = init_header((struct swsusp_info *)buffer);
2253 if (error)
2254 return error;
f577eb30 2255 handle->buffer = buffer;
b788db79
RW
2256 memory_bm_position_reset(&orig_bm);
2257 memory_bm_position_reset(&copy_bm);
d3c1b24c 2258 } else if (handle->cur <= nr_meta_pages) {
3ecb01df 2259 clear_page(buffer);
005e8ddd 2260 pack_pfns(buffer, &orig_bm, &zero_bm);
d3c1b24c
JS
2261 } else {
2262 struct page *page;
b788db79 2263
d3c1b24c
JS
2264 page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
2265 if (PageHighMem(page)) {
ef96f639
RW
2266 /*
2267 * Highmem pages are copied to the buffer,
d3c1b24c
JS
2268 * because we can't return with a kmapped
2269 * highmem page (we may not be called again).
2270 */
2271 void *kaddr;
8357376d 2272
0de9a1e2 2273 kaddr = kmap_atomic(page);
3ecb01df 2274 copy_page(buffer, kaddr);
0de9a1e2 2275 kunmap_atomic(kaddr);
d3c1b24c
JS
2276 handle->buffer = buffer;
2277 } else {
2278 handle->buffer = page_address(page);
f577eb30 2279 }
f577eb30 2280 }
d3c1b24c
JS
2281 handle->cur++;
2282 return PAGE_SIZE;
f577eb30
RW
2283}
2284
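The contract described above can be driven by a simple loop. The sketch below only illustrates that contract; emit_page() is an assumed helper standing in for whatever actually stores the data, and the kernel's real image writers live outside this file.

	static int write_whole_image(void)
	{
		struct snapshot_handle handle = {};
		int ret;

		while ((ret = snapshot_read_next(&handle)) > 0) {
			/* data_of(handle) points at the next PAGE_SIZE bytes */
			if (emit_page(data_of(handle), ret))
				return -EIO;
		}
		return ret;	/* 0 at end of image, negative error code on failure */
	}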
6dbecfd3
RW
2285static void duplicate_memory_bitmap(struct memory_bitmap *dst,
2286 struct memory_bitmap *src)
2287{
2288 unsigned long pfn;
2289
2290 memory_bm_position_reset(src);
2291 pfn = memory_bm_next_pfn(src);
2292 while (pfn != BM_END_OF_MAP) {
2293 memory_bm_set_bit(dst, pfn);
2294 pfn = memory_bm_next_pfn(src);
2295 }
2296}
2297
f577eb30 2298/**
ef96f639
RW
2299 * mark_unsafe_pages - Mark pages that were used before hibernation.
2300 *
2301 * Mark the pages that cannot be used for storing the image during restoration,
2302 * because they conflict with the pages that had been used before hibernation.
f577eb30 2303 */
6dbecfd3 2304static void mark_unsafe_pages(struct memory_bitmap *bm)
f577eb30 2305{
6dbecfd3 2306 unsigned long pfn;
f577eb30 2307
6dbecfd3
RW
2308 /* Clear the "free"/"unsafe" bit for all PFNs */
2309 memory_bm_position_reset(free_pages_map);
2310 pfn = memory_bm_next_pfn(free_pages_map);
2311 while (pfn != BM_END_OF_MAP) {
2312 memory_bm_clear_current(free_pages_map);
2313 pfn = memory_bm_next_pfn(free_pages_map);
f577eb30
RW
2314 }
2315
6dbecfd3
RW
2316 /* Mark pages that correspond to the "original" PFNs as "unsafe" */
2317 duplicate_memory_bitmap(free_pages_map, bm);
f577eb30 2318
940864dd 2319 allocated_unsafe_pages = 0;
f577eb30
RW
2320}
2321
d307c4a8 2322static int check_header(struct swsusp_info *info)
f577eb30 2323{
02d7f400 2324 const char *reason;
f577eb30 2325
d307c4a8 2326 reason = check_image_kernel(info);
0ed5fd13 2327 if (!reason && info->num_physpages != get_num_physpages())
f577eb30 2328 reason = "memory size";
f577eb30 2329 if (reason) {
64ec72a1 2330 pr_err("Image mismatch: %s\n", reason);
f577eb30
RW
2331 return -EPERM;
2332 }
2333 return 0;
2334}
2335
2336/**
467df4cf 2337 * load_header - Check the image header and copy the data from it.
f577eb30 2338 */
efd5a852 2339static int load_header(struct swsusp_info *info)
f577eb30
RW
2340{
2341 int error;
f577eb30 2342
940864dd 2343 restore_pblist = NULL;
f577eb30
RW
2344 error = check_header(info);
2345 if (!error) {
f577eb30
RW
2346 nr_copy_pages = info->image_pages;
2347 nr_meta_pages = info->pages - info->image_pages - 1;
2348 }
2349 return error;
2350}
2351
2352/**
ef96f639
RW
2353 * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
2354 * @bm: Memory bitmap.
2355 * @buf: Area of memory containing the PFNs.
005e8ddd 2356 * @zero_bm: Memory bitmap with the zero PFNs marked.
ef96f639
RW
2357 *
2358 * For each element of the array pointed to by @buf (1 page at a time), set the
005e8ddd
BG
2359 * corresponding bit in @bm. If the page was originally populated with only
2360 * zeros then a corresponding bit will also be set in @zero_bm.
f577eb30 2361 */
005e8ddd
BG
2362static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
2363 struct memory_bitmap *zero_bm)
f577eb30 2364{
005e8ddd
BG
2365 unsigned long decoded_pfn;
2366 bool zero;
f577eb30
RW
2367 int j;
2368
940864dd
RW
2369 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
2370 if (unlikely(buf[j] == BM_END_OF_MAP))
2371 break;
2372
005e8ddd
BG
2373 zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
2374 decoded_pfn = buf[j] & ENCODED_PFN_MASK;
2375 if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
2376 memory_bm_set_bit(bm, decoded_pfn);
2377 if (zero) {
2378 memory_bm_set_bit(zero_bm, decoded_pfn);
2379 nr_zero_pages++;
2380 }
3363e0ad 2381 } else {
005e8ddd 2382 if (!pfn_valid(decoded_pfn))
3363e0ad 2383 pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
005e8ddd 2384 (unsigned long long)PFN_PHYS(decoded_pfn));
69643279 2385 return -EFAULT;
3363e0ad 2386 }
f577eb30 2387 }
69643279
RW
2388
2389 return 0;
f577eb30
RW
2390}
2391
8357376d 2392#ifdef CONFIG_HIGHMEM
ef96f639
RW
2393/*
2394 * struct highmem_pbe is used for creating the list of highmem pages that
8357376d
RW
2395 * should be restored atomically during the resume from disk, because the page
2396 * frames they have occupied before the suspend are in use.
2397 */
2398struct highmem_pbe {
2399 struct page *copy_page; /* data is here now */
2400 struct page *orig_page; /* data was here before the suspend */
2401 struct highmem_pbe *next;
2402};
2403
ef96f639
RW
2404/*
2405 * List of highmem PBEs needed for restoring the highmem pages that were
8357376d
RW
2406 * allocated before the suspend and included in the suspend image, but have
2407 * also been allocated by the "resume" kernel, so their contents cannot be
2408 * written directly to their "original" page frames.
2409 */
2410static struct highmem_pbe *highmem_pblist;
2411
2412/**
ef96f639
RW
2413 * count_highmem_image_pages - Compute the number of highmem pages in the image.
2414 * @bm: Memory bitmap.
2415 *
2416 * The bits in @bm that correspond to image pages are assumed to be set.
8357376d 2417 */
8357376d
RW
2418static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
2419{
2420 unsigned long pfn;
2421 unsigned int cnt = 0;
2422
2423 memory_bm_position_reset(bm);
2424 pfn = memory_bm_next_pfn(bm);
2425 while (pfn != BM_END_OF_MAP) {
2426 if (PageHighMem(pfn_to_page(pfn)))
2427 cnt++;
2428
2429 pfn = memory_bm_next_pfn(bm);
2430 }
2431 return cnt;
2432}
2433
8357376d
RW
2434static unsigned int safe_highmem_pages;
2435
2436static struct memory_bitmap *safe_highmem_bm;
2437
ef96f639
RW
2438/**
2439 * prepare_highmem_image - Allocate memory for loading highmem data from image.
2440 * @bm: Pointer to an uninitialized memory bitmap structure.
2441 * @nr_highmem_p: Pointer to the number of highmem image pages.
2442 *
2443 * Try to allocate as many highmem pages as there are highmem image pages
2444 * (@nr_highmem_p points to the variable containing the number of highmem image
 2445 * pages). The pages that are "safe" (i.e. will not be overwritten when the
2446 * hibernation image is restored entirely) have the corresponding bits set in
6be2408a 2447 * @bm (it must be uninitialized).
ef96f639
RW
2448 *
2449 * NOTE: This function should not be called if there are no highmem image pages.
2450 */
efd5a852
RW
2451static int prepare_highmem_image(struct memory_bitmap *bm,
2452 unsigned int *nr_highmem_p)
8357376d
RW
2453{
2454 unsigned int to_alloc;
2455
2456 if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
2457 return -ENOMEM;
2458
2459 if (get_highmem_buffer(PG_SAFE))
2460 return -ENOMEM;
2461
2462 to_alloc = count_free_highmem_pages();
2463 if (to_alloc > *nr_highmem_p)
2464 to_alloc = *nr_highmem_p;
2465 else
2466 *nr_highmem_p = to_alloc;
2467
2468 safe_highmem_pages = 0;
2469 while (to_alloc-- > 0) {
2470 struct page *page;
2471
2472 page = alloc_page(__GFP_HIGHMEM);
7be98234 2473 if (!swsusp_page_is_free(page)) {
8357376d
RW
 2474 /* The page is "safe", set its bit in the bitmap */
2475 memory_bm_set_bit(bm, page_to_pfn(page));
2476 safe_highmem_pages++;
2477 }
2478 /* Mark the page as allocated */
7be98234
RW
2479 swsusp_set_page_forbidden(page);
2480 swsusp_set_page_free(page);
8357376d
RW
2481 }
2482 memory_bm_position_reset(bm);
2483 safe_highmem_bm = bm;
2484 return 0;
2485}
2486
ef96f639
RW
2487static struct page *last_highmem_page;
2488
8357376d 2489/**
ef96f639
RW
2490 * get_highmem_page_buffer - Prepare a buffer to store a highmem image page.
2491 *
 2492 * For a given highmem image page get a buffer that snapshot_write_next() should
2493 * return to its caller to write to.
8357376d 2494 *
ef96f639
RW
2495 * If the page is to be saved to its "original" page frame or a copy of
2496 * the page is to be made in the highmem, @buffer is returned. Otherwise,
2497 * the copy of the page is to be made in normal memory, so the address of
2498 * the copy is returned.
8357376d 2499 *
ef96f639
RW
 2500 * If @buffer is returned, the caller of snapshot_write_next() will write
 2501 * the page's contents to @buffer, so they will have to be copied to the
 2502 * right location on the next call to snapshot_write_next() and it is done
2503 * with the help of copy_last_highmem_page(). For this purpose, if
2504 * @buffer is returned, @last_highmem_page is set to the page to which
2505 * the data will have to be copied from @buffer.
8357376d 2506 */
efd5a852
RW
2507static void *get_highmem_page_buffer(struct page *page,
2508 struct chain_allocator *ca)
8357376d
RW
2509{
2510 struct highmem_pbe *pbe;
2511 void *kaddr;
2512
7be98234 2513 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
ef96f639
RW
2514 /*
2515 * We have allocated the "original" page frame and we can
8357376d
RW
2516 * use it directly to store the loaded page.
2517 */
2518 last_highmem_page = page;
2519 return buffer;
2520 }
ef96f639
RW
2521 /*
2522 * The "original" page frame has not been allocated and we have to
8357376d
RW
2523 * use a "safe" page frame to store the loaded page.
2524 */
2525 pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
2526 if (!pbe) {
2527 swsusp_free();
69643279 2528 return ERR_PTR(-ENOMEM);
8357376d
RW
2529 }
2530 pbe->orig_page = page;
2531 if (safe_highmem_pages > 0) {
2532 struct page *tmp;
2533
2534 /* Copy of the page will be stored in high memory */
2535 kaddr = buffer;
2536 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
2537 safe_highmem_pages--;
2538 last_highmem_page = tmp;
2539 pbe->copy_page = tmp;
2540 } else {
2541 /* Copy of the page will be stored in normal memory */
f0c71830
BG
2542 kaddr = __get_safe_page(ca->gfp_mask);
2543 if (!kaddr)
2544 return ERR_PTR(-ENOMEM);
8357376d
RW
2545 pbe->copy_page = virt_to_page(kaddr);
2546 }
2547 pbe->next = highmem_pblist;
2548 highmem_pblist = pbe;
2549 return kaddr;
2550}
2551
2552/**
ef96f639
RW
 2553 * copy_last_highmem_page - Copy the most recent highmem image page.
2554 *
2555 * Copy the contents of a highmem image from @buffer, where the caller of
2556 * snapshot_write_next() has stored them, to the right location represented by
2557 * @last_highmem_page .
8357376d 2558 */
8357376d
RW
2559static void copy_last_highmem_page(void)
2560{
2561 if (last_highmem_page) {
2562 void *dst;
2563
0de9a1e2 2564 dst = kmap_atomic(last_highmem_page);
3ecb01df 2565 copy_page(dst, buffer);
0de9a1e2 2566 kunmap_atomic(dst);
8357376d
RW
2567 last_highmem_page = NULL;
2568 }
2569}
2570
2571static inline int last_highmem_page_copied(void)
2572{
2573 return !last_highmem_page;
2574}
2575
2576static inline void free_highmem_data(void)
2577{
2578 if (safe_highmem_bm)
2579 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
2580
2581 if (buffer)
2582 free_image_page(buffer, PG_UNSAFE_CLEAR);
2583}
2584#else
efd5a852 2585static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
8357376d 2586
efd5a852
RW
2587static inline int prepare_highmem_image(struct memory_bitmap *bm,
2588 unsigned int *nr_highmem_p) { return 0; }
8357376d 2589
efd5a852
RW
2590static inline void *get_highmem_page_buffer(struct page *page,
2591 struct chain_allocator *ca)
8357376d 2592{
69643279 2593 return ERR_PTR(-EINVAL);
8357376d
RW
2594}
2595
2596static inline void copy_last_highmem_page(void) {}
2597static inline int last_highmem_page_copied(void) { return 1; }
2598static inline void free_highmem_data(void) {}
2599#endif /* CONFIG_HIGHMEM */
2600
ef96f639
RW
2601#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
2602
f577eb30 2603/**
ef96f639 2604 * prepare_image - Make room for loading hibernation image.
6be2408a 2605 * @new_bm: Uninitialized memory bitmap structure.
ef96f639 2606 * @bm: Memory bitmap with unsafe pages marked.
005e8ddd 2607 * @zero_bm: Memory bitmap containing the zero pages.
ef96f639
RW
2608 *
2609 * Use @bm to mark the pages that will be overwritten in the process of
2610 * restoring the system memory state from the suspend image ("unsafe" pages)
2611 * and allocate memory for the image.
968808b8 2612 *
ef96f639
RW
2613 * The idea is to allocate a new memory bitmap first and then allocate
2614 * as many pages as needed for image data, but without specifying what those
2615 * pages will be used for just yet. Instead, we mark them all as allocated and
 2616 * create a list of "safe" pages to be used later. On systems with high
2617 * memory a list of "safe" highmem pages is created too.
005e8ddd
BG
2618 *
2619 * Because it was not known which pages were unsafe when @zero_bm was created,
2620 * make a copy of it and recreate it within safe pages.
f577eb30 2621 */
005e8ddd
BG
2622static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
2623 struct memory_bitmap *zero_bm)
f577eb30 2624{
8357376d 2625 unsigned int nr_pages, nr_highmem;
005e8ddd 2626 struct memory_bitmap tmp;
9c744481 2627 struct linked_page *lp;
940864dd 2628 int error;
f577eb30 2629
8357376d
RW
2630 /* If there is no highmem, the buffer will not be necessary */
2631 free_image_page(buffer, PG_UNSAFE_CLEAR);
2632 buffer = NULL;
2633
2634 nr_highmem = count_highmem_image_pages(bm);
6dbecfd3 2635 mark_unsafe_pages(bm);
940864dd
RW
2636
2637 error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
2638 if (error)
2639 goto Free;
2640
2641 duplicate_memory_bitmap(new_bm, bm);
2642 memory_bm_free(bm, PG_UNSAFE_KEEP);
005e8ddd
BG
2643
2644 /* Make a copy of zero_bm so it can be created in safe pages */
b21f18ef 2645 error = memory_bm_create(&tmp, GFP_ATOMIC, PG_SAFE);
005e8ddd
BG
2646 if (error)
2647 goto Free;
2648
2649 duplicate_memory_bitmap(&tmp, zero_bm);
2650 memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
2651
2652 /* Recreate zero_bm in safe pages */
2653 error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
2654 if (error)
2655 goto Free;
2656
2657 duplicate_memory_bitmap(zero_bm, &tmp);
b21f18ef 2658 memory_bm_free(&tmp, PG_UNSAFE_CLEAR);
005e8ddd
BG
2659 /* At this point zero_bm is in safe pages and it can be used for restoring. */
2660
8357376d
RW
2661 if (nr_highmem > 0) {
2662 error = prepare_highmem_image(bm, &nr_highmem);
2663 if (error)
2664 goto Free;
2665 }
ef96f639
RW
2666 /*
2667 * Reserve some safe pages for potential later use.
940864dd
RW
2668 *
2669 * NOTE: This way we make sure there will be enough safe pages for the
2670 * chain_alloc() in get_buffer(). It is a bit wasteful, but
2671 * nr_copy_pages cannot be greater than 50% of the memory anyway.
9c744481
RW
2672 *
 2673 * nr_copy_pages also cannot be less than allocated_unsafe_pages.
940864dd 2674 */
005e8ddd 2675 nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
940864dd
RW
2676 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
2677 while (nr_pages > 0) {
8357376d 2678 lp = get_image_page(GFP_ATOMIC, PG_SAFE);
940864dd 2679 if (!lp) {
f577eb30 2680 error = -ENOMEM;
940864dd
RW
2681 goto Free;
2682 }
9c744481
RW
2683 lp->next = safe_pages_list;
2684 safe_pages_list = lp;
940864dd 2685 nr_pages--;
f577eb30 2686 }
940864dd 2687 /* Preallocate memory for the image */
005e8ddd 2688 nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
940864dd
RW
2689 while (nr_pages > 0) {
2690 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
2691 if (!lp) {
2692 error = -ENOMEM;
2693 goto Free;
2694 }
7be98234 2695 if (!swsusp_page_is_free(virt_to_page(lp))) {
940864dd
RW
2696 /* The page is "safe", add it to the list */
2697 lp->next = safe_pages_list;
2698 safe_pages_list = lp;
968808b8 2699 }
940864dd 2700 /* Mark the page as allocated */
7be98234
RW
2701 swsusp_set_page_forbidden(virt_to_page(lp));
2702 swsusp_set_page_free(virt_to_page(lp));
940864dd 2703 nr_pages--;
968808b8 2704 }
940864dd
RW
2705 return 0;
2706
59a49335 2707 Free:
940864dd 2708 swsusp_free();
f577eb30
RW
2709 return error;
2710}
2711
940864dd 2712/**
ef96f639
RW
2713 * get_buffer - Get the address to store the next image data page.
2714 *
2715 * Get the address that snapshot_write_next() should return to its caller to
2716 * write to.
940864dd 2717 */
940864dd 2718static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
968808b8 2719{
940864dd 2720 struct pbe *pbe;
69643279
RW
2721 struct page *page;
2722 unsigned long pfn = memory_bm_next_pfn(bm);
968808b8 2723
69643279
RW
2724 if (pfn == BM_END_OF_MAP)
2725 return ERR_PTR(-EFAULT);
2726
2727 page = pfn_to_page(pfn);
8357376d
RW
2728 if (PageHighMem(page))
2729 return get_highmem_page_buffer(page, ca);
2730
7be98234 2731 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
ef96f639
RW
2732 /*
2733 * We have allocated the "original" page frame and we can
940864dd 2734 * use it directly to store the loaded page.
968808b8 2735 */
940864dd
RW
2736 return page_address(page);
2737
ef96f639
RW
2738 /*
2739 * The "original" page frame has not been allocated and we have to
940864dd 2740 * use a "safe" page frame to store the loaded page.
968808b8 2741 */
940864dd
RW
2742 pbe = chain_alloc(ca, sizeof(struct pbe));
2743 if (!pbe) {
2744 swsusp_free();
69643279 2745 return ERR_PTR(-ENOMEM);
940864dd 2746 }
8357376d 2747 pbe->orig_address = page_address(page);
f0c71830
BG
2748 pbe->address = __get_safe_page(ca->gfp_mask);
2749 if (!pbe->address)
2750 return ERR_PTR(-ENOMEM);
940864dd
RW
2751 pbe->next = restore_pblist;
2752 restore_pblist = pbe;
8357376d 2753 return pbe->address;
968808b8
RW
2754}
2755
f577eb30 2756/**
ef96f639
RW
2757 * snapshot_write_next - Get the address to store the next image page.
2758 * @handle: Snapshot handle structure to guide the writing.
f577eb30 2759 *
ef96f639
RW
2760 * On the first call, @handle should point to a zeroed snapshot_handle
2761 * structure. The structure gets populated then and a pointer to it should be
2762 * passed to this function every next time.
f577eb30 2763 *
ef96f639
RW
2764 * On success, the function returns a positive number. Then, the caller
2765 * is allowed to write up to the returned number of bytes to the memory
2766 * location computed by the data_of() macro.
f577eb30 2767 *
ef96f639
RW
2768 * The function returns 0 to indicate the "end of file" condition. Negative
2769 * numbers are returned on errors, in which cases the structure pointed to by
2770 * @handle is not updated and should not be used any more.
f577eb30 2771 */
d3c1b24c 2772int snapshot_write_next(struct snapshot_handle *handle)
f577eb30 2773{
940864dd 2774 static struct chain_allocator ca;
4ac934b1 2775 int error;
f577eb30 2776
005e8ddd 2777next:
940864dd 2778 /* Check if we have already loaded the entire image */
005e8ddd 2779 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
f577eb30 2780 return 0;
940864dd 2781
d3c1b24c 2782 if (!handle->cur) {
8357376d
RW
2783 if (!buffer)
2784 /* This makes the buffer be freed by swsusp_free() */
2785 buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2786
f577eb30
RW
2787 if (!buffer)
2788 return -ENOMEM;
8357376d 2789
f577eb30 2790 handle->buffer = buffer;
d3c1b24c
JS
2791 } else if (handle->cur == 1) {
2792 error = load_header(buffer);
2793 if (error)
2794 return error;
940864dd 2795
9c744481
RW
2796 safe_pages_list = NULL;
2797
d3c1b24c
JS
2798 error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2799 if (error)
2800 return error;
2801
005e8ddd
BG
2802 error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
2803 if (error)
2804 return error;
2805
2806 nr_zero_pages = 0;
2807
4c0b6c10 2808 hibernate_restore_protection_begin();
d3c1b24c 2809 } else if (handle->cur <= nr_meta_pages + 1) {
005e8ddd 2810 error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
d3c1b24c
JS
2811 if (error)
2812 return error;
940864dd 2813
d3c1b24c 2814 if (handle->cur == nr_meta_pages + 1) {
005e8ddd 2815 error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
69643279
RW
2816 if (error)
2817 return error;
2818
d3c1b24c
JS
2819 chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2820 memory_bm_position_reset(&orig_bm);
005e8ddd 2821 memory_bm_position_reset(&zero_bm);
d3c1b24c 2822 restore_pblist = NULL;
940864dd 2823 handle->buffer = get_buffer(&orig_bm, &ca);
69643279
RW
2824 if (IS_ERR(handle->buffer))
2825 return PTR_ERR(handle->buffer);
f577eb30 2826 }
f577eb30 2827 } else {
d3c1b24c 2828 copy_last_highmem_page();
f4311756
CL
2829 error = hibernate_restore_protect_page(handle->buffer);
2830 if (error)
2831 return error;
d3c1b24c
JS
2832 handle->buffer = get_buffer(&orig_bm, &ca);
2833 if (IS_ERR(handle->buffer))
2834 return PTR_ERR(handle->buffer);
f577eb30 2835 }
d08970df 2836 handle->sync_read = (handle->buffer == buffer);
d3c1b24c 2837 handle->cur++;
005e8ddd
BG
2838
2839 /* Zero pages were not included in the image, memset it and move on. */
2840 if (handle->cur > nr_meta_pages + 1 &&
2841 memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
2842 memset(handle->buffer, 0, PAGE_SIZE);
2843 goto next;
2844 }
2845
d3c1b24c 2846 return PAGE_SIZE;
f577eb30
RW
2847}
2848
8357376d 2849/**
ef96f639
RW
2850 * snapshot_write_finalize - Complete the loading of a hibernation image.
2851 *
2852 * Must be called after the last call to snapshot_write_next() in case the last
2853 * page in the image happens to be a highmem page and its contents should be
2854 * stored in highmem. Additionally, it recycles bitmap memory that's not
2855 * necessary any more.
8357376d 2856 */
f4311756 2857int snapshot_write_finalize(struct snapshot_handle *handle)
8357376d 2858{
f4311756
CL
2859 int error;
2860
8357376d 2861 copy_last_highmem_page();
f4311756 2862 error = hibernate_restore_protect_page(handle->buffer);
307c5971 2863 /* Do that only if we have loaded the image entirely */
005e8ddd 2864 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
307c5971 2865 memory_bm_recycle(&orig_bm);
8357376d
RW
2866 free_highmem_data();
2867 }
f4311756 2868 return error;
8357376d
RW
2869}
2870
f577eb30
RW
2871int snapshot_image_loaded(struct snapshot_handle *handle)
2872{
8357376d 2873 return !(!nr_copy_pages || !last_highmem_page_copied() ||
005e8ddd 2874 handle->cur <= nr_meta_pages + nr_copy_pages + nr_zero_pages);
940864dd
RW
2875}
2876
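On the restore side, snapshot_write_next(), snapshot_write_finalize() and snapshot_image_loaded() are meant to be used together. A minimal sketch of that contract (fetch_page() is an assumed helper that reads the next image page from storage; it is not part of this file):

	static int load_whole_image(void)
	{
		struct snapshot_handle handle = {};
		int ret;

		for (;;) {
			ret = snapshot_write_next(&handle);
			if (ret <= 0)
				break;
			/* fill the returned buffer with the next image page */
			if (fetch_page(data_of(handle), ret)) {
				ret = -EIO;
				break;
			}
		}
		if (!ret)
			ret = snapshot_write_finalize(&handle);
		if (!ret && !snapshot_image_loaded(&handle))
			ret = -ENODATA;
		return ret;
	}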
8357376d
RW
2877#ifdef CONFIG_HIGHMEM
2878/* Assumes that @buf is ready and points to a "safe" page */
efd5a852
RW
2879static inline void swap_two_pages_data(struct page *p1, struct page *p2,
2880 void *buf)
940864dd 2881{
8357376d
RW
2882 void *kaddr1, *kaddr2;
2883
0de9a1e2
CW
2884 kaddr1 = kmap_atomic(p1);
2885 kaddr2 = kmap_atomic(p2);
3ecb01df
JB
2886 copy_page(buf, kaddr1);
2887 copy_page(kaddr1, kaddr2);
2888 copy_page(kaddr2, buf);
0de9a1e2
CW
2889 kunmap_atomic(kaddr2);
2890 kunmap_atomic(kaddr1);
8357376d
RW
2891}
2892
2893/**
ef96f639
RW
2894 * restore_highmem - Put highmem image pages into their original locations.
2895 *
2896 * For each highmem page that was in use before hibernation and is included in
2897 * the image, and also has been allocated by the "restore" kernel, swap its
 2898 * current contents with the previous (i.e. "before hibernation") ones.
8357376d 2899 *
ef96f639
RW
2900 * If the restore eventually fails, we can call this function once again and
2901 * restore the highmem state as seen by the restore kernel.
8357376d 2902 */
8357376d
RW
2903int restore_highmem(void)
2904{
2905 struct highmem_pbe *pbe = highmem_pblist;
2906 void *buf;
2907
2908 if (!pbe)
2909 return 0;
2910
2911 buf = get_image_page(GFP_ATOMIC, PG_SAFE);
2912 if (!buf)
2913 return -ENOMEM;
2914
2915 while (pbe) {
2916 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
2917 pbe = pbe->next;
2918 }
2919 free_image_page(buf, PG_UNSAFE_CLEAR);
2920 return 0;
f577eb30 2921}
8357376d 2922#endif /* CONFIG_HIGHMEM */