mm/page_isolation.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * linux/mm/page_isolation.c
   4  */
   5
   6 #include <linux/mm.h>
   7 #include <linux/page-isolation.h>
   8 #include <linux/pageblock-flags.h>
   9 #include <linux/memory.h>
  10 #include <linux/hugetlb.h>
  11 #include <linux/page_owner.h>
  12 #include <linux/migrate.h>
  13 #include "internal.h"
  14
  15 #define CREATE_TRACE_POINTS
  16 #include <trace/events/page_isolation.h>
  17
  18 /*
  19  * This function checks whether the range [start_pfn, end_pfn) includes
  20  * unmovable pages or not. The range must fall into a single pageblock and
  21  * consequently belong to a single zone.
  22  *
  23  * PageLRU check without isolation or lru_lock could race so that
  24  * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
  25  * check without lock_page also may miss some movable non-lru pages at
  26  * race condition. So you can't expect this function should be exact.
  27  *
  28  * Returns a page without holding a reference. If the caller wants to
  29  * dereference that page (e.g., dumping), it has to make sure that it
  30  * cannot get removed (e.g., via memory unplug) concurrently.
  31  *
  32  */
  33 static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long end_pfn,
  34                                 int migratetype, int flags)
  35 {
  36         struct page *page = pfn_to_page(start_pfn);
  37         struct zone *zone = page_zone(page);
  38         unsigned long pfn;
  39
  40         VM_BUG_ON(pageblock_start_pfn(start_pfn) !=
  41                   pageblock_start_pfn(end_pfn - 1));
  42
  43         if (is_migrate_cma_page(page)) {
  44                 /*
  45                  * CMA allocations (alloc_contig_range) really need to mark
  46                  * isolate CMA pageblocks even when they are not movable in fact
  47                  * so consider them movable here.
  48                  */
  49                 if (is_migrate_cma(migratetype))
  50                         return NULL;
  51
  52                 return page;
  53         }
  54
  55         for (pfn = start_pfn; pfn < end_pfn; pfn++) {
  56                 page = pfn_to_page(pfn);
  57
  58                 /*
  59                  * Both, bootmem allocations and memory holes are marked
  60                  * PG_reserved and are unmovable. We can even have unmovable
  61                  * allocations inside ZONE_MOVABLE, for example when
  62                  * specifying "movablecore".
  63                  */
  64                 if (PageReserved(page))
  65                         return page;
  66
  67                 /*
  68                  * If the zone is movable and we have ruled out all reserved
  69                  * pages then it should be reasonably safe to assume the rest
  70                  * is movable.
  71                  */
  72                 if (zone_idx(zone) == ZONE_MOVABLE)
  73                         continue;
  74
  75                 /*
  76                  * Hugepages are not in LRU lists, but they're movable.
  77                  * THPs are on the LRU, but need to be counted as #small pages.
  78                  * We need not scan over tail pages because we don't
  79                  * handle each tail page individually in migration.
  80                  */
  81                 if (PageHuge(page) || PageTransCompound(page)) {
  82                         struct folio *folio = page_folio(page);
  83                         unsigned int skip_pages;
  84
  85                         if (PageHuge(page)) {
  86                                 if (!hugepage_migration_supported(folio_hstate(folio)))
  87                                         return page;
  88                         } else if (!folio_test_lru(folio) && !__folio_test_movable(folio)) {
  89                                 return page;
  90                         }
  91
  92                         skip_pages = folio_nr_pages(folio) - folio_page_idx(folio, page);
  93                         pfn += skip_pages - 1;
  94                         continue;
  95                 }
  96
  97                 /*
  98                  * We can't use page_count without pin a page
  99                  * because another CPU can free compound page.
 100                  * This check already skips compound tails of THP
 101                  * because their page->_refcount is zero at all time.
 102                  */
 103                 if (!page_ref_count(page)) {
 104                         if (PageBuddy(page))
 105                                 pfn += (1 << buddy_order(page)) - 1;
 106                         continue;
 107                 }
 108
 109                 /*
 110                  * The HWPoisoned page may be not in buddy system, and
 111                  * page_count() is not 0.
 112                  */
 113                 if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
 114                         continue;
 115
 116                 /*
 117                  * We treat all PageOffline() pages as movable when offlining
 118                  * to give drivers a chance to decrement their reference count
 119                  * in MEM_GOING_OFFLINE in order to indicate that these pages
 120                  * can be offlined as there are no direct references anymore.
 121                  * For actually unmovable PageOffline() where the driver does
 122                  * not support this, we will fail later when trying to actually
 123                  * move these pages that still have a reference count > 0.
 124                  * (false negatives in this function only)
 125                  */
 126                 if ((flags & MEMORY_OFFLINE) && PageOffline(page))
 127                         continue;
 128
 129                 if (__PageMovable(page) || PageLRU(page))
 130                         continue;
 131
 132                 /*
 133                  * If there are RECLAIMABLE pages, we need to check
 134                  * it.  But now, memory offline itself doesn't call
 135                  * shrink_node_slabs() and it still to be fixed.
 136                  */
 137                 return page;
 138         }
 139         return NULL;
 140 }
 141
 142 /*
 143  * This function set pageblock migratetype to isolate if no unmovable page is
 144  * present in [start_pfn, end_pfn). The pageblock must intersect with
 145  * [start_pfn, end_pfn).
 146  */
 147 static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags,
 148                         unsigned long start_pfn, unsigned long end_pfn)
 149 {
 150         struct zone *zone = page_zone(page);
 151         struct page *unmovable;
 152         unsigned long flags;
 153         unsigned long check_unmovable_start, check_unmovable_end;
 154
 155         spin_lock_irqsave(&zone->lock, flags);
 156
 157         /*
 158          * We assume the caller intended to SET migrate type to isolate.
 159          * If it is already set, then someone else must have raced and
 160          * set it before us.
 161          */
 162         if (is_migrate_isolate_page(page)) {
 163                 spin_unlock_irqrestore(&zone->lock, flags);
 164                 return -EBUSY;
 165         }
 166
 167         /*
 168          * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
 169          * We just check MOVABLE pages.
 170          *
 171          * Pass the intersection of [start_pfn, end_pfn) and the page's pageblock
 172          * to avoid redundant checks.
 173          */
 174         check_unmovable_start = max(page_to_pfn(page), start_pfn);
 175         check_unmovable_end = min(pageblock_end_pfn(page_to_pfn(page)),
 176                                   end_pfn);
 177
 178         unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
 179                         migratetype, isol_flags);
 180         if (!unmovable) {
 181                 unsigned long nr_pages;
 182                 int mt = get_pageblock_migratetype(page);
 183
 184                 set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 185                 zone->nr_isolate_pageblock++;
 186                 nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE,
 187                                                                         NULL);
 188
 189                 __mod_zone_freepage_state(zone, -nr_pages, mt);
 190                 spin_unlock_irqrestore(&zone->lock, flags);
 191                 return 0;
 192         }
 193
 194         spin_unlock_irqrestore(&zone->lock, flags);
 195         if (isol_flags & REPORT_FAILURE) {
 196                 /*
 197                  * printk() with zone->lock held will likely trigger a
 198                  * lockdep splat, so defer it here.
 199                  */
 200                 dump_page(unmovable, "unmovable page");
 201         }
 202
 203         return -EBUSY;
 204 }
 205
 206 static void unset_migratetype_isolate(struct page *page, int migratetype)
 207 {
 208         struct zone *zone;
 209         unsigned long flags, nr_pages;
 210         bool isolated_page = false;
 211         unsigned int order;
 212         struct page *buddy;
 213
 214         zone = page_zone(page);
 215         spin_lock_irqsave(&zone->lock, flags);
 216         if (!is_migrate_isolate_page(page))
 217                 goto out;
 218
 219         /*
 220          * Because freepage with more than pageblock_order on isolated
 221          * pageblock is restricted to merge due to freepage counting problem,
 222          * it is possible that there is free buddy page.
 223          * move_freepages_block() doesn't care of merge so we need other
 224          * approach in order to merge them. Isolation and free will make
 225          * these pages to be merged.
 226          */
 227         if (PageBuddy(page)) {
 228                 order = buddy_order(page);
 229                 if (order >= pageblock_order && order < MAX_PAGE_ORDER) {
 230                         buddy = find_buddy_page_pfn(page, page_to_pfn(page),
 231                                                     order, NULL);
 232                         if (buddy && !is_migrate_isolate_page(buddy)) {
 233                                 isolated_page = !!__isolate_free_page(page, order);
 234                                 /*
 235                                  * Isolating a free page in an isolated pageblock
 236                                  * is expected to always work as watermarks don't
 237                                  * apply here.
 238                                  */
 239                                 VM_WARN_ON(!isolated_page);
 240                         }
 241                 }
 242         }
 243
 244         /*
 245          * If we isolate freepage with more than pageblock_order, there
 246          * should be no freepage in the range, so we could avoid costly
 247          * pageblock scanning for freepage moving.
 248          *
 249          * We didn't actually touch any of the isolated pages, so place them
 250          * to the tail of the freelist. This is an optimization for memory
 251          * onlining - just onlined memory won't immediately be considered for
 252          * allocation.
 253          */
 254         if (!isolated_page) {
 255                 nr_pages = move_freepages_block(zone, page, migratetype, NULL);
 256                 __mod_zone_freepage_state(zone, nr_pages, migratetype);
 257         }
 258         set_pageblock_migratetype(page, migratetype);
 259         if (isolated_page)
 260                 __putback_isolated_page(page, order, migratetype);
 261         zone->nr_isolate_pageblock--;
 262 out:
 263         spin_unlock_irqrestore(&zone->lock, flags);
 264 }
 265
 266 static inline struct page *
 267 __first_valid_page(unsigned long pfn, unsigned long nr_pages)
 268 {
 269         int i;
 270
 271         for (i = 0; i < nr_pages; i++) {
 272                 struct page *page;
 273
 274                 page = pfn_to_online_page(pfn + i);
 275                 if (!page)
 276                         continue;
 277                 return page;
 278         }
 279         return NULL;
 280 }
 281
 282 /**
 283  * isolate_single_pageblock() -- tries to isolate a pageblock that might be
 284  * within a free or in-use page.
 285  * @boundary_pfn:               pageblock-aligned pfn that a page might cross
 286  * @flags:                      isolation flags
 287  * @gfp_flags:                  GFP flags used for migrating pages
 288  * @isolate_before:     isolate the pageblock before the boundary_pfn
 289  * @skip_isolation:     the flag to skip the pageblock isolation in second
 290  *                      isolate_single_pageblock()
 291  * @migratetype:        migrate type to set in error recovery.
 292  *
 293  * Free and in-use pages can be as big as MAX_PAGE_ORDER and contain more than one
 294  * pageblock. When not all pageblocks within a page are isolated at the same
 295  * time, free page accounting can go wrong. For example, in the case of
 296  * MAX_PAGE_ORDER = pageblock_order + 1, a MAX_PAGE_ORDER page has two
 297  * pagelbocks.
 298  * [      MAX_PAGE_ORDER         ]
 299  * [  pageblock0  |  pageblock1  ]
 300  * When either pageblock is isolated, if it is a free page, the page is not
 301  * split into separate migratetype lists, which is supposed to; if it is an
 302  * in-use page and freed later, __free_one_page() does not split the free page
 303  * either. The function handles this by splitting the free page or migrating
 304  * the in-use page then splitting the free page.
 305  */
 306 static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 307                         gfp_t gfp_flags, bool isolate_before, bool skip_isolation,
 308                         int migratetype)
 309 {
 310         unsigned long start_pfn;
 311         unsigned long isolate_pageblock;
 312         unsigned long pfn;
 313         struct zone *zone;
 314         int ret;
 315
 316         VM_BUG_ON(!pageblock_aligned(boundary_pfn));
 317
 318         if (isolate_before)
 319                 isolate_pageblock = boundary_pfn - pageblock_nr_pages;
 320         else
 321                 isolate_pageblock = boundary_pfn;
 322
 323         /*
 324          * scan at the beginning of MAX_ORDER_NR_PAGES aligned range to avoid
 325          * only isolating a subset of pageblocks from a bigger than pageblock
 326          * free or in-use page. Also make sure all to-be-isolated pageblocks
 327          * are within the same zone.
 328          */
 329         zone  = page_zone(pfn_to_page(isolate_pageblock));
 330         start_pfn  = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
 331                                       zone->zone_start_pfn);
 332
 333         if (skip_isolation) {
 334                 int mt __maybe_unused = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
 335
 336                 VM_BUG_ON(!is_migrate_isolate(mt));
 337         } else {
 338                 ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype,
 339                                 flags, isolate_pageblock, isolate_pageblock + pageblock_nr_pages);
 340
 341                 if (ret)
 342                         return ret;
 343         }
 344
 345         /*
 346          * Bail out early when the to-be-isolated pageblock does not form
 347          * a free or in-use page across boundary_pfn:
 348          *
 349          * 1. isolate before boundary_pfn: the page after is not online
 350          * 2. isolate after boundary_pfn: the page before is not online
 351          *
 352          * This also ensures correctness. Without it, when isolate after
 353          * boundary_pfn and [start_pfn, boundary_pfn) are not online,
 354          * __first_valid_page() will return unexpected NULL in the for loop
 355          * below.
 356          */
 357         if (isolate_before) {
 358                 if (!pfn_to_online_page(boundary_pfn))
 359                         return 0;
 360         } else {
 361                 if (!pfn_to_online_page(boundary_pfn - 1))
 362                         return 0;
 363         }
 364
 365         for (pfn = start_pfn; pfn < boundary_pfn;) {
 366                 struct page *page = __first_valid_page(pfn, boundary_pfn - pfn);
 367
 368                 VM_BUG_ON(!page);
 369                 pfn = page_to_pfn(page);
 370                 /*
 371                  * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any
 372                  * free pages in [start_pfn, boundary_pfn), its head page will
 373                  * always be in the range.
 374                  */
 375                 if (PageBuddy(page)) {
 376                         int order = buddy_order(page);
 377
 378                         if (pfn + (1UL << order) > boundary_pfn) {
 379                                 /* free page changed before split, check it again */
 380                                 if (split_free_page(page, order, boundary_pfn - pfn))
 381                                         continue;
 382                         }
 383
 384                         pfn += 1UL << order;
 385                         continue;
 386                 }
 387                 /*
 388                  * migrate compound pages then let the free page handling code
 389                  * above do the rest. If migration is not possible, just fail.
 390                  */
 391                 if (PageCompound(page)) {
 392                         struct page *head = compound_head(page);
 393                         unsigned long head_pfn = page_to_pfn(head);
 394                         unsigned long nr_pages = compound_nr(head);
 395
 396                         if (head_pfn + nr_pages <= boundary_pfn) {
 397                                 pfn = head_pfn + nr_pages;
 398                                 continue;
 399                         }
 400 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 401                         /*
 402                          * hugetlb, lru compound (THP), and movable compound pages
 403                          * can be migrated. Otherwise, fail the isolation.
 404                          */
 405                         if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) {
 406                                 int order;
 407                                 unsigned long outer_pfn;
 408                                 int page_mt = get_pageblock_migratetype(page);
 409                                 bool isolate_page = !is_migrate_isolate_page(page);
 410                                 struct compact_control cc = {
 411                                         .nr_migratepages = 0,
 412                                         .order = -1,
 413                                         .zone = page_zone(pfn_to_page(head_pfn)),
 414                                         .mode = MIGRATE_SYNC,
 415                                         .ignore_skip_hint = true,
 416                                         .no_set_skip_hint = true,
 417                                         .gfp_mask = gfp_flags,
 418                                         .alloc_contig = true,
 419                                 };
 420                                 INIT_LIST_HEAD(&cc.migratepages);
 421
 422                                 /*
 423                                  * XXX: mark the page as MIGRATE_ISOLATE so that
 424                                  * no one else can grab the freed page after migration.
 425                                  * Ideally, the page should be freed as two separate
 426                                  * pages to be added into separate migratetype free
 427                                  * lists.
 428                                  */
 429                                 if (isolate_page) {
 430                                         ret = set_migratetype_isolate(page, page_mt,
 431                                                 flags, head_pfn, head_pfn + nr_pages);
 432                                         if (ret)
 433                                                 goto failed;
 434                                 }
 435
 436                                 ret = __alloc_contig_migrate_range(&cc, head_pfn,
 437                                                         head_pfn + nr_pages);
 438
 439                                 /*
 440                                  * restore the page's migratetype so that it can
 441                                  * be split into separate migratetype free lists
 442                                  * later.
 443                                  */
 444                                 if (isolate_page)
 445                                         unset_migratetype_isolate(page, page_mt);
 446
 447                                 if (ret)
 448                                         goto failed;
 449                                 /*
 450                                  * reset pfn to the head of the free page, so
 451                                  * that the free page handling code above can split
 452                                  * the free page to the right migratetype list.
 453                                  *
 454                                  * head_pfn is not used here as a hugetlb page order
 455                                  * can be bigger than MAX_PAGE_ORDER, but after it is
 456                                  * freed, the free page order is not. Use pfn within
 457                                  * the range to find the head of the free page.
 458                                  */
 459                                 order = 0;
 460                                 outer_pfn = pfn;
 461                                 while (!PageBuddy(pfn_to_page(outer_pfn))) {
 462                                         /* stop if we cannot find the free page */
 463                                         if (++order > MAX_PAGE_ORDER)
 464                                                 goto failed;
 465                                         outer_pfn &= ~0UL << order;
 466                                 }
 467                                 pfn = outer_pfn;
 468                                 continue;
 469                         } else
 470 #endif
 471                                 goto failed;
 472                 }
 473
 474                 pfn++;
 475         }
 476         return 0;
 477 failed:
 478         /* restore the original migratetype */
 479         if (!skip_isolation)
 480                 unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
 481         return -EBUSY;
 482 }
 483
 484 /**
 485  * start_isolate_page_range() - mark page range MIGRATE_ISOLATE
 486  * @start_pfn:          The first PFN of the range to be isolated.
 487  * @end_pfn:            The last PFN of the range to be isolated.
 488  * @migratetype:        Migrate type to set in error recovery.
 489  * @flags:              The following flags are allowed (they can be combined in
 490  *                      a bit mask)
 491  *                      MEMORY_OFFLINE - isolate to offline (!allocate) memory
 492  *                                       e.g., skip over PageHWPoison() pages
 493  *                                       and PageOffline() pages.
 494  *                      REPORT_FAILURE - report details about the failure to
 495  *                      isolate the range
 496  * @gfp_flags:          GFP flags used for migrating pages that sit across the
 497  *                      range boundaries.
 498  *
 499  * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
 500  * the range will never be allocated. Any free pages and pages freed in the
 501  * future will not be allocated again. If specified range includes migrate types
 502  * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all
 503  * pages in the range finally, the caller have to free all pages in the range.
 504  * test_page_isolated() can be used for test it.
 505  *
 506  * The function first tries to isolate the pageblocks at the beginning and end
 507  * of the range, since there might be pages across the range boundaries.
 508  * Afterwards, it isolates the rest of the range.
 509  *
 510  * There is no high level synchronization mechanism that prevents two threads
 511  * from trying to isolate overlapping ranges. If this happens, one thread
 512  * will notice pageblocks in the overlapping range already set to isolate.
 513  * This happens in set_migratetype_isolate, and set_migratetype_isolate
 514  * returns an error. We then clean up by restoring the migration type on
 515  * pageblocks we may have modified and return -EBUSY to caller. This
 516  * prevents two threads from simultaneously working on overlapping ranges.
 517  *
 518  * Please note that there is no strong synchronization with the page allocator
 519  * either. Pages might be freed while their page blocks are marked ISOLATED.
 520  * A call to drain_all_pages() after isolation can flush most of them. However
 521  * in some cases pages might still end up on pcp lists and that would allow
 522  * for their allocation even when they are in fact isolated already. Depending
 523  * on how strong of a guarantee the caller needs, zone_pcp_disable/enable()
 524  * might be used to flush and disable pcplist before isolation and enable after
 525  * unisolation.
 526  *
 527  * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
 528  */
 529 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 530                              int migratetype, int flags, gfp_t gfp_flags)
 531 {
 532         unsigned long pfn;
 533         struct page *page;
 534         /* isolation is done at page block granularity */
 535         unsigned long isolate_start = pageblock_start_pfn(start_pfn);
 536         unsigned long isolate_end = pageblock_align(end_pfn);
 537         int ret;
 538         bool skip_isolation = false;
 539
 540         /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
 541         ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false,
 542                         skip_isolation, migratetype);
 543         if (ret)
 544                 return ret;
 545
 546         if (isolate_start == isolate_end - pageblock_nr_pages)
 547                 skip_isolation = true;
 548
 549         /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
 550         ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true,
 551                         skip_isolation, migratetype);
 552         if (ret) {
 553                 unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
 554                 return ret;
 555         }
 556
 557         /* skip isolated pageblocks at the beginning and end */
 558         for (pfn = isolate_start + pageblock_nr_pages;
 559              pfn < isolate_end - pageblock_nr_pages;
 560              pfn += pageblock_nr_pages) {
 561                 page = __first_valid_page(pfn, pageblock_nr_pages);
 562                 if (page && set_migratetype_isolate(page, migratetype, flags,
 563                                         start_pfn, end_pfn)) {
 564                         undo_isolate_page_range(isolate_start, pfn, migratetype);
 565                         unset_migratetype_isolate(
 566                                 pfn_to_page(isolate_end - pageblock_nr_pages),
 567                                 migratetype);
 568                         return -EBUSY;
 569                 }
 570         }
 571         return 0;
 572 }
 573
 574 /**
 575  * undo_isolate_page_range - undo effects of start_isolate_page_range()
 576  * @start_pfn:          The first PFN of the isolated range
 577  * @end_pfn:            The last PFN of the isolated range
 578  * @migratetype:        New migrate type to set on the range
 579  *
 580  * This finds every MIGRATE_ISOLATE page block in the given range
 581  * and switches it to @migratetype.
 582  */
 583 void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 584                             int migratetype)
 585 {
 586         unsigned long pfn;
 587         struct page *page;
 588         unsigned long isolate_start = pageblock_start_pfn(start_pfn);
 589         unsigned long isolate_end = pageblock_align(end_pfn);
 590
 591         for (pfn = isolate_start;
 592              pfn < isolate_end;
 593              pfn += pageblock_nr_pages) {
 594                 page = __first_valid_page(pfn, pageblock_nr_pages);
 595                 if (!page || !is_migrate_isolate_page(page))
 596                         continue;
 597                 unset_migratetype_isolate(page, migratetype);
 598         }
 599 }
 600 /*
 601  * Test all pages in the range is free(means isolated) or not.
 602  * all pages in [start_pfn...end_pfn) must be in the same zone.
 603  * zone->lock must be held before call this.
 604  *
 605  * Returns the last tested pfn.
 606  */
 607 static unsigned long
 608 __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
 609                                   int flags)
 610 {
 611         struct page *page;
 612
 613         while (pfn < end_pfn) {
 614                 page = pfn_to_page(pfn);
 615                 if (PageBuddy(page))
 616                         /*
 617                          * If the page is on a free list, it has to be on
 618                          * the correct MIGRATE_ISOLATE freelist. There is no
 619                          * simple way to verify that as VM_BUG_ON(), though.
 620                          */
 621                         pfn += 1 << buddy_order(page);
 622                 else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
 623                         /* A HWPoisoned page cannot be also PageBuddy */
 624                         pfn++;
 625                 else if ((flags & MEMORY_OFFLINE) && PageOffline(page) &&
 626                          !page_count(page))
 627                         /*
 628                          * The responsible driver agreed to skip PageOffline()
 629                          * pages when offlining memory by dropping its
 630                          * reference in MEM_GOING_OFFLINE.
 631                          */
 632                         pfn++;
 633                 else
 634                         break;
 635         }
 636
 637         return pfn;
 638 }
 639
 640 /**
 641  * test_pages_isolated - check if pageblocks in range are isolated
 642  * @start_pfn:          The first PFN of the isolated range
 643  * @end_pfn:            The first PFN *after* the isolated range
 644  * @isol_flags:         Testing mode flags
 645  *
 646  * This tests if all in the specified range are free.
 647  *
 648  * If %MEMORY_OFFLINE is specified in @flags, it will consider
 649  * poisoned and offlined pages free as well.
 650  *
 651  * Caller must ensure the requested range doesn't span zones.
 652  *
 653  * Returns 0 if true, -EBUSY if one or more pages are in use.
 654  */
 655 int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 656                         int isol_flags)
 657 {
 658         unsigned long pfn, flags;
 659         struct page *page;
 660         struct zone *zone;
 661         int ret;
 662
 663         /*
 664          * Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
 665          * pages are not aligned to pageblock_nr_pages.
 666          * Then we just check migratetype first.
 667          */
 668         for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 669                 page = __first_valid_page(pfn, pageblock_nr_pages);
 670                 if (page && !is_migrate_isolate_page(page))
 671                         break;
 672         }
 673         page = __first_valid_page(start_pfn, end_pfn - start_pfn);
 674         if ((pfn < end_pfn) || !page) {
 675                 ret = -EBUSY;
 676                 goto out;
 677         }
 678
 679         /* Check all pages are free or marked as ISOLATED */
 680         zone = page_zone(page);
 681         spin_lock_irqsave(&zone->lock, flags);
 682         pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags);
 683         spin_unlock_irqrestore(&zone->lock, flags);
 684
 685         ret = pfn < end_pfn ? -EBUSY : 0;
 686
 687 out:
 688         trace_test_pages_isolated(start_pfn, end_pfn, pfn);
 689
 690         return ret;
 691 }