Git Repo - linux.git/commitdiff
Merge branch 'akpm' (patches from Andrew)
author    Linus Torvalds <[email protected]>
Fri, 16 Oct 2020 18:31:55 +0000 (11:31 -0700)
committer Linus Torvalds <[email protected]>
Fri, 16 Oct 2020 18:31:55 +0000 (11:31 -0700)
Merge more updates from Andrew Morton:
 "155 patches.

  Subsystems affected by this patch series: mm (dax, debug, thp,
  readahead, page-poison, util, memory-hotplug, zram, cleanups), misc,
  core-kernel, get_maintainer, MAINTAINERS, lib, bitops, checkpatch,
  binfmt, ramfs, autofs, nilfs, rapidio, panic, relay, kgdb, ubsan,
  romfs, and fault-injection"

* emailed patches from Andrew Morton <[email protected]>: (155 commits)
  lib, uaccess: add failure injection to usercopy functions
  lib, include/linux: add usercopy failure capability
  ROMFS: support inode blocks calculation
  ubsan: introduce CONFIG_UBSAN_LOCAL_BOUNDS for Clang
  sched.h: drop in_ubsan field when UBSAN is in trap mode
  scripts/gdb/tasks: add headers and improve spacing format
  scripts/gdb/proc: add struct mount & struct super_block addr in lx-mounts command
  kernel/relay.c: drop unneeded initialization
  panic: dump registers on panic_on_warn
  rapidio: fix the missed put_device() for rio_mport_add_riodev
  rapidio: fix error handling path
  nilfs2: fix some kernel-doc warnings for nilfs2
  autofs: harden ioctl table
  ramfs: fix nommu mmap with gaps in the page cache
  mm: remove the now-unnecessary mmget_still_valid() hack
  mm/gup: take mmap_lock in get_dump_page()
  binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
  coredump: rework elf/elf_fdpic vma_dump_size() into common helper
  coredump: refactor page range dumping into common helper
  coredump: let dump_emit() bail out on short writes
  ...

Documentation/admin-guide/kernel-parameters.txt
MAINTAINERS
mm/filemap.c
mm/page_alloc.c

diff --combined Documentation/admin-guide/kernel-parameters.txt
index 0f1fb7e862372ca82ffd20174c812a3de67f9776,6d9afb221ff7a689847235d093e5fb70c7800b25..d246ad46d845f7c5b6dac9f8025d47e8afca9c04
                        current integrity status.
  
        failslab=
+       fail_usercopy=
        fail_page_alloc=
        fail_make_request=[KNL]
                        General fault injection mechanism.
                        Format: <interval>,<probability>,<space>,<times>
                        See also Documentation/fault-injection/.
  
 +      fb_tunnels=     [NET]
 +                      Format: { initns | none }
 +                      See Documentation/admin-guide/sysctl/net.rst for
 +                      fb_tunnels_only_for_init_ns
 +
        floppy=         [HW]
                        See Documentation/admin-guide/blockdev/floppy.rst.
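
The fail_usercopy= option added in the hunk above takes the same
<interval>,<probability>,<space>,<times> tuple as the neighbouring
fault-injection knobs; the field semantics are spelled out in
Documentation/fault-injection/fault-injection.rst. A hedged, illustrative
kernel command-line fragment (assuming the generic field meanings: check
every call, fail with 100% probability, no byte-count limit, unlimited
number of failures) would be:

    fail_usercopy=1,100,0,-1
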
  
diff --combined MAINTAINERS
index 0f59b0412953bc4a5abb8ad8e7aebd9eb4071b4b,c925fd3c5c3f362322dcd85f28900811363c2aa5..2c31900a6f1125104ab7024604eb21252173b809
@@@ -1259,7 -1259,7 +1259,7 @@@ S:      Supporte
  F:    Documentation/devicetree/bindings/net/apm-xgene-enet.txt
  F:    Documentation/devicetree/bindings/net/apm-xgene-mdio.txt
  F:    drivers/net/ethernet/apm/xgene/
 -F:    drivers/net/phy/mdio-xgene.c
 +F:    drivers/net/mdio/mdio-xgene.c
  
  APPLIED MICRO (APM) X-GENE SOC PMU
  M:    Khuong Dinh <[email protected]>
@@@ -3238,7 -3238,7 +3238,7 @@@ M:      Daniel Borkmann <[email protected]
  R:    Martin KaFai Lau <[email protected]>
  R:    Song Liu <[email protected]>
  R:    Yonghong Song <[email protected]>
 -R:    Andrii Nakryiko <andrii[email protected]>
 +R:    Andrii Nakryiko <andrii@kernel.org>
  R:    John Fastabend <[email protected]>
  R:    KP Singh <[email protected]>
  L:    [email protected]
@@@ -3914,7 -3914,6 +3914,7 @@@ F:      include/net/netns/can.
  F:    include/uapi/linux/can.h
  F:    include/uapi/linux/can/bcm.h
  F:    include/uapi/linux/can/gw.h
 +F:    include/uapi/linux/can/isotp.h
  F:    include/uapi/linux/can/raw.h
  F:    net/can/
  
@@@ -4714,15 -4713,6 +4714,15 @@@ S:    Supporte
  W:    http://www.chelsio.com
  F:    drivers/crypto/chelsio
  
 +CXGB4 INLINE CRYPTO DRIVER
 +M:    Ayush Sawal <[email protected]>
 +M:    Vinay Kumar Yadav <[email protected]>
 +M:    Rohit Maheshwari <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +W:    http://www.chelsio.com
 +F:    drivers/net/ethernet/chelsio/inline_crypto/
 +
  CXGB4 ETHERNET DRIVER (CXGB4)
  M:    Vishal Kulkarni <[email protected]>
  L:    [email protected]
@@@ -6556,14 -6546,11 +6556,14 @@@ F:   Documentation/devicetree/bindings/ne
  F:    Documentation/devicetree/bindings/net/mdio*
  F:    Documentation/devicetree/bindings/net/qca,ar803x.yaml
  F:    Documentation/networking/phy.rst
 +F:    drivers/net/mdio/
 +F:    drivers/net/mdio/of_mdio.c
 +F:    drivers/net/pcs/
  F:    drivers/net/phy/
 -F:    drivers/of/of_mdio.c
  F:    drivers/of/of_net.c
  F:    include/dt-bindings/net/qca-ar803x.h
  F:    include/linux/*mdio*.h
 +F:    include/linux/mdio/*.h
  F:    include/linux/of_net.h
  F:    include/linux/phy.h
  F:    include/linux/phy_fixed.h
@@@ -6639,7 -6626,6 +6639,7 @@@ F:      fs/proc/bootconfig.
  F:    include/linux/bootconfig.h
  F:    lib/bootconfig.c
  F:    tools/bootconfig/*
 +F:    tools/bootconfig/scripts/*
  
  EXYNOS DP DRIVER
  M:    Jingoo Han <[email protected]>
@@@ -9715,7 -9701,7 +9715,7 @@@ F:      security/keys/encrypted-keys
  
  KEYS-TRUSTED
  M:    James Bottomley <[email protected]>
- M:    Jarkko Sakkinen <jarkko[email protected]>
+ M:    Jarkko Sakkinen <jarkko@kernel.org>
  M:    Mimi Zohar <[email protected]>
  L:    [email protected]
  L:    [email protected]
@@@ -9727,7 -9713,7 +9727,7 @@@ F:      security/keys/trusted-keys
  
  KEYS/KEYRINGS
  M:    David Howells <[email protected]>
- M:    Jarkko Sakkinen <jarkko[email protected]>
+ M:    Jarkko Sakkinen <jarkko@kernel.org>
  L:    [email protected]
  S:    Maintained
  F:    Documentation/security/keys/core.rst
@@@ -10360,13 -10346,6 +10360,13 @@@ S: Maintaine
  W:    http://linux-test-project.github.io/
  T:    git git://github.com/linux-test-project/ltp.git
  
 +LYNX PCS MODULE
 +M:    Ioana Ciornei <[email protected]>
 +L:    [email protected]
 +S:    Supported
 +F:    drivers/net/pcs/pcs-lynx.c
 +F:    include/linux/pcs-lynx.h
 +
  M68K ARCHITECTURE
  M:    Geert Uytterhoeven <[email protected]>
  L:    [email protected]
@@@ -10574,7 -10553,7 +10574,7 @@@ M:   Tobias Waldekranz <tobias@waldekranz
  L:    [email protected]
  S:    Maintained
  F:    Documentation/devicetree/bindings/net/marvell,mvusb.yaml
 -F:    drivers/net/phy/mdio-mvusb.c
 +F:    drivers/net/mdio/mdio-mvusb.c
  
  MARVELL XENON MMC/SD/SDIO HOST CONTROLLER DRIVER
  M:    Hu Ziji <[email protected]>
  S:    Maintained
  F:    drivers/hid/hid-mcp2221.c
  
 +MCP251XFD SPI-CAN NETWORK DRIVER
 +M:    Marc Kleine-Budde <[email protected]>
 +M:    Manivannan Sadhasivam <[email protected]>
 +R:    Thomas Kopp <[email protected]>
 +L:    [email protected]
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml
 +F:    drivers/net/can/spi/mcp251xfd/
 +
  MCP4018 AND MCP4531 MICROCHIP DIGITAL POTENTIOMETER DRIVERS
  M:    Peter Rosin <[email protected]>
  L:    [email protected]
@@@ -12119,6 -12089,7 +12119,6 @@@ M:   Neil Horman <[email protected]
  L:    [email protected]
  S:    Maintained
  W:    https://fedorahosted.org/dropwatch/
 -F:    include/net/drop_monitor.h
  F:    include/uapi/linux/net_dropmon.h
  F:    net/core/drop_monitor.c
  
@@@ -12213,7 -12184,6 +12213,7 @@@ F:   net/ipv6/ipcomp6.
  F:    net/ipv6/xfrm*
  F:    net/key/
  F:    net/xfrm/
 +F:    tools/testing/selftests/net/ipsec.c
  
  NETWORKING [IPv4/IPv6]
  M:    "David S. Miller" <[email protected]>
@@@ -12626,7 -12596,6 +12626,7 @@@ F:   drivers/net/dsa/ocelot/
  F:    drivers/net/ethernet/mscc/
  F:    include/soc/mscc/ocelot*
  F:    net/dsa/tag_ocelot.c
 +F:    tools/testing/selftests/drivers/net/ocelot/*
  
  OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER
  M:    Frederic Barrat <[email protected]>
@@@ -15402,11 -15371,10 +15402,11 @@@ F:        drivers/media/platform/s3c-camif
  F:    include/media/drv-intf/s3c_camif.h
  
  SAMSUNG S3FWRN5 NFC DRIVER
 -M:    Robert Baldyga <[email protected]>
 +M:    Krzysztof Kozlowski <[email protected]>
  M:    Krzysztof Opasiak <[email protected]>
  L:    [email protected] (moderated for non-subscribers)
 -S:    Supported
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
  F:    drivers/nfc/s3fwrn5
  
  SAMSUNG S5C73M3 CAMERA DRIVER
  S:    Maintained
  F:    drivers/net/phy/phylink.c
  F:    drivers/net/phy/sfp*
 +F:    include/linux/mdio/mdio-i2c.h
  F:    include/linux/phylink.h
  F:    include/linux/sfp.h
  K:    phylink\.h|struct\s+phylink|\.phylink|>phylink_|phylink_(autoneg|clear|connect|create|destroy|disconnect|ethtool|helper|mac|mii|of|set|start|stop|test|validate)
@@@ -16883,8 -16850,8 +16883,8 @@@ SYNOPSYS DESIGNWARE ETHERNET XPCS DRIVE
  M:    Jose Abreu <[email protected]>
  L:    [email protected]
  S:    Supported
 -F:    drivers/net/phy/mdio-xpcs.c
 -F:    include/linux/mdio-xpcs.h
 +F:    drivers/net/pcs/pcs-xpcs.c
 +F:    include/linux/pcs/pcs-xpcs.h
  
  SYNOPSYS DESIGNWARE I2C DRIVER
  M:    Jarkko Nikula <[email protected]>
@@@ -17717,7 -17684,7 +17717,7 @@@ F:   drivers/platform/x86/toshiba-wmi.
  
  TPM DEVICE DRIVER
  M:    Peter Huewe <[email protected]>
- M:    Jarkko Sakkinen <jarkko[email protected]>
+ M:    Jarkko Sakkinen <jarkko@kernel.org>
  R:    Jason Gunthorpe <[email protected]>
  L:    [email protected]
  S:    Maintained
diff --combined mm/filemap.c
index e3b8987153e67d8e4a92fb5781d8e34a9a506bfd,45e564a20f8e6e09a6126b8935a4a8c49977a733..1a6beaf69f49f3d9e58497e5719e71627111072c
@@@ -249,7 -249,7 +249,7 @@@ static void page_cache_free_page(struc
                freepage(page);
  
        if (PageTransHuge(page) && !PageHuge(page)) {
-               page_ref_sub(page, HPAGE_PMD_NR);
+               page_ref_sub(page, thp_nr_pages(page));
                VM_BUG_ON_PAGE(page_count(page) <= 0, page);
        } else {
                put_page(page);
@@@ -827,15 -827,14 +827,14 @@@ int replace_page_cache_page(struct pag
  }
  EXPORT_SYMBOL_GPL(replace_page_cache_page);
  
 -static int __add_to_page_cache_locked(struct page *page,
 -                                    struct address_space *mapping,
 -                                    pgoff_t offset, gfp_t gfp,
 -                                    void **shadowp)
 +noinline int __add_to_page_cache_locked(struct page *page,
 +                                      struct address_space *mapping,
-                                       pgoff_t offset, gfp_t gfp_mask,
++                                      pgoff_t offset, gfp_t gfp,
 +                                      void **shadowp)
  {
        XA_STATE(xas, &mapping->i_pages, offset);
        int huge = PageHuge(page);
        int error;
-       void *old;
  
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageSwapBacked(page), page);
        page->index = offset;
  
        if (!huge) {
-               error = mem_cgroup_charge(page, current->mm, gfp_mask);
+               error = mem_cgroup_charge(page, current->mm, gfp);
                if (error)
                        goto error;
        }
  
+       gfp &= GFP_RECLAIM_MASK;
        do {
+               unsigned int order = xa_get_order(xas.xa, xas.xa_index);
+               void *entry, *old = NULL;
+               if (order > thp_order(page))
+                       xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
+                                       order, gfp);
                xas_lock_irq(&xas);
-               old = xas_load(&xas);
-               if (old && !xa_is_value(old))
-                       xas_set_err(&xas, -EEXIST);
+               xas_for_each_conflict(&xas, entry) {
+                       old = entry;
+                       if (!xa_is_value(entry)) {
+                               xas_set_err(&xas, -EEXIST);
+                               goto unlock;
+                       }
+               }
+               if (old) {
+                       if (shadowp)
+                               *shadowp = old;
+                       /* entry may have been split before we acquired lock */
+                       order = xa_get_order(xas.xa, xas.xa_index);
+                       if (order > thp_order(page)) {
+                               xas_split(&xas, old, order);
+                               xas_reset(&xas);
+                       }
+               }
                xas_store(&xas, page);
                if (xas_error(&xas))
                        goto unlock;
  
-               if (xa_is_value(old)) {
+               if (old)
                        mapping->nrexceptional--;
-                       if (shadowp)
-                               *shadowp = old;
-               }
                mapping->nrpages++;
  
                /* hugetlb pages do not participate in page cache accounting */
                        __inc_lruvec_page_state(page, NR_FILE_PAGES);
  unlock:
                xas_unlock_irq(&xas);
-       } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
+       } while (xas_nomem(&xas, gfp));
  
        if (xas_error(&xas)) {
                error = xas_error(&xas);
@@@ -1425,7 -1445,7 +1445,7 @@@ static inline bool clear_bit_unlock_is_
   * unlock_page - unlock a locked page
   * @page: the page
   *
-  * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
+  * Unlocks the page and wakes up sleepers in wait_on_page_locked().
   * Also wakes sleepers in wait_on_page_writeback() because the wakeup
   * mechanism between PageLocked pages and PageWriteback pages is shared.
   * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
@@@ -2568,8 -2588,8 +2588,8 @@@ static struct file *do_sync_mmap_readah
        struct file *file = vmf->vma->vm_file;
        struct file_ra_state *ra = &file->f_ra;
        struct address_space *mapping = file->f_mapping;
+       DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
        struct file *fpin = NULL;
-       pgoff_t offset = vmf->pgoff;
        unsigned int mmap_miss;
  
        /* If we don't want any read-ahead, don't bother */
  
        if (vmf->vma->vm_flags & VM_SEQ_READ) {
                fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-               page_cache_sync_readahead(mapping, ra, file, offset,
-                                         ra->ra_pages);
+               page_cache_sync_ra(&ractl, ra, ra->ra_pages);
                return fpin;
        }
  
         * mmap read-around
         */
        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-       ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
+       ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
        ra->size = ra->ra_pages;
        ra->async_size = ra->ra_pages / 4;
-       ra_submit(ra, mapping, file);
+       ractl._index = ra->start;
+       do_page_cache_ra(&ractl, ra->size, ra->async_size);
        return fpin;
  }
  
@@@ -2984,7 -3004,7 +3004,7 @@@ filler
                goto out;
  
        /*
-        * Page is not up to date and may be locked due one of the following
+        * Page is not up to date and may be locked due to one of the following
         * case a: Page is being filled and the page lock is held
         * case b: Read/write error clearing the page uptodate status
         * case c: Truncation in progress (page locked)
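
The do_sync_mmap_readahead() hunks above replace the (mapping, file, offset)
calling convention with a readahead_control descriptor. As a minimal sketch --
using only the helpers already visible in this diff (DEFINE_READAHEAD,
page_cache_sync_ra, do_page_cache_ra), so it is tied to this kernel version
rather than a general API guarantee -- the resulting call pattern is:

    /* Sketch of the call pattern above; a fragment of kernel code, not a
     * standalone example and not new code introduced by this commit. */
    struct file_ra_state *ra = &file->f_ra;
    DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);

    /* VM_SEQ_READ: synchronous readahead starting at the faulting index */
    page_cache_sync_ra(&ractl, ra, ra->ra_pages);

    /* otherwise: an explicit read-around window centred on the fault */
    ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
    ra->size = ra->ra_pages;
    ra->async_size = ra->ra_pages / 4;
    ractl._index = ra->start;
    do_page_cache_ra(&ractl, ra->size, ra->async_size);
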
diff --combined mm/page_alloc.c
index e0ff3a811ec528b45666711fe3cbbe4b1bf3927f,ccf615c0627e1d34845f116ab12457c00a4fcb00..23f5066bd4a554087fea3952d8423003fd7cc304
  #include "shuffle.h"
  #include "page_reporting.h"
  
+ /* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
+ typedef int __bitwise fpi_t;
+ /* No special request */
+ #define FPI_NONE              ((__force fpi_t)0)
+ /*
+  * Skip free page reporting notification for the (possibly merged) page.
+  * This does not hinder free page reporting from grabbing the page,
+  * reporting it and marking it "reported" -  it only skips notifying
+  * the free page reporting infrastructure about a newly freed page. For
+  * example, used when temporarily pulling a page from a freelist and
+  * putting it back unmodified.
+  */
+ #define FPI_SKIP_REPORT_NOTIFY        ((__force fpi_t)BIT(0))
+ /*
+  * Place the (possibly merged) page to the tail of the freelist. Will ignore
+  * page shuffling (relevant code - e.g., memory onlining - is expected to
+  * shuffle the whole zone).
+  *
+  * Note: No code should rely on this flag for correctness - it's purely
+  *       to allow for optimizations when handing back either fresh pages
+  *       (memory onlining) or untouched pages (page isolation, free page
+  *       reporting).
+  */
+ #define FPI_TO_TAIL           ((__force fpi_t)BIT(1))
  /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
  static DEFINE_MUTEX(pcp_batch_high_lock);
  #define MIN_PERCPU_PAGELIST_FRACTION  (8)
@@@ -247,7 -275,8 +275,8 @@@ bool pm_suspended_storage(void
  unsigned int pageblock_order __read_mostly;
  #endif
  
- static void __free_pages_ok(struct page *page, unsigned int order);
+ static void __free_pages_ok(struct page *page, unsigned int order,
+                           fpi_t fpi_flags);
  
  /*
   * results with 256, 32 in the lowmem_reserve sysctl:
@@@ -659,7 -688,7 +688,7 @@@ out
  void free_compound_page(struct page *page)
  {
        mem_cgroup_uncharge(page);
-       __free_pages_ok(page, compound_order(page));
+       __free_pages_ok(page, compound_order(page), FPI_NONE);
  }
  
  void prep_compound_page(struct page *page, unsigned int order)
@@@ -763,7 -792,7 +792,7 @@@ static inline void clear_page_guard(str
                                unsigned int order, int migratetype) {}
  #endif
  
- static inline void set_page_order(struct page *page, unsigned int order)
+ static inline void set_buddy_order(struct page *page, unsigned int order)
  {
        set_page_private(page, order);
        __SetPageBuddy(page);
@@@ -788,7 -817,7 +817,7 @@@ static inline bool page_is_buddy(struc
        if (!page_is_guard(buddy) && !PageBuddy(buddy))
                return false;
  
-       if (page_order(buddy) != order)
+       if (buddy_order(buddy) != order)
                return false;
  
        /*
@@@ -873,13 -902,17 +902,17 @@@ static inline void add_to_free_list_tai
        area->nr_free++;
  }
  
- /* Used for pages which are on another list */
+ /*
+  * Used for pages which are on another list. Move the pages to the tail
+  * of the list - so the moved pages won't immediately be considered for
+  * allocation again (e.g., optimization for memory onlining).
+  */
  static inline void move_to_free_list(struct page *page, struct zone *zone,
                                     unsigned int order, int migratetype)
  {
        struct free_area *area = &zone->free_area[order];
  
-       list_move(&page->lru, &area->free_list[migratetype]);
+       list_move_tail(&page->lru, &area->free_list[migratetype]);
  }
  
  static inline void del_page_from_free_list(struct page *page, struct zone *zone,
@@@ -952,7 -985,7 +985,7 @@@ buddy_merge_likely(unsigned long pfn, u
  static inline void __free_one_page(struct page *page,
                unsigned long pfn,
                struct zone *zone, unsigned int order,
-               int migratetype, bool report)
+               int migratetype, fpi_t fpi_flags)
  {
        struct capture_control *capc = task_capc(zone);
        unsigned long buddy_pfn;
@@@ -1026,9 -1059,11 +1059,11 @@@ continue_merging
        }
  
  done_merging:
-       set_page_order(page, order);
+       set_buddy_order(page, order);
  
-       if (is_shuffle_order(order))
+       if (fpi_flags & FPI_TO_TAIL)
+               to_tail = true;
+       else if (is_shuffle_order(order))
                to_tail = shuffle_pick_tail();
        else
                to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
                add_to_free_list(page, zone, order, migratetype);
  
        /* Notify page reporting subsystem of freed page */
-       if (report)
+       if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY))
                page_reporting_notify_free(order);
  }
  
@@@ -1174,6 -1209,17 +1209,17 @@@ static __always_inline bool free_pages_
  
        trace_mm_page_free(page, order);
  
+       if (unlikely(PageHWPoison(page)) && !order) {
+               /*
+                * Do not let hwpoison pages hit pcplists/buddy
+                * Untie memcg state and reset page's owner
+                */
+               if (memcg_kmem_enabled() && PageKmemcg(page))
+                       __memcg_kmem_uncharge_page(page, order);
+               reset_page_owner(page, order);
+               return false;
+       }
        /*
         * Check tail pages before head page information is cleared to
         * avoid checking PageCompound for order-0 pages.
@@@ -1369,7 -1415,7 +1415,7 @@@ static void free_pcppages_bulk(struct z
                if (unlikely(isolated_pageblocks))
                        mt = get_pageblock_migratetype(page);
  
-               __free_one_page(page, page_to_pfn(page), zone, 0, mt, true);
+               __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
                trace_mm_page_pcpu_drain(page, 0, mt);
        }
        spin_unlock(&zone->lock);
  static void free_one_page(struct zone *zone,
                                struct page *page, unsigned long pfn,
                                unsigned int order,
-                               int migratetype)
+                               int migratetype, fpi_t fpi_flags)
  {
        spin_lock(&zone->lock);
        if (unlikely(has_isolate_pageblock(zone) ||
                is_migrate_isolate(migratetype))) {
                migratetype = get_pfnblock_migratetype(page, pfn);
        }
-       __free_one_page(page, pfn, zone, order, migratetype, true);
+       __free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
        spin_unlock(&zone->lock);
  }
  
@@@ -1463,7 -1509,8 +1509,8 @@@ void __meminit reserve_bootmem_region(p
        }
  }
  
- static void __free_pages_ok(struct page *page, unsigned int order)
+ static void __free_pages_ok(struct page *page, unsigned int order,
+                           fpi_t fpi_flags)
  {
        unsigned long flags;
        int migratetype;
        migratetype = get_pfnblock_migratetype(page, pfn);
        local_irq_save(flags);
        __count_vm_events(PGFREE, 1 << order);
-       free_one_page(page_zone(page), page, pfn, order, migratetype);
+       free_one_page(page_zone(page), page, pfn, order, migratetype,
+                     fpi_flags);
        local_irq_restore(flags);
  }
  
@@@ -1485,6 -1533,11 +1533,11 @@@ void __free_pages_core(struct page *pag
        struct page *p = page;
        unsigned int loop;
  
+       /*
+        * When initializing the memmap, __init_single_page() sets the refcount
+        * of all pages to 1 ("allocated"/"not free"). We have to set the
+        * refcount of all involved pages to 0.
+        */
        prefetchw(p);
        for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
                prefetchw(p + 1);
        set_page_count(p, 0);
  
        atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
-       set_page_refcounted(page);
-       __free_pages(page, order);
+       /*
+        * Bypass PCP and place fresh pages right to the tail, primarily
+        * relevant for memory onlining.
+        */
+       __free_pages_ok(page, order, FPI_TO_TAIL);
  }
  
  #ifdef CONFIG_NEED_MULTIPLE_NODES
@@@ -2121,7 -2178,7 +2178,7 @@@ static inline void expand(struct zone *
                        continue;
  
                add_to_free_list(&page[size], zone, high, migratetype);
-               set_page_order(&page[size], high);
+               set_buddy_order(&page[size], high);
        }
  }
  
@@@ -2299,7 -2356,7 +2356,7 @@@ static inline struct page *__rmqueue_cm
  #endif
  
  /*
-  * Move the free pages in a range to the free lists of the requested type.
+  * Move the free pages in a range to the freelist tail of the requested type.
   * Note that start_page and end_pages are not aligned on a pageblock
   * boundary. If alignment is required, use move_freepages_block()
   */
@@@ -2335,7 -2392,7 +2392,7 @@@ static int move_freepages(struct zone *
                VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
                VM_BUG_ON_PAGE(page_zone(page) != zone, page);
  
-               order = page_order(page);
+               order = buddy_order(page);
                move_to_free_list(page, zone, order, migratetype);
                page += 1 << order;
                pages_moved += 1 << order;
@@@ -2459,7 -2516,7 +2516,7 @@@ static inline void boost_watermark(stru
  static void steal_suitable_fallback(struct zone *zone, struct page *page,
                unsigned int alloc_flags, int start_type, bool whole_block)
  {
-       unsigned int current_order = page_order(page);
+       unsigned int current_order = buddy_order(page);
        int free_pages, movable_pages, alike_pages;
        int old_block_type;
  
@@@ -3123,7 -3180,8 +3180,8 @@@ static void free_unref_page_commit(stru
         */
        if (migratetype >= MIGRATE_PCPTYPES) {
                if (unlikely(is_migrate_isolate(migratetype))) {
-                       free_one_page(zone, page, pfn, 0, migratetype);
+                       free_one_page(zone, page, pfn, 0, migratetype,
+                                     FPI_NONE);
                        return;
                }
                migratetype = MIGRATE_MOVABLE;
@@@ -3209,7 -3267,7 +3267,7 @@@ void split_page(struct page *page, unsi
  
        for (i = 1; i < (1 << order); i++)
                set_page_refcounted(page + i);
-       split_page_owner(page, order);
+       split_page_owner(page, 1 << order);
  }
  EXPORT_SYMBOL_GPL(split_page);
  
@@@ -3278,7 -3336,8 +3336,8 @@@ void __putback_isolated_page(struct pag
        lockdep_assert_held(&zone->lock);
  
        /* Return isolated page to tail of freelist. */
-       __free_one_page(page, page_to_pfn(page), zone, order, mt, false);
+       __free_one_page(page, page_to_pfn(page), zone, order, mt,
+                       FPI_SKIP_REPORT_NOTIFY | FPI_TO_TAIL);
  }
  
  /*
@@@ -3496,7 -3555,7 +3555,7 @@@ static inline bool __should_fail_alloc_
  
  #endif /* CONFIG_FAIL_PAGE_ALLOC */
  
 -static noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 +noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
  {
        return __should_fail_alloc_page(gfp_mask, order);
  }
@@@ -4945,7 -5004,7 +5004,7 @@@ static inline void free_the_page(struc
        if (order == 0)         /* Via pcp? */
                free_unref_page(page);
        else
-               __free_pages_ok(page, order);
+               __free_pages_ok(page, order, FPI_NONE);
  }
  
  void __free_pages(struct page *page, unsigned int order)
@@@ -5979,10 -6038,15 +6038,15 @@@ overlap_memmap_init(unsigned long zone
   * Initially all pages are reserved - free ones are freed
   * up by memblock_free_all() once the early boot process is
   * done. Non-atomic initialization, single-pass.
+  *
+  * All aligned pageblocks are initialized to the specified migratetype
+  * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
+  * zone stats (e.g., nr_isolate_pageblock) are touched.
   */
  void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-               unsigned long start_pfn, enum meminit_context context,
-               struct vmem_altmap *altmap)
+               unsigned long start_pfn,
+               enum meminit_context context,
+               struct vmem_altmap *altmap, int migratetype)
  {
        unsigned long pfn, end_pfn = start_pfn + size;
        struct page *page;
                        __SetPageReserved(page);
  
                /*
-                * Mark the block movable so that blocks are reserved for
-                * movable at startup. This will force kernel allocations
-                * to reserve their blocks rather than leaking throughout
-                * the address space during boot when many long-lived
-                * kernel allocations are made.
-                *
-                * bitmap is created for zone's valid pfn range. but memmap
-                * can be created for invalid pages (for alignment)
-                * check here not to call set_pageblock_migratetype() against
-                * pfn out of zone.
+                * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
+                * such that unmovable allocations won't be scattered all
+                * over the place during system boot.
                 */
-               if (!(pfn & (pageblock_nr_pages - 1))) {
-                       set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+               if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+                       set_pageblock_migratetype(page, migratetype);
                        cond_resched();
                }
                pfn++;
@@@ -6100,15 -6157,10 +6157,10 @@@ void __ref memmap_init_zone_device(stru
                 * the address space during boot when many long-lived
                 * kernel allocations are made.
                 *
-                * bitmap is created for zone's valid pfn range. but memmap
-                * can be created for invalid pages (for alignment)
-                * check here not to call set_pageblock_migratetype() against
-                * pfn out of zone.
-                *
                 * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
                 * because this is done early in section_activate()
                 */
-               if (!(pfn & (pageblock_nr_pages - 1))) {
+               if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
                        set_pageblock_migratetype(page, MIGRATE_MOVABLE);
                        cond_resched();
                }
@@@ -6143,7 -6195,7 +6195,7 @@@ void __meminit __weak memmap_init(unsig
                if (end_pfn > start_pfn) {
                        size = end_pfn - start_pfn;
                        memmap_init_zone(size, nid, zone, start_pfn,
-                                        MEMINIT_EARLY, NULL);
+                                        MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
                }
        }
  }
@@@ -8292,7 -8344,7 +8344,7 @@@ struct page *has_unmovable_pages(struc
                 */
                if (!page_ref_count(page)) {
                        if (PageBuddy(page))
-                               iter += (1 << page_order(page)) - 1;
+                               iter += (1 << buddy_order(page)) - 1;
                        continue;
                }
  
@@@ -8457,7 -8509,7 +8509,7 @@@ int alloc_contig_range(unsigned long st
  
        ret = start_isolate_page_range(pfn_max_align_down(start),
                                       pfn_max_align_up(end), migratetype, 0);
-       if (ret < 0)
+       if (ret)
                return ret;
  
        /*
        }
  
        if (outer_start != start) {
-               order = page_order(pfn_to_page(outer_start));
+               order = buddy_order(pfn_to_page(outer_start));
  
                /*
                 * outer_start page could be small order buddy page and
@@@ -8693,35 -8745,21 +8745,21 @@@ void zone_pcp_reset(struct zone *zone
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /*
-  * All pages in the range must be in a single zone and isolated
-  * before calling this.
+  * All pages in the range must be in a single zone, must not contain holes,
+  * must span full sections, and must be isolated before calling this function.
   */
- unsigned long
- __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
+ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
  {
+       unsigned long pfn = start_pfn;
        struct page *page;
        struct zone *zone;
        unsigned int order;
-       unsigned long pfn;
        unsigned long flags;
-       unsigned long offlined_pages = 0;
-       /* find the first valid pfn */
-       for (pfn = start_pfn; pfn < end_pfn; pfn++)
-               if (pfn_valid(pfn))
-                       break;
-       if (pfn == end_pfn)
-               return offlined_pages;
  
        offline_mem_sections(pfn, end_pfn);
        zone = page_zone(pfn_to_page(pfn));
        spin_lock_irqsave(&zone->lock, flags);
-       pfn = start_pfn;
        while (pfn < end_pfn) {
-               if (!pfn_valid(pfn)) {
-                       pfn++;
-                       continue;
-               }
                page = pfn_to_page(pfn);
                /*
                 * The HWPoisoned page may be not in buddy system, and
                 */
                if (unlikely(!PageBuddy(page) && PageHWPoison(page))) {
                        pfn++;
-                       offlined_pages++;
                        continue;
                }
                /*
                        BUG_ON(page_count(page));
                        BUG_ON(PageBuddy(page));
                        pfn++;
-                       offlined_pages++;
                        continue;
                }
  
                BUG_ON(page_count(page));
                BUG_ON(!PageBuddy(page));
-               order = page_order(page);
-               offlined_pages += 1 << order;
+               order = buddy_order(page);
                del_page_from_free_list(page, zone, order);
                pfn += (1 << order);
        }
        spin_unlock_irqrestore(&zone->lock, flags);
-       return offlined_pages;
  }
  #endif
  
@@@ -8768,7 -8801,7 +8801,7 @@@ bool is_free_buddy_page(struct page *pa
        for (order = 0; order < MAX_ORDER; order++) {
                struct page *page_head = page - (pfn & ((1 << order) - 1));
  
-               if (PageBuddy(page_head) && page_order(page_head) >= order)
+               if (PageBuddy(page_head) && buddy_order(page_head) >= order)
                        break;
        }
        spin_unlock_irqrestore(&zone->lock, flags);
  
  #ifdef CONFIG_MEMORY_FAILURE
  /*
-  * Set PG_hwpoison flag if a given page is confirmed to be a free page.  This
-  * test is performed under the zone lock to prevent a race against page
-  * allocation.
+  * Break down a higher-order page in sub-pages, and keep our target out of
+  * buddy allocator.
   */
- bool set_hwpoison_free_buddy_page(struct page *page)
+ static void break_down_buddy_pages(struct zone *zone, struct page *page,
+                                  struct page *target, int low, int high,
+                                  int migratetype)
+ {
+       unsigned long size = 1 << high;
+       struct page *current_buddy, *next_page;
+       while (high > low) {
+               high--;
+               size >>= 1;
+               if (target >= &page[size]) {
+                       next_page = page + size;
+                       current_buddy = page;
+               } else {
+                       next_page = page;
+                       current_buddy = page + size;
+               }
+               if (set_page_guard(zone, current_buddy, high, migratetype))
+                       continue;
+               if (current_buddy != target) {
+                       add_to_free_list(current_buddy, zone, high, migratetype);
+                       set_buddy_order(current_buddy, high);
+                       page = next_page;
+               }
+       }
+ }
+ /*
+  * Take a page that will be marked as poisoned off the buddy allocator.
+  */
+ bool take_page_off_buddy(struct page *page)
  {
        struct zone *zone = page_zone(page);
        unsigned long pfn = page_to_pfn(page);
        unsigned long flags;
        unsigned int order;
-       bool hwpoisoned = false;
+       bool ret = false;
  
        spin_lock_irqsave(&zone->lock, flags);
        for (order = 0; order < MAX_ORDER; order++) {
                struct page *page_head = page - (pfn & ((1 << order) - 1));
+               int page_order = buddy_order(page_head);
  
-               if (PageBuddy(page_head) && page_order(page_head) >= order) {
-                       if (!TestSetPageHWPoison(page))
-                               hwpoisoned = true;
+               if (PageBuddy(page_head) && page_order >= order) {
+                       unsigned long pfn_head = page_to_pfn(page_head);
+                       int migratetype = get_pfnblock_migratetype(page_head,
+                                                                  pfn_head);
+                       del_page_from_free_list(page_head, zone, page_order);
+                       break_down_buddy_pages(zone, page_head, page, 0,
+                                               page_order, migratetype);
+                       ret = true;
                        break;
                }
+               if (page_count(page_head) > 0)
+                       break;
        }
        spin_unlock_irqrestore(&zone->lock, flags);
-       return hwpoisoned;
+       return ret;
  }
  #endif
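
The splitting arithmetic in break_down_buddy_pages() above can be modelled
outside the kernel. The sketch below is a userspace toy (plain C, page-frame
numbers instead of struct page pointers, guard pages and free lists ignored)
that only mirrors the loop structure shown in the hunk; it is an illustration,
not kernel code:

    #include <stdio.h>

    /* Toy model: split an order-`high` block starting at `base` so that the
     * single page `target` stays out, printing the chunks that would be
     * returned to the free lists (cf. break_down_buddy_pages() above). */
    static void break_down(unsigned long base, unsigned long target,
                           int low, int high)
    {
            unsigned long size = 1UL << high;

            while (high > low) {
                    unsigned long buddy, next;

                    high--;
                    size >>= 1;

                    if (target >= base + size) {    /* target in upper half */
                            buddy = base;           /* hand back lower half */
                            next = base + size;
                    } else {                        /* target in lower half */
                            buddy = base + size;    /* hand back upper half */
                            next = base;
                    }

                    printf("free pfns [%lu..%lu] as order %d\n",
                           buddy, buddy + size - 1, high);
                    base = next;
            }
    }

    int main(void)
    {
            /* e.g. pull pfn 5 out of the order-3 block covering pfns 0..7:
             * prints [0..3] order 2, [6..7] order 1, [4..4] order 0. */
            break_down(0, 5, 0, 3);
            return 0;
    }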