mm/page_alloc: disassociate the pcp->high from pcp->batch

author Mel Gorman <[email protected]>

Tue, 29 Jun 2021 02:42:12 +0000 (19:42 -0700)

committer Linus Torvalds <[email protected]>

Tue, 29 Jun 2021 17:53:54 +0000 (10:53 -0700)
author Mel Gorman <[email protected]>
Tue, 29 Jun 2021 02:42:12 +0000 (19:42 -0700)
committer Linus Torvalds <[email protected]>
Tue, 29 Jun 2021 17:53:54 +0000 (10:53 -0700)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c

index 70620d0dd923a23bd77bffef4dd8b68f6e33eb14..974a565797d86d8e7e3f06660b80414d3e2000f3 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -961,7 +961,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z
         node_states_set_node(nid, &arg);
         if (need_zonelists_rebuild)
                 build_all_zonelists(NULL);
-       zone_pcp_update(zone);
  
         /* Basic onlining is complete, allow allocation of onlined pages. */
         undo_isolate_page_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE);
@@ -974,6 +973,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z
          */
         shuffle_zone(zone);
  
+       /* reinitialise watermarks and update pcp limits */
         init_per_zone_wmark_min();
  
         kswapd_run(nid);
@@ -1829,13 +1829,13 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
         adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
         adjust_present_page_count(zone, -nr_pages);
  
+       /* reinitialise watermarks and update pcp limits */
         init_per_zone_wmark_min();
  
         if (!populated_zone(zone)) {
                 zone_pcp_reset(zone);
                 build_all_zonelists(NULL);
-       } else
-               zone_pcp_update(zone);
+       }
  
         node_states_clear_node(node, &arg);
         if (arg.status_change_nid >= 0) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 5abf2c1d4c588f498f1b8bef58ec0945b09786e3..19ec81d403a03c8963df9ad80481093fbc9f85c1 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2174,14 +2174,6 @@ void __init page_alloc_init_late(void)
         /* Block until all are initialised */
         wait_for_completion(&pgdat_init_all_done_comp);
  
-       /*
-        * The number of managed pages has changed due to the initialisation
-        * so the pcpu batch and high limits needs to be updated or the limits
-        * will be artificially small.
-        */
-       for_each_populated_zone(zone)
-               zone_pcp_update(zone);
-
         /*
          * We initialized the rest of the deferred pages.  Permanently disable
          * on-demand struct page initialization.
@@ -6633,13 +6625,12 @@ static int zone_batchsize(struct zone *zone)
         int batch;
  
         /*
-        * The per-cpu-pages pools are set to around 1000th of the
-        * size of the zone.
+        * The number of pages to batch allocate is either ~0.1%
+        * of the zone or 1MB, whichever is smaller. The batch
+        * size is striking a balance between allocation latency
+        * and zone lock contention.
          */
-       batch = zone_managed_pages(zone) / 1024;
-       /* But no more than a meg. */
-       if (batch * PAGE_SIZE > 1024 * 1024)
-               batch = (1024 * 1024) / PAGE_SIZE;
+       batch = min(zone_managed_pages(zone) >> 10, (1024 * 1024) / PAGE_SIZE);
         batch /= 4;             /* We effectively *= 4 below */
         if (batch < 1)
                 batch = 1;
@@ -6676,6 +6667,34 @@ static int zone_batchsize(struct zone *zone)
  #endif
  }
  
+static int zone_highsize(struct zone *zone, int batch)
+{
+#ifdef CONFIG_MMU
+       int high;
+       int nr_local_cpus;
+
+       /*
+        * The high value of the pcp is based on the zone low watermark
+        * so that if they are full then background reclaim will not be
+        * started prematurely. The value is split across all online CPUs
+        * local to the zone. Note that early in boot, CPUs may not be
+        * online yet.
+        */
+       nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone))));
+       high = low_wmark_pages(zone) / nr_local_cpus;
+
+       /*
+        * Ensure high is at least batch*4. The multiple is based on the
+        * historical relationship between high and batch.
+        */
+       high = max(high, batch << 2);
+
+       return high;
+#else
+       return 0;
+#endif
+}
+
  /*
   * pcp->high and pcp->batch values are related and generally batch is lower
   * than high. They are also related to pcp->count such that count is lower
@@ -6737,11 +6756,10 @@ static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long h
   */
  static void zone_set_pageset_high_and_batch(struct zone *zone)
  {
-       unsigned long new_high, new_batch;
+       int new_high, new_batch;
  
-       new_batch = zone_batchsize(zone);
-       new_high = 6 * new_batch;
-       new_batch = max(1UL, 1 * new_batch);
+       new_batch = max(1, zone_batchsize(zone));
+       new_high = zone_highsize(zone, new_batch);
  
         if (zone->pageset_high == new_high &&
             zone->pageset_batch == new_batch)
@@ -8222,11 +8240,19 @@ static void __setup_per_zone_wmarks(void)
   */
  void setup_per_zone_wmarks(void)
  {
+       struct zone *zone;
         static DEFINE_SPINLOCK(lock);
  
         spin_lock(&lock);
         __setup_per_zone_wmarks();
         spin_unlock(&lock);
+
+       /*
+        * The watermark sizes have changed, so update the pcpu batch
+        * and high limits or the limits may be inappropriate.
+        */
+       for_each_zone(zone)
+               zone_pcp_update(zone);
  }
  
  /*
author	Mel Gorman <[email protected]>
	Tue, 29 Jun 2021 02:42:12 +0000 (19:42 -0700)
committer	Linus Torvalds <[email protected]>
	Tue, 29 Jun 2021 17:53:54 +0000 (10:53 -0700)
mm/memory_hotplug.c		patch \| blob \| blame \| history
mm/page_alloc.c		patch \| blob \| blame \| history