Git Repo - linux.git/commitdiff
mm: vmscan: shrink deferred objects proportional to priority
author Yang Shi <[email protected]>
Wed, 5 May 2021 01:36:45 +0000 (18:36 -0700)
committer Linus Torvalds <[email protected]>
Wed, 5 May 2021 18:27:23 +0000 (11:27 -0700)
The number of deferred objects might wind up to an absurd number, which
results in the slab objects being clamped.  This is undesirable for
sustaining the working set.

So shrink deferred objects in proportion to the priority, and cap
nr_deferred at twice the number of cache items.

The idea is borrowed from Dave Chinner's patch:
  https://lore.kernel.org/linux-xfs/20191031234618[email protected]/

Tested with a kernel build and a VFS-metadata-heavy workload in our
production environment; no regressions have been spotted so far.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Yang Shi <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Kirill Tkhai <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Roman Gushchin <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
mm/vmscan.c

index 8c2d2003acbea9afcbdd4118b97955e677ab7c4c..44c49acf10c4c6dbada1b039d4446082f9facaa4 100644 (file)
@@ -664,7 +664,6 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
         */
        nr = xchg_nr_deferred(shrinker, shrinkctl);
 
-       total_scan = nr;
        if (shrinker->seeks) {
                delta = freeable >> priority;
                delta *= 4;
@@ -678,37 +677,9 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
                delta = freeable / 2;
        }
 
+       total_scan = nr >> priority;
        total_scan += delta;
-       if (total_scan < 0) {
-               pr_err("shrink_slab: %pS negative objects to delete nr=%ld\n",
-                      shrinker->scan_objects, total_scan);
-               total_scan = freeable;
-               next_deferred = nr;
-       } else
-               next_deferred = total_scan;
-
-       /*
-        * We need to avoid excessive windup on filesystem shrinkers
-        * due to large numbers of GFP_NOFS allocations causing the
-        * shrinkers to return -1 all the time. This results in a large
-        * nr being built up so when a shrink that can do some work
-        * comes along it empties the entire cache due to nr >>>
-        * freeable. This is bad for sustaining a working set in
-        * memory.
-        *
-        * Hence only allow the shrinker to scan the entire cache when
-        * a large delta change is calculated directly.
-        */
-       if (delta < freeable / 4)
-               total_scan = min(total_scan, freeable / 2);
-
-       /*
-        * Avoid risking looping forever due to too large nr value:
-        * never try to free more than twice the estimate number of
-        * freeable entries.
-        */
-       if (total_scan > freeable * 2)
-               total_scan = freeable * 2;
+       total_scan = min(total_scan, (2 * freeable));
 
        trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
                                   freeable, delta, total_scan, priority);
@@ -747,10 +718,15 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
                cond_resched();
        }
 
-       if (next_deferred >= scanned)
-               next_deferred -= scanned;
-       else
-               next_deferred = 0;
+       /*
+        * The deferred work is increased by any new work (delta) that wasn't
+        * done, decreased by old deferred work that was done now.
+        *
+        * And it is capped to two times of the freeable items.
+        */
+       next_deferred = max_t(long, (nr + delta - scanned), 0);
+       next_deferred = min(next_deferred, (2 * freeable));
+
        /*
         * move the unused scan count back into the shrinker in a
         * manner that handles concurrent updates.
This page took 0.101055 seconds and 4 git commands to generate.