mm/damon/reclaim.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * DAMON-based page reclamation
   4  *
   5  * Author: SeongJae Park <sj@kernel.org>
   6  */
   7
   8 #define pr_fmt(fmt) "damon-reclaim: " fmt
   9
  10 #include <linux/damon.h>
  11 #include <linux/ioport.h>
  12 #include <linux/module.h>
  13 #include <linux/sched.h>
  14 #include <linux/workqueue.h>
  15
/*
 * Replace any MODULE_PARAM_PREFIX that is already defined with
 * "damon_reclaim." for the module_param() declarations in this file.
 */
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "damon_reclaim."
  20
/*
 * Enable or disable DAMON_RECLAIM.
 *
 * You can enable DAMON_RECLAIM by setting the value of this parameter as
 * ``Y``.  Setting it as ``N`` disables DAMON_RECLAIM.  Note that DAMON_RECLAIM
 * could do no real monitoring and reclamation due to the watermarks-based
 * activation condition.  Refer to the descriptions of the watermarks
 * parameters below for this.
 *
 * A change of this value is picked up by damon_reclaim_timer_fn(), which
 * checks it once per ENABLE_CHECK_INTERVAL_MS.  If turning DAMON_RECLAIM on or
 * off fails, that function sets this parameter back to the previous state.
 */
static bool enabled __read_mostly;
module_param(enabled, bool, 0600);
  32
/*
 * Time threshold for cold memory regions identification in microseconds.
 *
 * If a memory region is not accessed for this or longer time, DAMON_RECLAIM
 * identifies the region as cold, and reclaims it.  120 seconds by default.
 *
 * When the scheme is built, this value is divided by aggr_interval to get the
 * minimum age that is passed to damon_new_scheme().
 */
static unsigned long min_age __read_mostly = 120000000;
module_param(min_age, ulong, 0600);
  41
/*
 * Limit of time for trying the reclamation in milliseconds.
 *
 * DAMON_RECLAIM tries to use only up to this time within a time window
 * (quota_reset_interval_ms) for trying reclamation of cold pages.  This can be
 * used for limiting CPU consumption of DAMON_RECLAIM.  If the value is zero,
 * the limit is disabled.
 *
 * 10 ms by default.  Used as the 'ms' field of the scheme's quota.
 */
static unsigned long quota_ms __read_mostly = 10;
module_param(quota_ms, ulong, 0600);

/*
 * Limit of size of memory for the reclamation in bytes.
 *
 * DAMON_RECLAIM charges the amount of memory which it tried to reclaim within
 * a time window (quota_reset_interval_ms) and ensures that no more than this
 * limit is tried.  This can be used for limiting consumption of CPU and IO.
 * If this value is zero, the limit is disabled.
 *
 * 128 MiB by default.  Used as the 'sz' field of the scheme's quota.
 */
static unsigned long quota_sz __read_mostly = 128 * 1024 * 1024;
module_param(quota_sz, ulong, 0600);

/*
 * The time/size quota charge reset interval in milliseconds.
 *
 * The charge reset interval for the quota of time (quota_ms) and size
 * (quota_sz).  That is, DAMON_RECLAIM does not try reclamation for more than
 * quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms
 * milliseconds.
 *
 * 1 second by default.  Used as the 'reset_interval' field of the scheme's
 * quota.
 */
static unsigned long quota_reset_interval_ms __read_mostly = 1000;
module_param(quota_reset_interval_ms, ulong, 0600);
  80
/*
 * The watermarks check time interval in microseconds.
 *
 * Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is
 * enabled but inactive due to its watermarks rule.  5 seconds by default.
 *
 * Used as the 'interval' field of the scheme's watermarks.
 */
static unsigned long wmarks_interval __read_mostly = 5000000;
module_param(wmarks_interval, ulong, 0600);

/*
 * Free memory rate (per thousand) for the high watermark.
 *
 * If free memory of the system in bytes per thousand bytes is higher than
 * this, DAMON_RECLAIM becomes inactive, so it does nothing but periodically
 * checks the watermarks.  500 (50%) by default.
 *
 * Used as the 'high' field of the scheme's watermarks, whose metric is
 * DAMOS_WMARK_FREE_MEM_RATE.
 */
static unsigned long wmarks_high __read_mostly = 500;
module_param(wmarks_high, ulong, 0600);

/*
 * Free memory rate (per thousand) for the middle watermark.
 *
 * If free memory of the system in bytes per thousand bytes is between this and
 * the low watermark, DAMON_RECLAIM becomes active, so starts the monitoring
 * and the reclaiming.  400 (40%) by default.
 *
 * Used as the 'mid' field of the scheme's watermarks.
 */
static unsigned long wmarks_mid __read_mostly = 400;
module_param(wmarks_mid, ulong, 0600);

/*
 * Free memory rate (per thousand) for the low watermark.
 *
 * If free memory of the system in bytes per thousand bytes is lower than this,
 * DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks
 * the watermarks.  In that case, the system falls back to the LRU-based page
 * granularity reclamation logic.  200 (20%) by default.
 *
 * Used as the 'low' field of the scheme's watermarks.
 */
static unsigned long wmarks_low __read_mostly = 200;
module_param(wmarks_low, ulong, 0600);
 120
/*
 * Sampling interval for the monitoring in microseconds.
 *
 * The sampling interval of DAMON for the cold memory monitoring.  Please refer
 * to the DAMON documentation for more detail.  5 ms by default.
 *
 * Passed to damon_set_attrs() when DAMON_RECLAIM is turned on.
 */
static unsigned long sample_interval __read_mostly = 5000;
module_param(sample_interval, ulong, 0600);

/*
 * Aggregation interval for the monitoring in microseconds.
 *
 * The aggregation interval of DAMON for the cold memory monitoring.  Please
 * refer to the DAMON documentation for more detail.  100 ms by default.
 *
 * Passed to damon_set_attrs() when DAMON_RECLAIM is turned on.  It is also
 * the divisor that converts min_age into the scheme's minimum age, so it
 * should not be set to zero.
 */
static unsigned long aggr_interval __read_mostly = 100000;
module_param(aggr_interval, ulong, 0600);

/*
 * Minimum number of monitoring regions.
 *
 * The minimal number of monitoring regions of DAMON for the cold memory
 * monitoring.  This can be used to set lower-bound of the monitoring quality.
 * But, setting this too high could result in increased monitoring overhead.
 * Please refer to the DAMON documentation for more detail.  10 by default.
 *
 * Passed to damon_set_attrs() when DAMON_RECLAIM is turned on.
 */
static unsigned long min_nr_regions __read_mostly = 10;
module_param(min_nr_regions, ulong, 0600);

/*
 * Maximum number of monitoring regions.
 *
 * The maximum number of monitoring regions of DAMON for the cold memory
 * monitoring.  This can be used to set upper-bound of the monitoring overhead.
 * However, setting this too low could result in bad monitoring quality.
 * Please refer to the DAMON documentation for more detail.  1000 by default.
 *
 * Passed to damon_set_attrs() when DAMON_RECLAIM is turned on.
 */
static unsigned long max_nr_regions __read_mostly = 1000;
module_param(max_nr_regions, ulong, 0600);
 160
/*
 * Start of the target memory region in physical address.
 *
 * The start physical address of the memory region that DAMON_RECLAIM will
 * work against.  By default, the biggest System RAM is used as the region.
 *
 * If this and monitor_region_end are both zero when DAMON_RECLAIM is turned
 * on, both are overwritten with the addresses of the biggest System RAM
 * resource.
 */
static unsigned long monitor_region_start __read_mostly;
module_param(monitor_region_start, ulong, 0600);

/*
 * End of the target memory region in physical address.
 *
 * The end physical address of the memory region that DAMON_RECLAIM will work
 * against.  By default, the biggest System RAM is used as the region.
 *
 * Turning DAMON_RECLAIM on fails with -EINVAL if this is smaller than
 * monitor_region_start.
 */
static unsigned long monitor_region_end __read_mostly;
module_param(monitor_region_end, ulong, 0600);
 178
/*
 * PID of the DAMON thread
 *
 * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
 * Else, -1.
 *
 * Set from ctx->kdamond->pid after damon_start() succeeds, and reset to -1
 * after damon_stop() succeeds.  Declared with mode 0400.
 */
static int kdamond_pid __read_mostly = -1;
module_param(kdamond_pid, int, 0400);
 187
/*
 * The DAMON context used by DAMON_RECLAIM and the single monitoring target
 * added to it.  Both are created in damon_reclaim_init().
 */
static struct damon_ctx *ctx;
static struct damon_target *target;
 190
/*
 * Argument of walk_system_ram().  Holds the 'start' and 'end' of the biggest
 * resource seen so far during the System RAM walk.
 */
struct damon_reclaim_ram_walk_arg {
        unsigned long start;    /* copied from the resource's ->start */
        unsigned long end;      /* copied from the resource's ->end */
};
 195
 196 static int walk_system_ram(struct resource *res, void *arg)
 197 {
 198         struct damon_reclaim_ram_walk_arg *a = arg;
 199
 200         if (a->end - a->start < res->end - res->start) {
 201                 a->start = res->start;
 202                 a->end = res->end;
 203         }
 204         return 0;
 205 }
 206
 207 /*
 208  * Find biggest 'System RAM' resource and store its start and end address in
 209  * @start and @end, respectively.  If no System RAM is found, returns false.
 210  */
 211 static bool get_monitoring_region(unsigned long *start, unsigned long *end)
 212 {
 213         struct damon_reclaim_ram_walk_arg arg = {};
 214
 215         walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
 216         if (arg.end <= arg.start)
 217                 return false;
 218
 219         *start = arg.start;
 220         *end = arg.end;
 221         return true;
 222 }
 223
 224 static struct damos *damon_reclaim_new_scheme(void)
 225 {
 226         struct damos_watermarks wmarks = {
 227                 .metric = DAMOS_WMARK_FREE_MEM_RATE,
 228                 .interval = wmarks_interval,
 229                 .high = wmarks_high,
 230                 .mid = wmarks_mid,
 231                 .low = wmarks_low,
 232         };
 233         struct damos_quota quota = {
 234                 /*
 235                  * Do not try reclamation for more than quota_ms milliseconds
 236                  * or quota_sz bytes within quota_reset_interval_ms.
 237                  */
 238                 .ms = quota_ms,
 239                 .sz = quota_sz,
 240                 .reset_interval = quota_reset_interval_ms,
 241                 /* Within the quota, page out older regions first. */
 242                 .weight_sz = 0,
 243                 .weight_nr_accesses = 0,
 244                 .weight_age = 1
 245         };
 246         struct damos *scheme = damon_new_scheme(
 247                         /* Find regions having PAGE_SIZE or larger size */
 248                         PAGE_SIZE, ULONG_MAX,
 249                         /* and not accessed at all */
 250                         0, 0,
 251                         /* for min_age or more micro-seconds, and */
 252                         min_age / aggr_interval, UINT_MAX,
 253                         /* page out those, as soon as found */
 254                         DAMOS_PAGEOUT,
 255                         /* under the quota. */
 256                         &quota,
 257                         /* (De)activate this according to the watermarks. */
 258                         &wmarks);
 259
 260         return scheme;
 261 }
 262
 263 static int damon_reclaim_turn(bool on)
 264 {
 265         struct damon_region *region;
 266         struct damos *scheme;
 267         int err;
 268
 269         if (!on) {
 270                 err = damon_stop(&ctx, 1);
 271                 if (!err)
 272                         kdamond_pid = -1;
 273                 return err;
 274         }
 275
 276         err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0,
 277                         min_nr_regions, max_nr_regions);
 278         if (err)
 279                 return err;
 280
 281         if (monitor_region_start > monitor_region_end)
 282                 return -EINVAL;
 283         if (!monitor_region_start && !monitor_region_end &&
 284                         !get_monitoring_region(&monitor_region_start,
 285                                 &monitor_region_end))
 286                 return -EINVAL;
 287         /* DAMON will free this on its own when finish monitoring */
 288         region = damon_new_region(monitor_region_start, monitor_region_end);
 289         if (!region)
 290                 return -ENOMEM;
 291         damon_add_region(region, target);
 292
 293         /* Will be freed by 'damon_set_schemes()' below */
 294         scheme = damon_reclaim_new_scheme();
 295         if (!scheme) {
 296                 err = -ENOMEM;
 297                 goto free_region_out;
 298         }
 299         err = damon_set_schemes(ctx, &scheme, 1);
 300         if (err)
 301                 goto free_scheme_out;
 302
 303         err = damon_start(&ctx, 1);
 304         if (!err) {
 305                 kdamond_pid = ctx->kdamond->pid;
 306                 return 0;
 307         }
 308
 309 free_scheme_out:
 310         damon_destroy_scheme(scheme);
 311 free_region_out:
 312         damon_destroy_region(region, target);
 313         return err;
 314 }
 315
 316 #define ENABLE_CHECK_INTERVAL_MS        1000
 317 static struct delayed_work damon_reclaim_timer;
 318 static void damon_reclaim_timer_fn(struct work_struct *work)
 319 {
 320         static bool last_enabled;
 321         bool now_enabled;
 322
 323         now_enabled = enabled;
 324         if (last_enabled != now_enabled) {
 325                 if (!damon_reclaim_turn(now_enabled))
 326                         last_enabled = now_enabled;
 327                 else
 328                         enabled = last_enabled;
 329         }
 330
 331         schedule_delayed_work(&damon_reclaim_timer,
 332                         msecs_to_jiffies(ENABLE_CHECK_INTERVAL_MS));
 333 }
 334 static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn);
 335
 336 static int __init damon_reclaim_init(void)
 337 {
 338         ctx = damon_new_ctx();
 339         if (!ctx)
 340                 return -ENOMEM;
 341
 342         damon_pa_set_primitives(ctx);
 343
 344         /* 4242 means nothing but fun */
 345         target = damon_new_target(4242);
 346         if (!target) {
 347                 damon_destroy_ctx(ctx);
 348                 return -ENOMEM;
 349         }
 350         damon_add_target(ctx, target);
 351
 352         schedule_delayed_work(&damon_reclaim_timer, 0);
 353         return 0;
 354 }
 355
 356 module_init(damon_reclaim_init);