mm/damon/core.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Data Access Monitor
   4  *
   5  * Author: SeongJae Park <[email protected]>
   6  */
   7
   8 #define pr_fmt(fmt) "damon: " fmt
   9
  10 #include <linux/damon.h>
  11 #include <linux/delay.h>
  12 #include <linux/kthread.h>
  13 #include <linux/mm.h>
  14 #include <linux/random.h>
  15 #include <linux/slab.h>
  16 #include <linux/string.h>
  17
  18 #define CREATE_TRACE_POINTS
  19 #include <trace/events/damon.h>
  20
  21 #ifdef CONFIG_DAMON_KUNIT_TEST
  22 #undef DAMON_MIN_REGION
  23 #define DAMON_MIN_REGION 1
  24 #endif
  25
  26 /* Get a random number in [l, r) */
  27 #define damon_rand(l, r) (l + prandom_u32_max(r - l))
  28
  29 static DEFINE_MUTEX(damon_lock);
  30 static int nr_running_ctxs;
  31
  32 /*
  33  * Construct a damon_region struct
  34  *
  35  * Returns the pointer to the new struct if success, or NULL otherwise
  36  */
  37 struct damon_region *damon_new_region(unsigned long start, unsigned long end)
  38 {
  39         struct damon_region *region;
  40
  41         region = kmalloc(sizeof(*region), GFP_KERNEL);
  42         if (!region)
  43                 return NULL;
  44
  45         region->ar.start = start;
  46         region->ar.end = end;
  47         region->nr_accesses = 0;
  48         INIT_LIST_HEAD(&region->list);
  49
  50         region->age = 0;
  51         region->last_nr_accesses = 0;
  52
  53         return region;
  54 }
  55
  56 /*
  57  * Add a region between two other regions
  58  */
  59 inline void damon_insert_region(struct damon_region *r,
  60                 struct damon_region *prev, struct damon_region *next,
  61                 struct damon_target *t)
  62 {
  63         __list_add(&r->list, &prev->list, &next->list);
  64         t->nr_regions++;
  65 }
  66
  67 void damon_add_region(struct damon_region *r, struct damon_target *t)
  68 {
  69         list_add_tail(&r->list, &t->regions_list);
  70         t->nr_regions++;
  71 }
  72
  73 static void damon_del_region(struct damon_region *r, struct damon_target *t)
  74 {
  75         list_del(&r->list);
  76         t->nr_regions--;
  77 }
  78
  79 static void damon_free_region(struct damon_region *r)
  80 {
  81         kfree(r);
  82 }
  83
  84 void damon_destroy_region(struct damon_region *r, struct damon_target *t)
  85 {
  86         damon_del_region(r, t);
  87         damon_free_region(r);
  88 }
  89
  90 struct damos *damon_new_scheme(
  91                 unsigned long min_sz_region, unsigned long max_sz_region,
  92                 unsigned int min_nr_accesses, unsigned int max_nr_accesses,
  93                 unsigned int min_age_region, unsigned int max_age_region,
  94                 enum damos_action action, struct damos_quota *quota,
  95                 struct damos_watermarks *wmarks)
  96 {
  97         struct damos *scheme;
  98
  99         scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
 100         if (!scheme)
 101                 return NULL;
 102         scheme->min_sz_region = min_sz_region;
 103         scheme->max_sz_region = max_sz_region;
 104         scheme->min_nr_accesses = min_nr_accesses;
 105         scheme->max_nr_accesses = max_nr_accesses;
 106         scheme->min_age_region = min_age_region;
 107         scheme->max_age_region = max_age_region;
 108         scheme->action = action;
 109         scheme->stat_count = 0;
 110         scheme->stat_sz = 0;
 111         INIT_LIST_HEAD(&scheme->list);
 112
 113         scheme->quota.ms = quota->ms;
 114         scheme->quota.sz = quota->sz;
 115         scheme->quota.reset_interval = quota->reset_interval;
 116         scheme->quota.weight_sz = quota->weight_sz;
 117         scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
 118         scheme->quota.weight_age = quota->weight_age;
 119         scheme->quota.total_charged_sz = 0;
 120         scheme->quota.total_charged_ns = 0;
 121         scheme->quota.esz = 0;
 122         scheme->quota.charged_sz = 0;
 123         scheme->quota.charged_from = 0;
 124         scheme->quota.charge_target_from = NULL;
 125         scheme->quota.charge_addr_from = 0;
 126
 127         scheme->wmarks.metric = wmarks->metric;
 128         scheme->wmarks.interval = wmarks->interval;
 129         scheme->wmarks.high = wmarks->high;
 130         scheme->wmarks.mid = wmarks->mid;
 131         scheme->wmarks.low = wmarks->low;
 132         scheme->wmarks.activated = true;
 133
 134         return scheme;
 135 }
 136
 137 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
 138 {
 139         list_add_tail(&s->list, &ctx->schemes);
 140 }
 141
 142 static void damon_del_scheme(struct damos *s)
 143 {
 144         list_del(&s->list);
 145 }
 146
 147 static void damon_free_scheme(struct damos *s)
 148 {
 149         kfree(s);
 150 }
 151
 152 void damon_destroy_scheme(struct damos *s)
 153 {
 154         damon_del_scheme(s);
 155         damon_free_scheme(s);
 156 }
 157
 158 /*
 159  * Construct a damon_target struct
 160  *
 161  * Returns the pointer to the new struct if success, or NULL otherwise
 162  */
 163 struct damon_target *damon_new_target(unsigned long id)
 164 {
 165         struct damon_target *t;
 166
 167         t = kmalloc(sizeof(*t), GFP_KERNEL);
 168         if (!t)
 169                 return NULL;
 170
 171         t->id = id;
 172         t->nr_regions = 0;
 173         INIT_LIST_HEAD(&t->regions_list);
 174
 175         return t;
 176 }
 177
 178 void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
 179 {
 180         list_add_tail(&t->list, &ctx->adaptive_targets);
 181 }
 182
 183 bool damon_targets_empty(struct damon_ctx *ctx)
 184 {
 185         return list_empty(&ctx->adaptive_targets);
 186 }
 187
 188 static void damon_del_target(struct damon_target *t)
 189 {
 190         list_del(&t->list);
 191 }
 192
 193 void damon_free_target(struct damon_target *t)
 194 {
 195         struct damon_region *r, *next;
 196
 197         damon_for_each_region_safe(r, next, t)
 198                 damon_free_region(r);
 199         kfree(t);
 200 }
 201
 202 void damon_destroy_target(struct damon_target *t)
 203 {
 204         damon_del_target(t);
 205         damon_free_target(t);
 206 }
 207
 208 unsigned int damon_nr_regions(struct damon_target *t)
 209 {
 210         return t->nr_regions;
 211 }
 212
 213 struct damon_ctx *damon_new_ctx(void)
 214 {
 215         struct damon_ctx *ctx;
 216
 217         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 218         if (!ctx)
 219                 return NULL;
 220
 221         ctx->sample_interval = 5 * 1000;
 222         ctx->aggr_interval = 100 * 1000;
 223         ctx->primitive_update_interval = 60 * 1000 * 1000;
 224
 225         ktime_get_coarse_ts64(&ctx->last_aggregation);
 226         ctx->last_primitive_update = ctx->last_aggregation;
 227
 228         mutex_init(&ctx->kdamond_lock);
 229
 230         ctx->min_nr_regions = 10;
 231         ctx->max_nr_regions = 1000;
 232
 233         INIT_LIST_HEAD(&ctx->adaptive_targets);
 234         INIT_LIST_HEAD(&ctx->schemes);
 235
 236         return ctx;
 237 }
 238
 239 static void damon_destroy_targets(struct damon_ctx *ctx)
 240 {
 241         struct damon_target *t, *next_t;
 242
 243         if (ctx->primitive.cleanup) {
 244                 ctx->primitive.cleanup(ctx);
 245                 return;
 246         }
 247
 248         damon_for_each_target_safe(t, next_t, ctx)
 249                 damon_destroy_target(t);
 250 }
 251
 252 void damon_destroy_ctx(struct damon_ctx *ctx)
 253 {
 254         struct damos *s, *next_s;
 255
 256         damon_destroy_targets(ctx);
 257
 258         damon_for_each_scheme_safe(s, next_s, ctx)
 259                 damon_destroy_scheme(s);
 260
 261         kfree(ctx);
 262 }
 263
 264 /**
 265  * damon_set_targets() - Set monitoring targets.
 266  * @ctx:        monitoring context
 267  * @ids:        array of target ids
 268  * @nr_ids:     number of entries in @ids
 269  *
 270  * This function should not be called while the kdamond is running.
 271  *
 272  * Return: 0 on success, negative error code otherwise.
 273  */
 274 int damon_set_targets(struct damon_ctx *ctx,
 275                       unsigned long *ids, ssize_t nr_ids)
 276 {
 277         ssize_t i;
 278         struct damon_target *t, *next;
 279
 280         damon_destroy_targets(ctx);
 281
 282         for (i = 0; i < nr_ids; i++) {
 283                 t = damon_new_target(ids[i]);
 284                 if (!t) {
 285                         pr_err("Failed to alloc damon_target\n");
 286                         /* The caller should do cleanup of the ids itself */
 287                         damon_for_each_target_safe(t, next, ctx)
 288                                 damon_destroy_target(t);
 289                         return -ENOMEM;
 290                 }
 291                 damon_add_target(ctx, t);
 292         }
 293
 294         return 0;
 295 }
 296
 297 /**
 298  * damon_set_attrs() - Set attributes for the monitoring.
 299  * @ctx:                monitoring context
 300  * @sample_int:         time interval between samplings
 301  * @aggr_int:           time interval between aggregations
 302  * @primitive_upd_int:  time interval between monitoring primitive updates
 303  * @min_nr_reg:         minimal number of regions
 304  * @max_nr_reg:         maximum number of regions
 305  *
 306  * This function should not be called while the kdamond is running.
 307  * Every time interval is in micro-seconds.
 308  *
 309  * Return: 0 on success, negative error code otherwise.
 310  */
 311 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
 312                     unsigned long aggr_int, unsigned long primitive_upd_int,
 313                     unsigned long min_nr_reg, unsigned long max_nr_reg)
 314 {
 315         if (min_nr_reg < 3) {
 316                 pr_err("min_nr_regions (%lu) must be at least 3\n",
 317                                 min_nr_reg);
 318                 return -EINVAL;
 319         }
 320         if (min_nr_reg > max_nr_reg) {
 321                 pr_err("invalid nr_regions.  min (%lu) > max (%lu)\n",
 322                                 min_nr_reg, max_nr_reg);
 323                 return -EINVAL;
 324         }
 325
 326         ctx->sample_interval = sample_int;
 327         ctx->aggr_interval = aggr_int;
 328         ctx->primitive_update_interval = primitive_upd_int;
 329         ctx->min_nr_regions = min_nr_reg;
 330         ctx->max_nr_regions = max_nr_reg;
 331
 332         return 0;
 333 }
 334
 335 /**
 336  * damon_set_schemes() - Set data access monitoring based operation schemes.
 337  * @ctx:        monitoring context
 338  * @schemes:    array of the schemes
 339  * @nr_schemes: number of entries in @schemes
 340  *
 341  * This function should not be called while the kdamond of the context is
 342  * running.
 343  *
 344  * Return: 0 if success, or negative error code otherwise.
 345  */
 346 int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
 347                         ssize_t nr_schemes)
 348 {
 349         struct damos *s, *next;
 350         ssize_t i;
 351
 352         damon_for_each_scheme_safe(s, next, ctx)
 353                 damon_destroy_scheme(s);
 354         for (i = 0; i < nr_schemes; i++)
 355                 damon_add_scheme(ctx, schemes[i]);
 356         return 0;
 357 }
 358
 359 /**
 360  * damon_nr_running_ctxs() - Return number of currently running contexts.
 361  */
 362 int damon_nr_running_ctxs(void)
 363 {
 364         int nr_ctxs;
 365
 366         mutex_lock(&damon_lock);
 367         nr_ctxs = nr_running_ctxs;
 368         mutex_unlock(&damon_lock);
 369
 370         return nr_ctxs;
 371 }
 372
 373 /* Returns the size upper limit for each monitoring region */
 374 static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
 375 {
 376         struct damon_target *t;
 377         struct damon_region *r;
 378         unsigned long sz = 0;
 379
 380         damon_for_each_target(t, ctx) {
 381                 damon_for_each_region(r, t)
 382                         sz += r->ar.end - r->ar.start;
 383         }
 384
 385         if (ctx->min_nr_regions)
 386                 sz /= ctx->min_nr_regions;
 387         if (sz < DAMON_MIN_REGION)
 388                 sz = DAMON_MIN_REGION;
 389
 390         return sz;
 391 }
 392
 393 static int kdamond_fn(void *data);
 394
 395 /*
 396  * __damon_start() - Starts monitoring with given context.
 397  * @ctx:        monitoring context
 398  *
 399  * This function should be called while damon_lock is hold.
 400  *
 401  * Return: 0 on success, negative error code otherwise.
 402  */
 403 static int __damon_start(struct damon_ctx *ctx)
 404 {
 405         int err = -EBUSY;
 406
 407         mutex_lock(&ctx->kdamond_lock);
 408         if (!ctx->kdamond) {
 409                 err = 0;
 410                 ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
 411                                 nr_running_ctxs);
 412                 if (IS_ERR(ctx->kdamond)) {
 413                         err = PTR_ERR(ctx->kdamond);
 414                         ctx->kdamond = NULL;
 415                 }
 416         }
 417         mutex_unlock(&ctx->kdamond_lock);
 418
 419         return err;
 420 }
 421
 422 /**
 423  * damon_start() - Starts the monitorings for a given group of contexts.
 424  * @ctxs:       an array of the pointers for contexts to start monitoring
 425  * @nr_ctxs:    size of @ctxs
 426  *
 427  * This function starts a group of monitoring threads for a group of monitoring
 428  * contexts.  One thread per each context is created and run in parallel.  The
 429  * caller should handle synchronization between the threads by itself.  If a
 430  * group of threads that created by other 'damon_start()' call is currently
 431  * running, this function does nothing but returns -EBUSY.
 432  *
 433  * Return: 0 on success, negative error code otherwise.
 434  */
 435 int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
 436 {
 437         int i;
 438         int err = 0;
 439
 440         mutex_lock(&damon_lock);
 441         if (nr_running_ctxs) {
 442                 mutex_unlock(&damon_lock);
 443                 return -EBUSY;
 444         }
 445
 446         for (i = 0; i < nr_ctxs; i++) {
 447                 err = __damon_start(ctxs[i]);
 448                 if (err)
 449                         break;
 450                 nr_running_ctxs++;
 451         }
 452         mutex_unlock(&damon_lock);
 453
 454         return err;
 455 }
 456
 457 /*
 458  * __damon_stop() - Stops monitoring of given context.
 459  * @ctx:        monitoring context
 460  *
 461  * Return: 0 on success, negative error code otherwise.
 462  */
 463 static int __damon_stop(struct damon_ctx *ctx)
 464 {
 465         struct task_struct *tsk;
 466
 467         mutex_lock(&ctx->kdamond_lock);
 468         tsk = ctx->kdamond;
 469         if (tsk) {
 470                 get_task_struct(tsk);
 471                 mutex_unlock(&ctx->kdamond_lock);
 472                 kthread_stop(tsk);
 473                 put_task_struct(tsk);
 474                 return 0;
 475         }
 476         mutex_unlock(&ctx->kdamond_lock);
 477
 478         return -EPERM;
 479 }
 480
 481 /**
 482  * damon_stop() - Stops the monitorings for a given group of contexts.
 483  * @ctxs:       an array of the pointers for contexts to stop monitoring
 484  * @nr_ctxs:    size of @ctxs
 485  *
 486  * Return: 0 on success, negative error code otherwise.
 487  */
 488 int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
 489 {
 490         int i, err = 0;
 491
 492         for (i = 0; i < nr_ctxs; i++) {
 493                 /* nr_running_ctxs is decremented in kdamond_fn */
 494                 err = __damon_stop(ctxs[i]);
 495                 if (err)
 496                         return err;
 497         }
 498
 499         return err;
 500 }
 501
 502 /*
 503  * damon_check_reset_time_interval() - Check if a time interval is elapsed.
 504  * @baseline:   the time to check whether the interval has elapsed since
 505  * @interval:   the time interval (microseconds)
 506  *
 507  * See whether the given time interval has passed since the given baseline
 508  * time.  If so, it also updates the baseline to current time for next check.
 509  *
 510  * Return:      true if the time interval has passed, or false otherwise.
 511  */
 512 static bool damon_check_reset_time_interval(struct timespec64 *baseline,
 513                 unsigned long interval)
 514 {
 515         struct timespec64 now;
 516
 517         ktime_get_coarse_ts64(&now);
 518         if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
 519                         interval * 1000)
 520                 return false;
 521         *baseline = now;
 522         return true;
 523 }
 524
 525 /*
 526  * Check whether it is time to flush the aggregated information
 527  */
 528 static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
 529 {
 530         return damon_check_reset_time_interval(&ctx->last_aggregation,
 531                         ctx->aggr_interval);
 532 }
 533
 534 /*
 535  * Reset the aggregated monitoring results ('nr_accesses' of each region).
 536  */
 537 static void kdamond_reset_aggregated(struct damon_ctx *c)
 538 {
 539         struct damon_target *t;
 540
 541         damon_for_each_target(t, c) {
 542                 struct damon_region *r;
 543
 544                 damon_for_each_region(r, t) {
 545                         trace_damon_aggregated(t, r, damon_nr_regions(t));
 546                         r->last_nr_accesses = r->nr_accesses;
 547                         r->nr_accesses = 0;
 548                 }
 549         }
 550 }
 551
 552 static void damon_split_region_at(struct damon_ctx *ctx,
 553                 struct damon_target *t, struct damon_region *r,
 554                 unsigned long sz_r);
 555
 556 static bool __damos_valid_target(struct damon_region *r, struct damos *s)
 557 {
 558         unsigned long sz;
 559
 560         sz = r->ar.end - r->ar.start;
 561         return s->min_sz_region <= sz && sz <= s->max_sz_region &&
 562                 s->min_nr_accesses <= r->nr_accesses &&
 563                 r->nr_accesses <= s->max_nr_accesses &&
 564                 s->min_age_region <= r->age && r->age <= s->max_age_region;
 565 }
 566
 567 static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
 568                 struct damon_region *r, struct damos *s)
 569 {
 570         bool ret = __damos_valid_target(r, s);
 571
 572         if (!ret || !s->quota.esz || !c->primitive.get_scheme_score)
 573                 return ret;
 574
 575         return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score;
 576 }
 577
 578 static void damon_do_apply_schemes(struct damon_ctx *c,
 579                                    struct damon_target *t,
 580                                    struct damon_region *r)
 581 {
 582         struct damos *s;
 583
 584         damon_for_each_scheme(s, c) {
 585                 struct damos_quota *quota = &s->quota;
 586                 unsigned long sz = r->ar.end - r->ar.start;
 587                 struct timespec64 begin, end;
 588
 589                 if (!s->wmarks.activated)
 590                         continue;
 591
 592                 /* Check the quota */
 593                 if (quota->esz && quota->charged_sz >= quota->esz)
 594                         continue;
 595
 596                 /* Skip previously charged regions */
 597                 if (quota->charge_target_from) {
 598                         if (t != quota->charge_target_from)
 599                                 continue;
 600                         if (r == damon_last_region(t)) {
 601                                 quota->charge_target_from = NULL;
 602                                 quota->charge_addr_from = 0;
 603                                 continue;
 604                         }
 605                         if (quota->charge_addr_from &&
 606                                         r->ar.end <= quota->charge_addr_from)
 607                                 continue;
 608
 609                         if (quota->charge_addr_from && r->ar.start <
 610                                         quota->charge_addr_from) {
 611                                 sz = ALIGN_DOWN(quota->charge_addr_from -
 612                                                 r->ar.start, DAMON_MIN_REGION);
 613                                 if (!sz) {
 614                                         if (r->ar.end - r->ar.start <=
 615                                                         DAMON_MIN_REGION)
 616                                                 continue;
 617                                         sz = DAMON_MIN_REGION;
 618                                 }
 619                                 damon_split_region_at(c, t, r, sz);
 620                                 r = damon_next_region(r);
 621                                 sz = r->ar.end - r->ar.start;
 622                         }
 623                         quota->charge_target_from = NULL;
 624                         quota->charge_addr_from = 0;
 625                 }
 626
 627                 if (!damos_valid_target(c, t, r, s))
 628                         continue;
 629
 630                 /* Apply the scheme */
 631                 if (c->primitive.apply_scheme) {
 632                         if (quota->esz &&
 633                                         quota->charged_sz + sz > quota->esz) {
 634                                 sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
 635                                                 DAMON_MIN_REGION);
 636                                 if (!sz)
 637                                         goto update_stat;
 638                                 damon_split_region_at(c, t, r, sz);
 639                         }
 640                         ktime_get_coarse_ts64(&begin);
 641                         c->primitive.apply_scheme(c, t, r, s);
 642                         ktime_get_coarse_ts64(&end);
 643                         quota->total_charged_ns += timespec64_to_ns(&end) -
 644                                 timespec64_to_ns(&begin);
 645                         quota->charged_sz += sz;
 646                         if (quota->esz && quota->charged_sz >= quota->esz) {
 647                                 quota->charge_target_from = t;
 648                                 quota->charge_addr_from = r->ar.end + 1;
 649                         }
 650                 }
 651                 if (s->action != DAMOS_STAT)
 652                         r->age = 0;
 653
 654 update_stat:
 655                 s->stat_count++;
 656                 s->stat_sz += sz;
 657         }
 658 }
 659
 660 /* Shouldn't be called if quota->ms and quota->sz are zero */
 661 static void damos_set_effective_quota(struct damos_quota *quota)
 662 {
 663         unsigned long throughput;
 664         unsigned long esz;
 665
 666         if (!quota->ms) {
 667                 quota->esz = quota->sz;
 668                 return;
 669         }
 670
 671         if (quota->total_charged_ns)
 672                 throughput = quota->total_charged_sz * 1000000 /
 673                         quota->total_charged_ns;
 674         else
 675                 throughput = PAGE_SIZE * 1024;
 676         esz = throughput * quota->ms;
 677
 678         if (quota->sz && quota->sz < esz)
 679                 esz = quota->sz;
 680         quota->esz = esz;
 681 }
 682
 683 static void kdamond_apply_schemes(struct damon_ctx *c)
 684 {
 685         struct damon_target *t;
 686         struct damon_region *r, *next_r;
 687         struct damos *s;
 688
 689         damon_for_each_scheme(s, c) {
 690                 struct damos_quota *quota = &s->quota;
 691                 unsigned long cumulated_sz;
 692                 unsigned int score, max_score = 0;
 693
 694                 if (!s->wmarks.activated)
 695                         continue;
 696
 697                 if (!quota->ms && !quota->sz)
 698                         continue;
 699
 700                 /* New charge window starts */
 701                 if (time_after_eq(jiffies, quota->charged_from +
 702                                         msecs_to_jiffies(
 703                                                 quota->reset_interval))) {
 704                         quota->total_charged_sz += quota->charged_sz;
 705                         quota->charged_from = jiffies;
 706                         quota->charged_sz = 0;
 707                         damos_set_effective_quota(quota);
 708                 }
 709
 710                 if (!c->primitive.get_scheme_score)
 711                         continue;
 712
 713                 /* Fill up the score histogram */
 714                 memset(quota->histogram, 0, sizeof(quota->histogram));
 715                 damon_for_each_target(t, c) {
 716                         damon_for_each_region(r, t) {
 717                                 if (!__damos_valid_target(r, s))
 718                                         continue;
 719                                 score = c->primitive.get_scheme_score(
 720                                                 c, t, r, s);
 721                                 quota->histogram[score] +=
 722                                         r->ar.end - r->ar.start;
 723                                 if (score > max_score)
 724                                         max_score = score;
 725                         }
 726                 }
 727
 728                 /* Set the min score limit */
 729                 for (cumulated_sz = 0, score = max_score; ; score--) {
 730                         cumulated_sz += quota->histogram[score];
 731                         if (cumulated_sz >= quota->esz || !score)
 732                                 break;
 733                 }
 734                 quota->min_score = score;
 735         }
 736
 737         damon_for_each_target(t, c) {
 738                 damon_for_each_region_safe(r, next_r, t)
 739                         damon_do_apply_schemes(c, t, r);
 740         }
 741 }
 742
 743 #define sz_damon_region(r) (r->ar.end - r->ar.start)
 744
 745 /*
 746  * Merge two adjacent regions into one region
 747  */
 748 static void damon_merge_two_regions(struct damon_target *t,
 749                 struct damon_region *l, struct damon_region *r)
 750 {
 751         unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
 752
 753         l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
 754                         (sz_l + sz_r);
 755         l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
 756         l->ar.end = r->ar.end;
 757         damon_destroy_region(r, t);
 758 }
 759
 760 #define diff_of(a, b) (a > b ? a - b : b - a)
 761
 762 /*
 763  * Merge adjacent regions having similar access frequencies
 764  *
 765  * t            target affected by this merge operation
 766  * thres        '->nr_accesses' diff threshold for the merge
 767  * sz_limit     size upper limit of each region
 768  */
 769 static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
 770                                    unsigned long sz_limit)
 771 {
 772         struct damon_region *r, *prev = NULL, *next;
 773
 774         damon_for_each_region_safe(r, next, t) {
 775                 if (diff_of(r->nr_accesses, r->last_nr_accesses) > thres)
 776                         r->age = 0;
 777                 else
 778                         r->age++;
 779
 780                 if (prev && prev->ar.end == r->ar.start &&
 781                     diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
 782                     sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
 783                         damon_merge_two_regions(t, prev, r);
 784                 else
 785                         prev = r;
 786         }
 787 }
 788
 789 /*
 790  * Merge adjacent regions having similar access frequencies
 791  *
 792  * threshold    '->nr_accesses' diff threshold for the merge
 793  * sz_limit     size upper limit of each region
 794  *
 795  * This function merges monitoring target regions which are adjacent and their
 796  * access frequencies are similar.  This is for minimizing the monitoring
 797  * overhead under the dynamically changeable access pattern.  If a merge was
 798  * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 799  */
 800 static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
 801                                   unsigned long sz_limit)
 802 {
 803         struct damon_target *t;
 804
 805         damon_for_each_target(t, c)
 806                 damon_merge_regions_of(t, threshold, sz_limit);
 807 }
 808
 809 /*
 810  * Split a region in two
 811  *
 812  * r            the region to be split
 813  * sz_r         size of the first sub-region that will be made
 814  */
 815 static void damon_split_region_at(struct damon_ctx *ctx,
 816                 struct damon_target *t, struct damon_region *r,
 817                 unsigned long sz_r)
 818 {
 819         struct damon_region *new;
 820
 821         new = damon_new_region(r->ar.start + sz_r, r->ar.end);
 822         if (!new)
 823                 return;
 824
 825         r->ar.end = new->ar.start;
 826
 827         new->age = r->age;
 828         new->last_nr_accesses = r->last_nr_accesses;
 829
 830         damon_insert_region(new, r, damon_next_region(r), t);
 831 }
 832
 833 /* Split every region in the given target into 'nr_subs' regions */
 834 static void damon_split_regions_of(struct damon_ctx *ctx,
 835                                      struct damon_target *t, int nr_subs)
 836 {
 837         struct damon_region *r, *next;
 838         unsigned long sz_region, sz_sub = 0;
 839         int i;
 840
 841         damon_for_each_region_safe(r, next, t) {
 842                 sz_region = r->ar.end - r->ar.start;
 843
 844                 for (i = 0; i < nr_subs - 1 &&
 845                                 sz_region > 2 * DAMON_MIN_REGION; i++) {
 846                         /*
 847                          * Randomly select size of left sub-region to be at
 848                          * least 10 percent and at most 90% of original region
 849                          */
 850                         sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
 851                                         sz_region / 10, DAMON_MIN_REGION);
 852                         /* Do not allow blank region */
 853                         if (sz_sub == 0 || sz_sub >= sz_region)
 854                                 continue;
 855
 856                         damon_split_region_at(ctx, t, r, sz_sub);
 857                         sz_region = sz_sub;
 858                 }
 859         }
 860 }
 861
 862 /*
 863  * Split every target region into randomly-sized small regions
 864  *
 865  * This function splits every target region into random-sized small regions if
 866  * current total number of the regions is equal or smaller than half of the
 867  * user-specified maximum number of regions.  This is for maximizing the
 868  * monitoring accuracy under the dynamically changeable access patterns.  If a
 869  * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 870  * it.
 871  */
 872 static void kdamond_split_regions(struct damon_ctx *ctx)
 873 {
 874         struct damon_target *t;
 875         unsigned int nr_regions = 0;
 876         static unsigned int last_nr_regions;
 877         int nr_subregions = 2;
 878
 879         damon_for_each_target(t, ctx)
 880                 nr_regions += damon_nr_regions(t);
 881
 882         if (nr_regions > ctx->max_nr_regions / 2)
 883                 return;
 884
 885         /* Maybe the middle of the region has different access frequency */
 886         if (last_nr_regions == nr_regions &&
 887                         nr_regions < ctx->max_nr_regions / 3)
 888                 nr_subregions = 3;
 889
 890         damon_for_each_target(t, ctx)
 891                 damon_split_regions_of(ctx, t, nr_subregions);
 892
 893         last_nr_regions = nr_regions;
 894 }
 895
 896 /*
 897  * Check whether it is time to check and apply the target monitoring regions
 898  *
 899  * Returns true if it is.
 900  */
 901 static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
 902 {
 903         return damon_check_reset_time_interval(&ctx->last_primitive_update,
 904                         ctx->primitive_update_interval);
 905 }
 906
 907 /*
 908  * Check whether current monitoring should be stopped
 909  *
 910  * The monitoring is stopped when either the user requested to stop, or all
 911  * monitoring targets are invalid.
 912  *
 913  * Returns true if need to stop current monitoring.
 914  */
 915 static bool kdamond_need_stop(struct damon_ctx *ctx)
 916 {
 917         struct damon_target *t;
 918
 919         if (kthread_should_stop())
 920                 return true;
 921
 922         if (!ctx->primitive.target_valid)
 923                 return false;
 924
 925         damon_for_each_target(t, ctx) {
 926                 if (ctx->primitive.target_valid(t))
 927                         return false;
 928         }
 929
 930         return true;
 931 }
 932
 933 static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
 934 {
 935         struct sysinfo i;
 936
 937         switch (metric) {
 938         case DAMOS_WMARK_FREE_MEM_RATE:
 939                 si_meminfo(&i);
 940                 return i.freeram * 1000 / i.totalram;
 941         default:
 942                 break;
 943         }
 944         return -EINVAL;
 945 }
 946
 947 /*
 948  * Returns zero if the scheme is active.  Else, returns time to wait for next
 949  * watermark check in micro-seconds.
 950  */
 951 static unsigned long damos_wmark_wait_us(struct damos *scheme)
 952 {
 953         unsigned long metric;
 954
 955         if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
 956                 return 0;
 957
 958         metric = damos_wmark_metric_value(scheme->wmarks.metric);
 959         /* higher than high watermark or lower than low watermark */
 960         if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
 961                 if (scheme->wmarks.activated)
 962                         pr_debug("deactivate a scheme (%d) for %s wmark\n",
 963                                         scheme->action,
 964                                         metric > scheme->wmarks.high ?
 965                                         "high" : "low");
 966                 scheme->wmarks.activated = false;
 967                 return scheme->wmarks.interval;
 968         }
 969
 970         /* inactive and higher than middle watermark */
 971         if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
 972                         !scheme->wmarks.activated)
 973                 return scheme->wmarks.interval;
 974
 975         if (!scheme->wmarks.activated)
 976                 pr_debug("activate a scheme (%d)\n", scheme->action);
 977         scheme->wmarks.activated = true;
 978         return 0;
 979 }
 980
 981 static void kdamond_usleep(unsigned long usecs)
 982 {
 983         if (usecs > 100 * 1000)
 984                 schedule_timeout_interruptible(usecs_to_jiffies(usecs));
 985         else
 986                 usleep_range(usecs, usecs + 1);
 987 }
 988
 989 /* Returns negative error code if it's not activated but should return */
 990 static int kdamond_wait_activation(struct damon_ctx *ctx)
 991 {
 992         struct damos *s;
 993         unsigned long wait_time;
 994         unsigned long min_wait_time = 0;
 995
 996         while (!kdamond_need_stop(ctx)) {
 997                 damon_for_each_scheme(s, ctx) {
 998                         wait_time = damos_wmark_wait_us(s);
 999                         if (!min_wait_time || wait_time < min_wait_time)
1000                                 min_wait_time = wait_time;
1001                 }
1002                 if (!min_wait_time)
1003                         return 0;
1004
1005                 kdamond_usleep(min_wait_time);
1006         }
1007         return -EBUSY;
1008 }
1009
1010 /*
1011  * The monitoring daemon that runs as a kernel thread
1012  */
1013 static int kdamond_fn(void *data)
1014 {
1015         struct damon_ctx *ctx = (struct damon_ctx *)data;
1016         struct damon_target *t;
1017         struct damon_region *r, *next;
1018         unsigned int max_nr_accesses = 0;
1019         unsigned long sz_limit = 0;
1020         bool done = false;
1021
1022         pr_debug("kdamond (%d) starts\n", current->pid);
1023
1024         if (ctx->primitive.init)
1025                 ctx->primitive.init(ctx);
1026         if (ctx->callback.before_start && ctx->callback.before_start(ctx))
1027                 done = true;
1028
1029         sz_limit = damon_region_sz_limit(ctx);
1030
1031         while (!kdamond_need_stop(ctx) && !done) {
1032                 if (kdamond_wait_activation(ctx))
1033                         continue;
1034
1035                 if (ctx->primitive.prepare_access_checks)
1036                         ctx->primitive.prepare_access_checks(ctx);
1037                 if (ctx->callback.after_sampling &&
1038                                 ctx->callback.after_sampling(ctx))
1039                         done = true;
1040
1041                 usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
1042
1043                 if (ctx->primitive.check_accesses)
1044                         max_nr_accesses = ctx->primitive.check_accesses(ctx);
1045
1046                 if (kdamond_aggregate_interval_passed(ctx)) {
1047                         kdamond_merge_regions(ctx,
1048                                         max_nr_accesses / 10,
1049                                         sz_limit);
1050                         if (ctx->callback.after_aggregation &&
1051                                         ctx->callback.after_aggregation(ctx))
1052                                 done = true;
1053                         kdamond_apply_schemes(ctx);
1054                         kdamond_reset_aggregated(ctx);
1055                         kdamond_split_regions(ctx);
1056                         if (ctx->primitive.reset_aggregated)
1057                                 ctx->primitive.reset_aggregated(ctx);
1058                 }
1059
1060                 if (kdamond_need_update_primitive(ctx)) {
1061                         if (ctx->primitive.update)
1062                                 ctx->primitive.update(ctx);
1063                         sz_limit = damon_region_sz_limit(ctx);
1064                 }
1065         }
1066         damon_for_each_target(t, ctx) {
1067                 damon_for_each_region_safe(r, next, t)
1068                         damon_destroy_region(r, t);
1069         }
1070
1071         if (ctx->callback.before_terminate)
1072                 ctx->callback.before_terminate(ctx);
1073         if (ctx->primitive.cleanup)
1074                 ctx->primitive.cleanup(ctx);
1075
1076         pr_debug("kdamond (%d) finishes\n", current->pid);
1077         mutex_lock(&ctx->kdamond_lock);
1078         ctx->kdamond = NULL;
1079         mutex_unlock(&ctx->kdamond_lock);
1080
1081         mutex_lock(&damon_lock);
1082         nr_running_ctxs--;
1083         mutex_unlock(&damon_lock);
1084
1085         return 0;
1086 }
1087
1088 #include "core-test.h"