tools/tracing/rtla/src/utils.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]>
   4  */
   5
   6 #define _GNU_SOURCE
   7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
   8 #include <cpuidle.h>
   9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
  10 #include <dirent.h>
  11 #include <stdarg.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <unistd.h>
  15 #include <ctype.h>
  16 #include <errno.h>
  17 #include <fcntl.h>
  18 #include <sched.h>
  19 #include <stdio.h>
  20
  21 #include "utils.h"
  22
  23 #define MAX_MSG_LENGTH  1024
  24 int config_debug;
  25
  26 /*
  27  * err_msg - print an error message to the stderr
  28  */
  29 void err_msg(const char *fmt, ...)
  30 {
  31         char message[MAX_MSG_LENGTH];
  32         va_list ap;
  33
  34         va_start(ap, fmt);
  35         vsnprintf(message, sizeof(message), fmt, ap);
  36         va_end(ap);
  37
  38         fprintf(stderr, "%s", message);
  39 }
  40
  41 /*
  42  * debug_msg - print a debug message to stderr if debug is set
  43  */
  44 void debug_msg(const char *fmt, ...)
  45 {
  46         char message[MAX_MSG_LENGTH];
  47         va_list ap;
  48
  49         if (!config_debug)
  50                 return;
  51
  52         va_start(ap, fmt);
  53         vsnprintf(message, sizeof(message), fmt, ap);
  54         va_end(ap);
  55
  56         fprintf(stderr, "%s", message);
  57 }
  58
  59 /*
  60  * get_llong_from_str - get a long long int from a string
  61  */
  62 long long get_llong_from_str(char *start)
  63 {
  64         long long value;
  65         char *end;
  66
  67         errno = 0;
  68         value = strtoll(start, &end, 10);
  69         if (errno || start == end)
  70                 return -1;
  71
  72         return value;
  73 }
  74
  75 /*
  76  * get_duration - fill output with a human readable duration since start_time
  77  */
  78 void get_duration(time_t start_time, char *output, int output_size)
  79 {
  80         time_t now = time(NULL);
  81         struct tm *tm_info;
  82         time_t duration;
  83
  84         duration = difftime(now, start_time);
  85         tm_info = gmtime(&duration);
  86
  87         snprintf(output, output_size, "%3d %02d:%02d:%02d",
  88                         tm_info->tm_yday,
  89                         tm_info->tm_hour,
  90                         tm_info->tm_min,
  91                         tm_info->tm_sec);
  92 }
  93
  94 /*
  95  * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
  96  *
  97  * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set
  98  * filling cpu_set_t argument.
  99  *
 100  * Returns 1 on success, 0 otherwise.
 101  */
 102 int parse_cpu_set(char *cpu_list, cpu_set_t *set)
 103 {
 104         const char *p;
 105         int end_cpu;
 106         int nr_cpus;
 107         int cpu;
 108         int i;
 109
 110         CPU_ZERO(set);
 111
 112         nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
 113
 114         for (p = cpu_list; *p; ) {
 115                 cpu = atoi(p);
 116                 if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
 117                         goto err;
 118
 119                 while (isdigit(*p))
 120                         p++;
 121                 if (*p == '-') {
 122                         p++;
 123                         end_cpu = atoi(p);
 124                         if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
 125                                 goto err;
 126                         while (isdigit(*p))
 127                                 p++;
 128                 } else
 129                         end_cpu = cpu;
 130
 131                 if (cpu == end_cpu) {
 132                         debug_msg("cpu_set: adding cpu %d\n", cpu);
 133                         CPU_SET(cpu, set);
 134                 } else {
 135                         for (i = cpu; i <= end_cpu; i++) {
 136                                 debug_msg("cpu_set: adding cpu %d\n", i);
 137                                 CPU_SET(i, set);
 138                         }
 139                 }
 140
 141                 if (*p == ',')
 142                         p++;
 143         }
 144
 145         return 0;
 146 err:
 147         debug_msg("Error parsing the cpu set %s\n", cpu_list);
 148         return 1;
 149 }
 150
 151 /*
 152  * parse_duration - parse duration with s/m/h/d suffix converting it to seconds
 153  */
 154 long parse_seconds_duration(char *val)
 155 {
 156         char *end;
 157         long t;
 158
 159         t = strtol(val, &end, 10);
 160
 161         if (end) {
 162                 switch (*end) {
 163                 case 's':
 164                 case 'S':
 165                         break;
 166                 case 'm':
 167                 case 'M':
 168                         t *= 60;
 169                         break;
 170                 case 'h':
 171                 case 'H':
 172                         t *= 60 * 60;
 173                         break;
 174
 175                 case 'd':
 176                 case 'D':
 177                         t *= 24 * 60 * 60;
 178                         break;
 179                 }
 180         }
 181
 182         return t;
 183 }
 184
 185 /*
 186  * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
 187  */
 188 long parse_ns_duration(char *val)
 189 {
 190         char *end;
 191         long t;
 192
 193         t = strtol(val, &end, 10);
 194
 195         if (end) {
 196                 if (!strncmp(end, "ns", 2)) {
 197                         return t;
 198                 } else if (!strncmp(end, "us", 2)) {
 199                         t *= 1000;
 200                         return t;
 201                 } else if (!strncmp(end, "ms", 2)) {
 202                         t *= 1000 * 1000;
 203                         return t;
 204                 } else if (!strncmp(end, "s", 1)) {
 205                         t *= 1000 * 1000 * 1000;
 206                         return t;
 207                 }
 208                 return -1;
 209         }
 210
 211         return t;
 212 }
 213
 214 /*
 215  * This is a set of helper functions to use SCHED_DEADLINE.
 216  */
 217 #ifndef __NR_sched_setattr
 218 # ifdef __x86_64__
 219 #  define __NR_sched_setattr    314
 220 # elif __i386__
 221 #  define __NR_sched_setattr    351
 222 # elif __arm__
 223 #  define __NR_sched_setattr    380
 224 # elif __aarch64__ || __riscv
 225 #  define __NR_sched_setattr    274
 226 # elif __powerpc__
 227 #  define __NR_sched_setattr    355
 228 # elif __s390x__
 229 #  define __NR_sched_setattr    345
 230 # endif
 231 #endif
 232
 233 #define SCHED_DEADLINE          6
 234
 235 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
 236                                 unsigned int flags) {
 237         return syscall(__NR_sched_setattr, pid, attr, flags);
 238 }
 239
 240 int __set_sched_attr(int pid, struct sched_attr *attr)
 241 {
 242         int flags = 0;
 243         int retval;
 244
 245         retval = syscall_sched_setattr(pid, attr, flags);
 246         if (retval < 0) {
 247                 err_msg("Failed to set sched attributes to the pid %d: %s\n",
 248                         pid, strerror(errno));
 249                 return 1;
 250         }
 251
 252         return 0;
 253 }
 254
 255 /*
 256  * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
 257  *
 258  * Check if the procfs entry is a directory of a process, and then check if the
 259  * process has a comm with the prefix set in char *comm_prefix. As the
 260  * current users of this function only check for kernel threads, there is no
 261  * need to check for the threads for the process.
 262  *
 263  * Return: True if the proc_entry contains a comm file with comm_prefix*.
 264  * Otherwise returns false.
 265  */
 266 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
 267 {
 268         char buffer[MAX_PATH];
 269         int comm_fd, retval;
 270         char *t_name;
 271
 272         if (proc_entry->d_type != DT_DIR)
 273                 return 0;
 274
 275         if (*proc_entry->d_name == '.')
 276                 return 0;
 277
 278         /* check if the string is a pid */
 279         for (t_name = proc_entry->d_name; t_name; t_name++) {
 280                 if (!isdigit(*t_name))
 281                         break;
 282         }
 283
 284         if (*t_name != '\0')
 285                 return 0;
 286
 287         snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
 288         comm_fd = open(buffer, O_RDONLY);
 289         if (comm_fd < 0)
 290                 return 0;
 291
 292         memset(buffer, 0, MAX_PATH);
 293         retval = read(comm_fd, buffer, MAX_PATH);
 294
 295         close(comm_fd);
 296
 297         if (retval <= 0)
 298                 return 0;
 299
 300         retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
 301         if (retval)
 302                 return 0;
 303
 304         /* comm already have \n */
 305         debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
 306
 307         return 1;
 308 }
 309
 310 /*
 311  * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
 312  *
 313  * This function uses procfs to list the currently running threads and then set the
 314  * sched_attr *attr to the threads that start with char *comm_prefix. It is
 315  * mainly used to set the priority to the kernel threads created by the
 316  * tracers.
 317  */
 318 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
 319 {
 320         struct dirent *proc_entry;
 321         DIR *procfs;
 322         int retval;
 323
 324         if (strlen(comm_prefix) >= MAX_PATH) {
 325                 err_msg("Command prefix is too long: %d < strlen(%s)\n",
 326                         MAX_PATH, comm_prefix);
 327                 return 1;
 328         }
 329
 330         procfs = opendir("/proc");
 331         if (!procfs) {
 332                 err_msg("Could not open procfs\n");
 333                 return 1;
 334         }
 335
 336         while ((proc_entry = readdir(procfs))) {
 337
 338                 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
 339                 if (!retval)
 340                         continue;
 341
 342                 /* procfs_is_workload_pid confirmed it is a pid */
 343                 retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
 344                 if (retval) {
 345                         err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
 346                         goto out_err;
 347                 }
 348
 349                 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
 350         }
 351         return 0;
 352
 353 out_err:
 354         closedir(procfs);
 355         return 1;
 356 }
 357
 358 #define INVALID_VAL     (~0L)
 359 static long get_long_ns_after_colon(char *start)
 360 {
 361         long val = INVALID_VAL;
 362
 363         /* find the ":" */
 364         start = strstr(start, ":");
 365         if (!start)
 366                 return -1;
 367
 368         /* skip ":" */
 369         start++;
 370         val = parse_ns_duration(start);
 371
 372         return val;
 373 }
 374
 375 static long get_long_after_colon(char *start)
 376 {
 377         long val = INVALID_VAL;
 378
 379         /* find the ":" */
 380         start = strstr(start, ":");
 381         if (!start)
 382                 return -1;
 383
 384         /* skip ":" */
 385         start++;
 386         val = get_llong_from_str(start);
 387
 388         return val;
 389 }
 390
 391 /*
 392  * parse priority in the format:
 393  * SCHED_OTHER:
 394  *              o:<prio>
 395  *              O:<prio>
 396  * SCHED_RR:
 397  *              r:<prio>
 398  *              R:<prio>
 399  * SCHED_FIFO:
 400  *              f:<prio>
 401  *              F:<prio>
 402  * SCHED_DEADLINE:
 403  *              d:runtime:period
 404  *              D:runtime:period
 405  */
 406 int parse_prio(char *arg, struct sched_attr *sched_param)
 407 {
 408         long prio;
 409         long runtime;
 410         long period;
 411
 412         memset(sched_param, 0, sizeof(*sched_param));
 413         sched_param->size = sizeof(*sched_param);
 414
 415         switch (arg[0]) {
 416         case 'd':
 417         case 'D':
 418                 /* d:runtime:period */
 419                 if (strlen(arg) < 4)
 420                         return -1;
 421
 422                 runtime = get_long_ns_after_colon(arg);
 423                 if (runtime == INVALID_VAL)
 424                         return -1;
 425
 426                 period = get_long_ns_after_colon(&arg[2]);
 427                 if (period == INVALID_VAL)
 428                         return -1;
 429
 430                 if (runtime > period)
 431                         return -1;
 432
 433                 sched_param->sched_policy   = SCHED_DEADLINE;
 434                 sched_param->sched_runtime  = runtime;
 435                 sched_param->sched_deadline = period;
 436                 sched_param->sched_period   = period;
 437                 break;
 438         case 'f':
 439         case 'F':
 440                 /* f:prio */
 441                 prio = get_long_after_colon(arg);
 442                 if (prio == INVALID_VAL)
 443                         return -1;
 444
 445                 if (prio < sched_get_priority_min(SCHED_FIFO))
 446                         return -1;
 447                 if (prio > sched_get_priority_max(SCHED_FIFO))
 448                         return -1;
 449
 450                 sched_param->sched_policy   = SCHED_FIFO;
 451                 sched_param->sched_priority = prio;
 452                 break;
 453         case 'r':
 454         case 'R':
 455                 /* r:prio */
 456                 prio = get_long_after_colon(arg);
 457                 if (prio == INVALID_VAL)
 458                         return -1;
 459
 460                 if (prio < sched_get_priority_min(SCHED_RR))
 461                         return -1;
 462                 if (prio > sched_get_priority_max(SCHED_RR))
 463                         return -1;
 464
 465                 sched_param->sched_policy   = SCHED_RR;
 466                 sched_param->sched_priority = prio;
 467                 break;
 468         case 'o':
 469         case 'O':
 470                 /* o:prio */
 471                 prio = get_long_after_colon(arg);
 472                 if (prio == INVALID_VAL)
 473                         return -1;
 474
 475                 if (prio < MIN_NICE)
 476                         return -1;
 477                 if (prio > MAX_NICE)
 478                         return -1;
 479
 480                 sched_param->sched_policy   = SCHED_OTHER;
 481                 sched_param->sched_nice = prio;
 482                 break;
 483         default:
 484                 return -1;
 485         }
 486         return 0;
 487 }
 488
 489 /*
 490  * set_cpu_dma_latency - set the /dev/cpu_dma_latecy
 491  *
 492  * This is used to reduce the exit from idle latency. The value
 493  * will be reset once the file descriptor of /dev/cpu_dma_latecy
 494  * is closed.
 495  *
 496  * Return: the /dev/cpu_dma_latecy file descriptor
 497  */
 498 int set_cpu_dma_latency(int32_t latency)
 499 {
 500         int retval;
 501         int fd;
 502
 503         fd = open("/dev/cpu_dma_latency", O_RDWR);
 504         if (fd < 0) {
 505                 err_msg("Error opening /dev/cpu_dma_latency\n");
 506                 return -1;
 507         }
 508
 509         retval = write(fd, &latency, 4);
 510         if (retval < 1) {
 511                 err_msg("Error setting /dev/cpu_dma_latency\n");
 512                 close(fd);
 513                 return -1;
 514         }
 515
 516         debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);
 517
 518         return fd;
 519 }
 520
 521 #ifdef HAVE_LIBCPUPOWER_SUPPORT
 522 static unsigned int **saved_cpu_idle_disable_state;
 523 static size_t saved_cpu_idle_disable_state_alloc_ctr;
 524
 525 /*
 526  * save_cpu_idle_state_disable - save disable for all idle states of a cpu
 527  *
 528  * Saves the current disable of all idle states of a cpu, to be subsequently
 529  * restored via restore_cpu_idle_disable_state.
 530  *
 531  * Return: idle state count on success, negative on error
 532  */
 533 int save_cpu_idle_disable_state(unsigned int cpu)
 534 {
 535         unsigned int nr_states;
 536         unsigned int state;
 537         int disabled;
 538         int nr_cpus;
 539
 540         nr_states = cpuidle_state_count(cpu);
 541
 542         if (nr_states == 0)
 543                 return 0;
 544
 545         if (saved_cpu_idle_disable_state == NULL) {
 546                 nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
 547                 saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
 548                 if (!saved_cpu_idle_disable_state)
 549                         return -1;
 550         }
 551
 552         saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
 553         if (!saved_cpu_idle_disable_state[cpu])
 554                 return -1;
 555         saved_cpu_idle_disable_state_alloc_ctr++;
 556
 557         for (state = 0; state < nr_states; state++) {
 558                 disabled = cpuidle_is_state_disabled(cpu, state);
 559                 if (disabled < 0)
 560                         return disabled;
 561                 saved_cpu_idle_disable_state[cpu][state] = disabled;
 562         }
 563
 564         return nr_states;
 565 }
 566
 567 /*
 568  * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
 569  *
 570  * Restores the current disable state of all idle states of a cpu that was
 571  * previously saved by save_cpu_idle_disable_state.
 572  *
 573  * Return: idle state count on success, negative on error
 574  */
 575 int restore_cpu_idle_disable_state(unsigned int cpu)
 576 {
 577         unsigned int nr_states;
 578         unsigned int state;
 579         int disabled;
 580         int result;
 581
 582         nr_states = cpuidle_state_count(cpu);
 583
 584         if (nr_states == 0)
 585                 return 0;
 586
 587         if (!saved_cpu_idle_disable_state)
 588                 return -1;
 589
 590         for (state = 0; state < nr_states; state++) {
 591                 if (!saved_cpu_idle_disable_state[cpu])
 592                         return -1;
 593                 disabled = saved_cpu_idle_disable_state[cpu][state];
 594                 result = cpuidle_state_disable(cpu, state, disabled);
 595                 if (result < 0)
 596                         return result;
 597         }
 598
 599         free(saved_cpu_idle_disable_state[cpu]);
 600         saved_cpu_idle_disable_state[cpu] = NULL;
 601         saved_cpu_idle_disable_state_alloc_ctr--;
 602         if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
 603                 free(saved_cpu_idle_disable_state);
 604                 saved_cpu_idle_disable_state = NULL;
 605         }
 606
 607         return nr_states;
 608 }
 609
 610 /*
 611  * free_cpu_idle_disable_states - free saved idle state disable for all cpus
 612  *
 613  * Frees the memory used for storing cpu idle state disable for all cpus
 614  * and states.
 615  *
 616  * Normally, the memory is freed automatically in
 617  * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
 618  * error.
 619  */
 620 void free_cpu_idle_disable_states(void)
 621 {
 622         int cpu;
 623         int nr_cpus;
 624
 625         if (!saved_cpu_idle_disable_state)
 626                 return;
 627
 628         nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
 629
 630         for (cpu = 0; cpu < nr_cpus; cpu++) {
 631                 free(saved_cpu_idle_disable_state[cpu]);
 632                 saved_cpu_idle_disable_state[cpu] = NULL;
 633         }
 634
 635         free(saved_cpu_idle_disable_state);
 636         saved_cpu_idle_disable_state = NULL;
 637 }
 638
 639 /*
 640  * set_deepest_cpu_idle_state - limit idle state of cpu
 641  *
 642  * Disables all idle states deeper than the one given in
 643  * deepest_state (assuming states with higher number are deeper).
 644  *
 645  * This is used to reduce the exit from idle latency. Unlike
 646  * set_cpu_dma_latency, it can disable idle states per cpu.
 647  *
 648  * Return: idle state count on success, negative on error
 649  */
 650 int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
 651 {
 652         unsigned int nr_states;
 653         unsigned int state;
 654         int result;
 655
 656         nr_states = cpuidle_state_count(cpu);
 657
 658         for (state = deepest_state + 1; state < nr_states; state++) {
 659                 result = cpuidle_state_disable(cpu, state, 1);
 660                 if (result < 0)
 661                         return result;
 662         }
 663
 664         return nr_states;
 665 }
 666 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
 667
 668 #define _STR(x) #x
 669 #define STR(x) _STR(x)
 670
 671 /*
 672  * find_mount - find a the mount point of a given fs
 673  *
 674  * Returns 0 if mount is not found, otherwise return 1 and fill mp
 675  * with the mount point.
 676  */
 677 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
 678 {
 679         char mount_point[MAX_PATH+1];
 680         char type[100];
 681         int found = 0;
 682         FILE *fp;
 683
 684         fp = fopen("/proc/mounts", "r");
 685         if (!fp)
 686                 return 0;
 687
 688         while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) {
 689                 if (strcmp(type, fs) == 0) {
 690                         found = 1;
 691                         break;
 692                 }
 693         }
 694         fclose(fp);
 695
 696         if (!found)
 697                 return 0;
 698
 699         memset(mp, 0, sizeof_mp);
 700         strncpy(mp, mount_point, sizeof_mp - 1);
 701
 702         debug_msg("Fs %s found at %s\n", fs, mp);
 703         return 1;
 704 }
 705
 706 /*
 707  * get_self_cgroup - get the current thread cgroup path
 708  *
 709  * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
 710  *
 711  * 0::/user.slice/user-0.slice/session-3.scope'\n'
 712  *
 713  * This function is interested in the content after the second : and before the '\n'.
 714  *
 715  * Returns 1 if a string was found, 0 otherwise.
 716  */
 717 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
 718 {
 719         char path[MAX_PATH], *start;
 720         int fd, retval;
 721
 722         snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
 723
 724         fd = open(path, O_RDONLY);
 725         if (fd < 0)
 726                 return 0;
 727
 728         retval = read(fd, path, MAX_PATH);
 729
 730         close(fd);
 731
 732         if (retval <= 0)
 733                 return 0;
 734
 735         start = path;
 736
 737         start = strstr(start, ":");
 738         if (!start)
 739                 return 0;
 740
 741         /* skip ":" */
 742         start++;
 743
 744         start = strstr(start, ":");
 745         if (!start)
 746                 return 0;
 747
 748         /* skip ":" */
 749         start++;
 750
 751         if (strlen(start) >= sizeof_self_cg)
 752                 return 0;
 753
 754         snprintf(self_cg, sizeof_self_cg, "%s", start);
 755
 756         /* Swap '\n' with '\0' */
 757         start = strstr(self_cg, "\n");
 758
 759         /* there must be '\n' */
 760         if (!start)
 761                 return 0;
 762
 763         /* ok, it found a string after the second : and before the \n */
 764         *start = '\0';
 765
 766         return 1;
 767 }
 768
 769 /*
 770  * set_comm_cgroup - Set cgroup to pid_t pid
 771  *
 772  * If cgroup argument is not NULL, the threads will move to the given cgroup.
 773  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 774  *
 775  * Supports cgroup v2.
 776  *
 777  * Returns 1 on success, 0 otherwise.
 778  */
 779 int set_pid_cgroup(pid_t pid, const char *cgroup)
 780 {
 781         char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
 782         char cgroup_procs[MAX_PATH];
 783         char pid_str[24];
 784         int retval;
 785         int cg_fd;
 786
 787         retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
 788         if (!retval) {
 789                 err_msg("Did not find cgroupv2 mount point\n");
 790                 return 0;
 791         }
 792
 793         if (!cgroup) {
 794                 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
 795                                 sizeof(cgroup_path) - strlen(cgroup_path));
 796                 if (!retval) {
 797                         err_msg("Did not find self cgroup\n");
 798                         return 0;
 799                 }
 800         } else {
 801                 snprintf(&cgroup_path[strlen(cgroup_path)],
 802                                 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
 803         }
 804
 805         snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
 806
 807         debug_msg("Using cgroup path at: %s\n", cgroup_procs);
 808
 809         cg_fd = open(cgroup_procs, O_RDWR);
 810         if (cg_fd < 0)
 811                 return 0;
 812
 813         snprintf(pid_str, sizeof(pid_str), "%d\n", pid);
 814
 815         retval = write(cg_fd, pid_str, strlen(pid_str));
 816         if (retval < 0)
 817                 err_msg("Error setting cgroup attributes for pid:%s - %s\n",
 818                                 pid_str, strerror(errno));
 819         else
 820                 debug_msg("Set cgroup attributes for pid:%s\n", pid_str);
 821
 822         close(cg_fd);
 823
 824         return (retval >= 0);
 825 }
 826
 827 /**
 828  * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
 829  *
 830  * If cgroup argument is not NULL, the threads will move to the given cgroup.
 831  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 832  *
 833  * Supports cgroup v2.
 834  *
 835  * Returns 1 on success, 0 otherwise.
 836  */
 837 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
 838 {
 839         char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
 840         char cgroup_procs[MAX_PATH];
 841         struct dirent *proc_entry;
 842         DIR *procfs;
 843         int retval;
 844         int cg_fd;
 845
 846         if (strlen(comm_prefix) >= MAX_PATH) {
 847                 err_msg("Command prefix is too long: %d < strlen(%s)\n",
 848                         MAX_PATH, comm_prefix);
 849                 return 0;
 850         }
 851
 852         retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
 853         if (!retval) {
 854                 err_msg("Did not find cgroupv2 mount point\n");
 855                 return 0;
 856         }
 857
 858         if (!cgroup) {
 859                 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
 860                                 sizeof(cgroup_path) - strlen(cgroup_path));
 861                 if (!retval) {
 862                         err_msg("Did not find self cgroup\n");
 863                         return 0;
 864                 }
 865         } else {
 866                 snprintf(&cgroup_path[strlen(cgroup_path)],
 867                                 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
 868         }
 869
 870         snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
 871
 872         debug_msg("Using cgroup path at: %s\n", cgroup_procs);
 873
 874         cg_fd = open(cgroup_procs, O_RDWR);
 875         if (cg_fd < 0)
 876                 return 0;
 877
 878         procfs = opendir("/proc");
 879         if (!procfs) {
 880                 err_msg("Could not open procfs\n");
 881                 goto out_cg;
 882         }
 883
 884         while ((proc_entry = readdir(procfs))) {
 885
 886                 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
 887                 if (!retval)
 888                         continue;
 889
 890                 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
 891                 if (retval < 0) {
 892                         err_msg("Error setting cgroup attributes for pid:%s - %s\n",
 893                                 proc_entry->d_name, strerror(errno));
 894                         goto out_procfs;
 895                 }
 896
 897                 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
 898         }
 899
 900         closedir(procfs);
 901         close(cg_fd);
 902         return 1;
 903
 904 out_procfs:
 905         closedir(procfs);
 906 out_cg:
 907         close(cg_fd);
 908         return 0;
 909 }
 910
 911 /**
 912  * auto_house_keeping - Automatically move rtla out of measurement threads
 913  *
 914  * Try to move rtla away from the tracer, if possible.
 915  *
 916  * Returns 1 on success, 0 otherwise.
 917  */
 918 int auto_house_keeping(cpu_set_t *monitored_cpus)
 919 {
 920         cpu_set_t rtla_cpus, house_keeping_cpus;
 921         int retval;
 922
 923         /* first get the CPUs in which rtla can actually run. */
 924         retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus);
 925         if (retval == -1) {
 926                 debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
 927                 return 0;
 928         }
 929
 930         /* then check if the existing setup is already good. */
 931         CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
 932         if (!CPU_COUNT(&house_keeping_cpus)) {
 933                 debug_msg("rtla and the monitored CPUs do not share CPUs.");
 934                 debug_msg("Skipping auto house-keeping\n");
 935                 return 1;
 936         }
 937
 938         /* remove the intersection */
 939         CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
 940
 941         /* get only those that rtla can run */
 942         CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus);
 943
 944         /* is there any cpu left? */
 945         if (!CPU_COUNT(&house_keeping_cpus)) {
 946                 debug_msg("Could not find any CPU for auto house-keeping\n");
 947                 return 0;
 948         }
 949
 950         retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus);
 951         if (retval == -1) {
 952                 debug_msg("Could not set affinity for auto house-keeping\n");
 953                 return 0;
 954         }
 955
 956         debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");
 957
 958         return 1;
 959 }