tools/testing/selftests/rseq/param_test.c
1 // SPDX-License-Identifier: LGPL-2.1
2 #define _GNU_SOURCE
3 #include <assert.h>
4 #include <linux/membarrier.h>
5 #include <pthread.h>
6 #include <sched.h>
7 #include <stdatomic.h>
8 #include <stdint.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <unistd.h>
14 #include <poll.h>
15 #include <sys/types.h>
16 #include <signal.h>
17 #include <errno.h>
18 #include <stddef.h>
19 #include <stdbool.h>
20
21 static inline pid_t rseq_gettid(void)
22 {
23         return syscall(__NR_gettid);
24 }
25
26 #define NR_INJECT       9
27 static int loop_cnt[NR_INJECT + 1];
28
29 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
30 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
31 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
32 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
33 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
34 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
35
36 static int opt_modulo, verbose;
37
38 static int opt_yield, opt_signal, opt_sleep,
39                 opt_disable_rseq, opt_threads = 200,
40                 opt_disable_mod = 0, opt_test = 's';
41
42 static long long opt_reps = 5000;
43
44 static __thread __attribute__((tls_model("initial-exec")))
45 unsigned int signals_delivered;
46
47 #ifndef BENCHMARK
48
49 static __thread __attribute__((tls_model("initial-exec"), unused))
50 unsigned int yield_mod_cnt, nr_abort;
51
52 #define printf_verbose(fmt, ...)                        \
53         do {                                            \
54                 if (verbose)                            \
55                         printf(fmt, ## __VA_ARGS__);    \
56         } while (0)
57
58 #ifdef __i386__
59
60 #define INJECT_ASM_REG  "eax"
61
62 #define RSEQ_INJECT_CLOBBER \
63         , INJECT_ASM_REG
64
65 #define RSEQ_INJECT_ASM(n) \
66         "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
67         "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
68         "jz 333f\n\t" \
69         "222:\n\t" \
70         "dec %%" INJECT_ASM_REG "\n\t" \
71         "jnz 222b\n\t" \
72         "333:\n\t"
73
74 #elif defined(__x86_64__)
75
76 #define INJECT_ASM_REG_P        "rax"
77 #define INJECT_ASM_REG          "eax"
78
79 #define RSEQ_INJECT_CLOBBER \
80         , INJECT_ASM_REG_P \
81         , INJECT_ASM_REG
82
83 #define RSEQ_INJECT_ASM(n) \
84         "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
85         "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
86         "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
87         "jz 333f\n\t" \
88         "222:\n\t" \
89         "dec %%" INJECT_ASM_REG "\n\t" \
90         "jnz 222b\n\t" \
91         "333:\n\t"
92
93 #elif defined(__s390__)
94
95 #define RSEQ_INJECT_INPUT \
96         , [loop_cnt_1]"m"(loop_cnt[1]) \
97         , [loop_cnt_2]"m"(loop_cnt[2]) \
98         , [loop_cnt_3]"m"(loop_cnt[3]) \
99         , [loop_cnt_4]"m"(loop_cnt[4]) \
100         , [loop_cnt_5]"m"(loop_cnt[5]) \
101         , [loop_cnt_6]"m"(loop_cnt[6])
102
103 #define INJECT_ASM_REG  "r12"
104
105 #define RSEQ_INJECT_CLOBBER \
106         , INJECT_ASM_REG
107
108 #define RSEQ_INJECT_ASM(n) \
109         "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
110         "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
111         "je 333f\n\t" \
112         "222:\n\t" \
113         "ahi %%" INJECT_ASM_REG ", -1\n\t" \
114         "jnz 222b\n\t" \
115         "333:\n\t"
116
117 #elif defined(__ARMEL__)
118
119 #define RSEQ_INJECT_INPUT \
120         , [loop_cnt_1]"m"(loop_cnt[1]) \
121         , [loop_cnt_2]"m"(loop_cnt[2]) \
122         , [loop_cnt_3]"m"(loop_cnt[3]) \
123         , [loop_cnt_4]"m"(loop_cnt[4]) \
124         , [loop_cnt_5]"m"(loop_cnt[5]) \
125         , [loop_cnt_6]"m"(loop_cnt[6])
126
127 #define INJECT_ASM_REG  "r4"
128
129 #define RSEQ_INJECT_CLOBBER \
130         , INJECT_ASM_REG
131
132 #define RSEQ_INJECT_ASM(n) \
133         "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
134         "cmp " INJECT_ASM_REG ", #0\n\t" \
135         "beq 333f\n\t" \
136         "222:\n\t" \
137         "subs " INJECT_ASM_REG ", #1\n\t" \
138         "bne 222b\n\t" \
139         "333:\n\t"
140
141 #elif defined(__AARCH64EL__)
142
143 #define RSEQ_INJECT_INPUT \
144         , [loop_cnt_1] "Qo" (loop_cnt[1]) \
145         , [loop_cnt_2] "Qo" (loop_cnt[2]) \
146         , [loop_cnt_3] "Qo" (loop_cnt[3]) \
147         , [loop_cnt_4] "Qo" (loop_cnt[4]) \
148         , [loop_cnt_5] "Qo" (loop_cnt[5]) \
149         , [loop_cnt_6] "Qo" (loop_cnt[6])
150
151 #define INJECT_ASM_REG  RSEQ_ASM_TMP_REG32
152
153 #define RSEQ_INJECT_ASM(n) \
154         "       ldr     " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"       \
155         "       cbz     " INJECT_ASM_REG ", 333f\n"                     \
156         "222:\n"                                                        \
157         "       sub     " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"   \
158         "       cbnz    " INJECT_ASM_REG ", 222b\n"                     \
159         "333:\n"
160
161 #elif defined(__PPC__)
162
163 #define RSEQ_INJECT_INPUT \
164         , [loop_cnt_1]"m"(loop_cnt[1]) \
165         , [loop_cnt_2]"m"(loop_cnt[2]) \
166         , [loop_cnt_3]"m"(loop_cnt[3]) \
167         , [loop_cnt_4]"m"(loop_cnt[4]) \
168         , [loop_cnt_5]"m"(loop_cnt[5]) \
169         , [loop_cnt_6]"m"(loop_cnt[6])
170
171 #define INJECT_ASM_REG  "r18"
172
173 #define RSEQ_INJECT_CLOBBER \
174         , INJECT_ASM_REG
175
176 #define RSEQ_INJECT_ASM(n) \
177         "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
178         "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
179         "beq 333f\n\t" \
180         "222:\n\t" \
181         "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
182         "bne 222b\n\t" \
183         "333:\n\t"
184
185 #elif defined(__mips__)
186
187 #define RSEQ_INJECT_INPUT \
188         , [loop_cnt_1]"m"(loop_cnt[1]) \
189         , [loop_cnt_2]"m"(loop_cnt[2]) \
190         , [loop_cnt_3]"m"(loop_cnt[3]) \
191         , [loop_cnt_4]"m"(loop_cnt[4]) \
192         , [loop_cnt_5]"m"(loop_cnt[5]) \
193         , [loop_cnt_6]"m"(loop_cnt[6])
194
195 #define INJECT_ASM_REG  "$5"
196
197 #define RSEQ_INJECT_CLOBBER \
198         , INJECT_ASM_REG
199
200 #define RSEQ_INJECT_ASM(n) \
201         "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
202         "beqz " INJECT_ASM_REG ", 333f\n\t" \
203         "222:\n\t" \
204         "addiu " INJECT_ASM_REG ", -1\n\t" \
205         "bnez " INJECT_ASM_REG ", 222b\n\t" \
206         "333:\n\t"
207 #elif defined(__riscv)
208
209 #define RSEQ_INJECT_INPUT \
210         , [loop_cnt_1]"m"(loop_cnt[1]) \
211         , [loop_cnt_2]"m"(loop_cnt[2]) \
212         , [loop_cnt_3]"m"(loop_cnt[3]) \
213         , [loop_cnt_4]"m"(loop_cnt[4]) \
214         , [loop_cnt_5]"m"(loop_cnt[5]) \
215         , [loop_cnt_6]"m"(loop_cnt[6])
216
217 #define INJECT_ASM_REG  "t1"
218
219 #define RSEQ_INJECT_CLOBBER \
220         , INJECT_ASM_REG
221
222 #define RSEQ_INJECT_ASM(n)                                      \
223         "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t"         \
224         "beqz " INJECT_ASM_REG ", 333f\n\t"                     \
225         "222:\n\t"                                              \
226         "addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"   \
227         "bnez " INJECT_ASM_REG ", 222b\n\t"                     \
228         "333:\n\t"
229
230
231 #else
232 #error unsupported target
233 #endif
234
235 #define RSEQ_INJECT_FAILED \
236         nr_abort++;
237
238 #define RSEQ_INJECT_C(n) \
239 { \
240         int loc_i, loc_nr_loops = loop_cnt[n]; \
241         \
242         for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
243                 rseq_barrier(); \
244         } \
245         if (loc_nr_loops == -1 && opt_modulo) { \
246                 if (yield_mod_cnt == opt_modulo - 1) { \
247                         if (opt_sleep > 0) \
248                                 poll(NULL, 0, opt_sleep); \
249                         if (opt_yield) \
250                                 sched_yield(); \
251                         if (opt_signal) \
252                                 raise(SIGUSR1); \
253                         yield_mod_cnt = 0; \
254                 } else { \
255                         yield_mod_cnt++; \
256                 } \
257         } \
258 }
259
260 #else
261
262 #define printf_verbose(fmt, ...)
263
264 #endif /* BENCHMARK */
265
266 #include "rseq.h"
267
268 static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
269
270 #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
271 #define TEST_MEMBARRIER
272
273 static int sys_membarrier(int cmd, int flags, int cpu_id)
274 {
275         return syscall(__NR_membarrier, cmd, flags, cpu_id);
276 }
277 #endif
278
279 #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
280 # define RSEQ_PERCPU    RSEQ_PERCPU_MM_CID
281 static
282 int get_current_cpu_id(void)
283 {
284         return rseq_current_mm_cid();
285 }
286 static
287 bool rseq_validate_cpu_id(void)
288 {
289         return rseq_mm_cid_available();
290 }
291 # ifdef TEST_MEMBARRIER
292 /*
293  * Membarrier does not currently support targeting a mm_cid, so
294  * issue the barrier on all cpus.
295  */
296 static
297 int rseq_membarrier_expedited(int cpu)
298 {
299         return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
300                               0, 0);
301 }
302 # endif /* TEST_MEMBARRIER */
303 #else
304 # define RSEQ_PERCPU    RSEQ_PERCPU_CPU_ID
305 static
306 int get_current_cpu_id(void)
307 {
308         return rseq_cpu_start();
309 }
310 static
311 bool rseq_validate_cpu_id(void)
312 {
313         return rseq_current_cpu_raw() >= 0;
314 }
315 # ifdef TEST_MEMBARRIER
316 static
317 int rseq_membarrier_expedited(int cpu)
318 {
319         return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
320                               MEMBARRIER_CMD_FLAG_CPU, cpu);
321 }
322 # endif /* TEST_MEMBARRIER */
323 #endif
324
325 struct percpu_lock_entry {
326         intptr_t v;
327 } __attribute__((aligned(128)));
328
329 struct percpu_lock {
330         struct percpu_lock_entry c[CPU_SETSIZE];
331 };
332
333 struct test_data_entry {
334         intptr_t count;
335 } __attribute__((aligned(128)));
336
337 struct spinlock_test_data {
338         struct percpu_lock lock;
339         struct test_data_entry c[CPU_SETSIZE];
340 };
341
342 struct spinlock_thread_test_data {
343         struct spinlock_test_data *data;
344         long long reps;
345         int reg;
346 };
347
348 struct inc_test_data {
349         struct test_data_entry c[CPU_SETSIZE];
350 };
351
352 struct inc_thread_test_data {
353         struct inc_test_data *data;
354         long long reps;
355         int reg;
356 };
357
358 struct percpu_list_node {
359         intptr_t data;
360         struct percpu_list_node *next;
361 };
362
363 struct percpu_list_entry {
364         struct percpu_list_node *head;
365 } __attribute__((aligned(128)));
366
367 struct percpu_list {
368         struct percpu_list_entry c[CPU_SETSIZE];
369 };
370
371 #define BUFFER_ITEM_PER_CPU     100
372
373 struct percpu_buffer_node {
374         intptr_t data;
375 };
376
377 struct percpu_buffer_entry {
378         intptr_t offset;
379         intptr_t buflen;
380         struct percpu_buffer_node **array;
381 } __attribute__((aligned(128)));
382
383 struct percpu_buffer {
384         struct percpu_buffer_entry c[CPU_SETSIZE];
385 };
386
387 #define MEMCPY_BUFFER_ITEM_PER_CPU      100
388
389 struct percpu_memcpy_buffer_node {
390         intptr_t data1;
391         uint64_t data2;
392 };
393
394 struct percpu_memcpy_buffer_entry {
395         intptr_t offset;
396         intptr_t buflen;
397         struct percpu_memcpy_buffer_node *array;
398 } __attribute__((aligned(128)));
399
400 struct percpu_memcpy_buffer {
401         struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
402 };
403
404 /* A simple percpu spinlock. Grabs lock on current cpu. */
405 static int rseq_this_cpu_lock(struct percpu_lock *lock)
406 {
407         int cpu;
408
409         for (;;) {
410                 int ret;
411
412                 cpu = get_current_cpu_id();
413                 if (cpu < 0) {
414                         fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
415                                         getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
416                         abort();
417                 }
418                 ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
419                                          &lock->c[cpu].v,
420                                          0, 1, cpu);
421                 if (rseq_likely(!ret))
422                         break;
423                 /* Retry if comparison fails or rseq aborts. */
424         }
425         /*
426          * Acquire semantic when taking lock after control dependency.
427          * Matches rseq_smp_store_release().
428          */
429         rseq_smp_acquire__after_ctrl_dep();
430         return cpu;
431 }
432
433 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
434 {
435         assert(lock->c[cpu].v == 1);
436         /*
437          * Release lock, with release semantic. Matches
438          * rseq_smp_acquire__after_ctrl_dep().
439          */
440         rseq_smp_store_release(&lock->c[cpu].v, 0);
441 }
442
443 void *test_percpu_spinlock_thread(void *arg)
444 {
445         struct spinlock_thread_test_data *thread_data = arg;
446         struct spinlock_test_data *data = thread_data->data;
447         long long i, reps;
448
449         if (!opt_disable_rseq && thread_data->reg &&
450             rseq_register_current_thread())
451                 abort();
452         reps = thread_data->reps;
453         for (i = 0; i < reps; i++) {
454                 int cpu = rseq_this_cpu_lock(&data->lock);
455                 data->c[cpu].count++;
456                 rseq_percpu_unlock(&data->lock, cpu);
457 #ifndef BENCHMARK
458                 if (i != 0 && !(i % (reps / 10)))
459                         printf_verbose("tid %d: count %lld\n",
460                                        (int) rseq_gettid(), i);
461 #endif
462         }
463         printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
464                        (int) rseq_gettid(), nr_abort, signals_delivered);
465         if (!opt_disable_rseq && thread_data->reg &&
466             rseq_unregister_current_thread())
467                 abort();
468         return NULL;
469 }
470
471 /*
472  * A simple test which implements a sharded counter using a per-cpu
473  * lock.  Obviously real applications might prefer to simply use a
474  * per-cpu increment; however, this is reasonable for a test and the
475  * lock can be extended to synchronize more complicated operations.
476  */
477 void test_percpu_spinlock(void)
478 {
479         const int num_threads = opt_threads;
480         int i, ret;
481         uint64_t sum;
482         pthread_t test_threads[num_threads];
483         struct spinlock_test_data data;
484         struct spinlock_thread_test_data thread_data[num_threads];
485
486         memset(&data, 0, sizeof(data));
487         for (i = 0; i < num_threads; i++) {
488                 thread_data[i].reps = opt_reps;
489                 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
490                         thread_data[i].reg = 1;
491                 else
492                         thread_data[i].reg = 0;
493                 thread_data[i].data = &data;
494                 ret = pthread_create(&test_threads[i], NULL,
495                                      test_percpu_spinlock_thread,
496                                      &thread_data[i]);
497                 if (ret) {
498                         errno = ret;
499                         perror("pthread_create");
500                         abort();
501                 }
502         }
503
504         for (i = 0; i < num_threads; i++) {
505                 ret = pthread_join(test_threads[i], NULL);
506                 if (ret) {
507                         errno = ret;
508                         perror("pthread_join");
509                         abort();
510                 }
511         }
512
513         sum = 0;
514         for (i = 0; i < CPU_SETSIZE; i++)
515                 sum += data.c[i].count;
516
517         assert(sum == (uint64_t)opt_reps * num_threads);
518 }
519
520 void *test_percpu_inc_thread(void *arg)
521 {
522         struct inc_thread_test_data *thread_data = arg;
523         struct inc_test_data *data = thread_data->data;
524         long long i, reps;
525
526         if (!opt_disable_rseq && thread_data->reg &&
527             rseq_register_current_thread())
528                 abort();
529         reps = thread_data->reps;
530         for (i = 0; i < reps; i++) {
531                 int ret;
532
533                 do {
534                         int cpu;
535
536                         cpu = get_current_cpu_id();
537                         ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
538                                         &data->c[cpu].count, 1, cpu);
539                 } while (rseq_unlikely(ret));
540 #ifndef BENCHMARK
541                 if (i != 0 && !(i % (reps / 10)))
542                         printf_verbose("tid %d: count %lld\n",
543                                        (int) rseq_gettid(), i);
544 #endif
545         }
546         printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
547                        (int) rseq_gettid(), nr_abort, signals_delivered);
548         if (!opt_disable_rseq && thread_data->reg &&
549             rseq_unregister_current_thread())
550                 abort();
551         return NULL;
552 }
553
554 void test_percpu_inc(void)
555 {
556         const int num_threads = opt_threads;
557         int i, ret;
558         uint64_t sum;
559         pthread_t test_threads[num_threads];
560         struct inc_test_data data;
561         struct inc_thread_test_data thread_data[num_threads];
562
563         memset(&data, 0, sizeof(data));
564         for (i = 0; i < num_threads; i++) {
565                 thread_data[i].reps = opt_reps;
566                 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
567                         thread_data[i].reg = 1;
568                 else
569                         thread_data[i].reg = 0;
570                 thread_data[i].data = &data;
571                 ret = pthread_create(&test_threads[i], NULL,
572                                      test_percpu_inc_thread,
573                                      &thread_data[i]);
574                 if (ret) {
575                         errno = ret;
576                         perror("pthread_create");
577                         abort();
578                 }
579         }
580
581         for (i = 0; i < num_threads; i++) {
582                 ret = pthread_join(test_threads[i], NULL);
583                 if (ret) {
584                         errno = ret;
585                         perror("pthread_join");
586                         abort();
587                 }
588         }
589
590         sum = 0;
591         for (i = 0; i < CPU_SETSIZE; i++)
592                 sum += data.c[i].count;
593
594         assert(sum == (uint64_t)opt_reps * num_threads);
595 }
596
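/* Push node onto the head of the current cpu's list, retrying on rseq abort. */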
597 void this_cpu_list_push(struct percpu_list *list,
598                         struct percpu_list_node *node,
599                         int *_cpu)
600 {
601         int cpu;
602
603         for (;;) {
604                 intptr_t *targetptr, newval, expect;
605                 int ret;
606
607                 cpu = get_current_cpu_id();
608                 /* Load list->c[cpu].head with single-copy atomicity. */
609                 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
610                 newval = (intptr_t)node;
611                 targetptr = (intptr_t *)&list->c[cpu].head;
612                 node->next = (struct percpu_list_node *)expect;
613                 ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
614                                          targetptr, expect, newval, cpu);
615                 if (rseq_likely(!ret))
616                         break;
617                 /* Retry if comparison fails or rseq aborts. */
618         }
619         if (_cpu)
620                 *_cpu = cpu;
621 }
622
623 /*
624  * Unlike a traditional lock-less linked list, the availability of an
625  * rseq primitive allows us to implement pop without concerns over
626  * ABA-type races.
627  */
628 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
629                                            int *_cpu)
630 {
631         struct percpu_list_node *node = NULL;
632         int cpu;
633
634         for (;;) {
635                 struct percpu_list_node *head;
636                 intptr_t *targetptr, expectnot, *load;
637                 long offset;
638                 int ret;
639
640                 cpu = get_current_cpu_id();
641                 targetptr = (intptr_t *)&list->c[cpu].head;
642                 expectnot = (intptr_t)NULL;
643                 offset = offsetof(struct percpu_list_node, next);
644                 load = (intptr_t *)&head;
645                 ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
646                                                  targetptr, expectnot,
647                                                  offset, load, cpu);
648                 if (rseq_likely(!ret)) {
649                         node = head;
650                         break;
651                 }
652                 if (ret > 0)
653                         break;
654                 /* Retry if rseq aborts. */
655         }
656         if (_cpu)
657                 *_cpu = cpu;
658         return node;
659 }
660
661 /*
662  * __percpu_list_pop is not safe against concurrent accesses. Should
663  * only be used on lists that are not concurrently modified.
664  */
665 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
666 {
667         struct percpu_list_node *node;
668
669         node = list->c[cpu].head;
670         if (!node)
671                 return NULL;
672         list->c[cpu].head = node->next;
673         return node;
674 }
675
676 void *test_percpu_list_thread(void *arg)
677 {
678         long long i, reps;
679         struct percpu_list *list = (struct percpu_list *)arg;
680
681         if (!opt_disable_rseq && rseq_register_current_thread())
682                 abort();
683
684         reps = opt_reps;
685         for (i = 0; i < reps; i++) {
686                 struct percpu_list_node *node;
687
688                 node = this_cpu_list_pop(list, NULL);
689                 if (opt_yield)
690                         sched_yield();  /* encourage shuffling */
691                 if (node)
692                         this_cpu_list_push(list, node, NULL);
693         }
694
695         printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
696                        (int) rseq_gettid(), nr_abort, signals_delivered);
697         if (!opt_disable_rseq && rseq_unregister_current_thread())
698                 abort();
699
700         return NULL;
701 }
702
703 /* Simultaneous modification to a per-cpu linked list from many threads.  */
704 void test_percpu_list(void)
705 {
706         const int num_threads = opt_threads;
707         int i, j, ret;
708         uint64_t sum = 0, expected_sum = 0;
709         struct percpu_list list;
710         pthread_t test_threads[num_threads];
711         cpu_set_t allowed_cpus;
712
713         memset(&list, 0, sizeof(list));
714
715         /* Generate list entries for every usable cpu. */
716         sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
717         for (i = 0; i < CPU_SETSIZE; i++) {
718                 if (!CPU_ISSET(i, &allowed_cpus))
719                         continue;
720                 for (j = 1; j <= 100; j++) {
721                         struct percpu_list_node *node;
722
723                         expected_sum += j;
724
725                         node = malloc(sizeof(*node));
726                         assert(node);
727                         node->data = j;
728                         node->next = list.c[i].head;
729                         list.c[i].head = node;
730                 }
731         }
732
733         for (i = 0; i < num_threads; i++) {
734                 ret = pthread_create(&test_threads[i], NULL,
735                                      test_percpu_list_thread, &list);
736                 if (ret) {
737                         errno = ret;
738                         perror("pthread_create");
739                         abort();
740                 }
741         }
742
743         for (i = 0; i < num_threads; i++) {
744                 ret = pthread_join(test_threads[i], NULL);
745                 if (ret) {
746                         errno = ret;
747                         perror("pthread_join");
748                         abort();
749                 }
750         }
751
752         for (i = 0; i < CPU_SETSIZE; i++) {
753                 struct percpu_list_node *node;
754
755                 if (!CPU_ISSET(i, &allowed_cpus))
756                         continue;
757
758                 while ((node = __percpu_list_pop(&list, i))) {
759                         sum += node->data;
760                         free(node);
761                 }
762         }
763
764         /*
765          * All entries should now be accounted for (unless some external
766          * actor is interfering with our allowed affinity while this
767          * test is running).
768          */
769         assert(sum == expected_sum);
770 }
771
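/*
 * Push node into the current cpu's array-based buffer: speculatively store
 * the node pointer at the current offset, then publish the incremented
 * offset. Returns false if the buffer is full.
 */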
772 bool this_cpu_buffer_push(struct percpu_buffer *buffer,
773                           struct percpu_buffer_node *node,
774                           int *_cpu)
775 {
776         bool result = false;
777         int cpu;
778
779         for (;;) {
780                 intptr_t *targetptr_spec, newval_spec;
781                 intptr_t *targetptr_final, newval_final;
782                 intptr_t offset;
783                 int ret;
784
785                 cpu = get_current_cpu_id();
786                 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
787                 if (offset == buffer->c[cpu].buflen)
788                         break;
789                 newval_spec = (intptr_t)node;
790                 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
791                 newval_final = offset + 1;
792                 targetptr_final = &buffer->c[cpu].offset;
793                 ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
794                         targetptr_final, offset, targetptr_spec,
795                         newval_spec, newval_final, cpu);
796                 if (rseq_likely(!ret)) {
797                         result = true;
798                         break;
799                 }
800                 /* Retry if comparison fails or rseq aborts. */
801         }
802         if (_cpu)
803                 *_cpu = cpu;
804         return result;
805 }
806
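/*
 * Pop the most recently pushed node from the current cpu's buffer.
 * Returns NULL if the buffer is empty.
 */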
807 struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
808                                                int *_cpu)
809 {
810         struct percpu_buffer_node *head;
811         int cpu;
812
813         for (;;) {
814                 intptr_t *targetptr, newval;
815                 intptr_t offset;
816                 int ret;
817
818                 cpu = get_current_cpu_id();
819                 /* Load offset with single-copy atomicity. */
820                 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
821                 if (offset == 0) {
822                         head = NULL;
823                         break;
824                 }
825                 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
826                 newval = offset - 1;
827                 targetptr = (intptr_t *)&buffer->c[cpu].offset;
828                 ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
829                         targetptr, offset,
830                         (intptr_t *)&buffer->c[cpu].array[offset - 1],
831                         (intptr_t)head, newval, cpu);
832                 if (rseq_likely(!ret))
833                         break;
834                 /* Retry if comparison fails or rseq aborts. */
835         }
836         if (_cpu)
837                 *_cpu = cpu;
838         return head;
839 }
840
841 /*
842  * __percpu_buffer_pop is not safe against concurrent accesses. Should
843  * only be used on buffers that are not concurrently modified.
844  */
845 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
846                                                int cpu)
847 {
848         struct percpu_buffer_node *head;
849         intptr_t offset;
850
851         offset = buffer->c[cpu].offset;
852         if (offset == 0)
853                 return NULL;
854         head = buffer->c[cpu].array[offset - 1];
855         buffer->c[cpu].offset = offset - 1;
856         return head;
857 }
858
859 void *test_percpu_buffer_thread(void *arg)
860 {
861         long long i, reps;
862         struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
863
864         if (!opt_disable_rseq && rseq_register_current_thread())
865                 abort();
866
867         reps = opt_reps;
868         for (i = 0; i < reps; i++) {
869                 struct percpu_buffer_node *node;
870
871                 node = this_cpu_buffer_pop(buffer, NULL);
872                 if (opt_yield)
873                         sched_yield();  /* encourage shuffling */
874                 if (node) {
875                         if (!this_cpu_buffer_push(buffer, node, NULL)) {
876                                 /* Should increase buffer size. */
877                                 abort();
878                         }
879                 }
880         }
881
882         printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
883                        (int) rseq_gettid(), nr_abort, signals_delivered);
884         if (!opt_disable_rseq && rseq_unregister_current_thread())
885                 abort();
886
887         return NULL;
888 }
889
890 /* Simultaneous modification to a per-cpu buffer from many threads.  */
891 void test_percpu_buffer(void)
892 {
893         const int num_threads = opt_threads;
894         int i, j, ret;
895         uint64_t sum = 0, expected_sum = 0;
896         struct percpu_buffer buffer;
897         pthread_t test_threads[num_threads];
898         cpu_set_t allowed_cpus;
899
900         memset(&buffer, 0, sizeof(buffer));
901
902         /* Generate buffer entries for every usable cpu. */
903         sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
904         for (i = 0; i < CPU_SETSIZE; i++) {
905                 if (!CPU_ISSET(i, &allowed_cpus))
906                         continue;
907                 /* Worst case is every item on the same CPU. */
908                 buffer.c[i].array =
909                         malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
910                                BUFFER_ITEM_PER_CPU);
911                 assert(buffer.c[i].array);
912                 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
913                 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
914                         struct percpu_buffer_node *node;
915
916                         expected_sum += j;
917
918                         /*
919                          * We could theoretically put the word-sized
920                          * "data" directly in the buffer. However, we
921                          * want to model objects that would not fit
922                          * within a single word, so allocate an object
923                          * for each node.
924                          */
925                         node = malloc(sizeof(*node));
926                         assert(node);
927                         node->data = j;
928                         buffer.c[i].array[j - 1] = node;
929                         buffer.c[i].offset++;
930                 }
931         }
932
933         for (i = 0; i < num_threads; i++) {
934                 ret = pthread_create(&test_threads[i], NULL,
935                                      test_percpu_buffer_thread, &buffer);
936                 if (ret) {
937                         errno = ret;
938                         perror("pthread_create");
939                         abort();
940                 }
941         }
942
943         for (i = 0; i < num_threads; i++) {
944                 ret = pthread_join(test_threads[i], NULL);
945                 if (ret) {
946                         errno = ret;
947                         perror("pthread_join");
948                         abort();
949                 }
950         }
951
952         for (i = 0; i < CPU_SETSIZE; i++) {
953                 struct percpu_buffer_node *node;
954
955                 if (!CPU_ISSET(i, &allowed_cpus))
956                         continue;
957
958                 while ((node = __percpu_buffer_pop(&buffer, i))) {
959                         sum += node->data;
960                         free(node);
961                 }
962                 free(buffer.c[i].array);
963         }
964
965         /*
966          * All entries should now be accounted for (unless some external
967          * actor is interfering with our allowed affinity while this
968          * test is running).
969          */
970         assert(sum == expected_sum);
971 }
972
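/*
 * Copy item into the current cpu's memcpy buffer, then publish the
 * incremented offset. Returns false if the buffer is full.
 */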
973 bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
974                                  struct percpu_memcpy_buffer_node item,
975                                  int *_cpu)
976 {
977         bool result = false;
978         int cpu;
979
980         for (;;) {
981                 intptr_t *targetptr_final, newval_final, offset;
982                 char *destptr, *srcptr;
983                 size_t copylen;
984                 int ret;
985
986                 cpu = get_current_cpu_id();
987                 /* Load offset with single-copy atomicity. */
988                 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
989                 if (offset == buffer->c[cpu].buflen)
990                         break;
991                 destptr = (char *)&buffer->c[cpu].array[offset];
992                 srcptr = (char *)&item;
993                 /* copylen must be <= 4kB. */
994                 copylen = sizeof(item);
995                 newval_final = offset + 1;
996                 targetptr_final = &buffer->c[cpu].offset;
997                 ret = rseq_cmpeqv_trymemcpy_storev(
998                         opt_mo, RSEQ_PERCPU,
999                         targetptr_final, offset,
1000                         destptr, srcptr, copylen,
1001                         newval_final, cpu);
1002                 if (rseq_likely(!ret)) {
1003                         result = true;
1004                         break;
1005                 }
1006                 /* Retry if comparison fails or rseq aborts. */
1007         }
1008         if (_cpu)
1009                 *_cpu = cpu;
1010         return result;
1011 }
1012
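/*
 * Copy the most recently pushed item out of the current cpu's memcpy
 * buffer into *item, then publish the decremented offset. Returns false
 * if the buffer is empty.
 */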
1013 bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1014                                 struct percpu_memcpy_buffer_node *item,
1015                                 int *_cpu)
1016 {
1017         bool result = false;
1018         int cpu;
1019
1020         for (;;) {
1021                 intptr_t *targetptr_final, newval_final, offset;
1022                 char *destptr, *srcptr;
1023                 size_t copylen;
1024                 int ret;
1025
1026                 cpu = get_current_cpu_id();
1027                 /* Load offset with single-copy atomicity. */
1028                 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1029                 if (offset == 0)
1030                         break;
1031                 destptr = (char *)item;
1032                 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
1033                 /* copylen must be <= 4kB. */
1034                 copylen = sizeof(*item);
1035                 newval_final = offset - 1;
1036                 targetptr_final = &buffer->c[cpu].offset;
1037                 ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1038                         targetptr_final, offset, destptr, srcptr, copylen,
1039                         newval_final, cpu);
1040                 if (rseq_likely(!ret)) {
1041                         result = true;
1042                         break;
1043                 }
1044                 /* Retry if comparison fails or rseq aborts. */
1045         }
1046         if (_cpu)
1047                 *_cpu = cpu;
1048         return result;
1049 }
1050
1051 /*
1052  * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1053  * only be used on buffers that are not concurrently modified.
1054  */
1055 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1056                                 struct percpu_memcpy_buffer_node *item,
1057                                 int cpu)
1058 {
1059         intptr_t offset;
1060
1061         offset = buffer->c[cpu].offset;
1062         if (offset == 0)
1063                 return false;
1064         memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1065         buffer->c[cpu].offset = offset - 1;
1066         return true;
1067 }
1068
1069 void *test_percpu_memcpy_buffer_thread(void *arg)
1070 {
1071         long long i, reps;
1072         struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1073
1074         if (!opt_disable_rseq && rseq_register_current_thread())
1075                 abort();
1076
1077         reps = opt_reps;
1078         for (i = 0; i < reps; i++) {
1079                 struct percpu_memcpy_buffer_node item;
1080                 bool result;
1081
1082                 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1083                 if (opt_yield)
1084                         sched_yield();  /* encourage shuffling */
1085                 if (result) {
1086                         if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1087                                 /* Should increase buffer size. */
1088                                 abort();
1089                         }
1090                 }
1091         }
1092
1093         printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
1094                        (int) rseq_gettid(), nr_abort, signals_delivered);
1095         if (!opt_disable_rseq && rseq_unregister_current_thread())
1096                 abort();
1097
1098         return NULL;
1099 }
1100
1101 /* Simultaneous modification to a per-cpu buffer from many threads.  */
1102 void test_percpu_memcpy_buffer(void)
1103 {
1104         const int num_threads = opt_threads;
1105         int i, j, ret;
1106         uint64_t sum = 0, expected_sum = 0;
1107         struct percpu_memcpy_buffer buffer;
1108         pthread_t test_threads[num_threads];
1109         cpu_set_t allowed_cpus;
1110
1111         memset(&buffer, 0, sizeof(buffer));
1112
1113         /* Generate buffer entries for every usable cpu. */
1114         sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1115         for (i = 0; i < CPU_SETSIZE; i++) {
1116                 if (!CPU_ISSET(i, &allowed_cpus))
1117                         continue;
1118                 /* Worst case is every item on the same CPU. */
1119                 buffer.c[i].array =
1120                         malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1121                                MEMCPY_BUFFER_ITEM_PER_CPU);
1122                 assert(buffer.c[i].array);
1123                 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1124                 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1125                         expected_sum += 2 * j + 1;
1126
1127                         /*
1128                          * We could theoretically put the word-sized
1129                          * "data" directly in the buffer. However, we
1130                          * want to model objects that would not fit
1131                          * within a single word, so allocate an object
1132                          * for each node.
1133                          */
1134                         buffer.c[i].array[j - 1].data1 = j;
1135                         buffer.c[i].array[j - 1].data2 = j + 1;
1136                         buffer.c[i].offset++;
1137                 }
1138         }
1139
1140         for (i = 0; i < num_threads; i++) {
1141                 ret = pthread_create(&test_threads[i], NULL,
1142                                      test_percpu_memcpy_buffer_thread,
1143                                      &buffer);
1144                 if (ret) {
1145                         errno = ret;
1146                         perror("pthread_create");
1147                         abort();
1148                 }
1149         }
1150
1151         for (i = 0; i < num_threads; i++) {
1152                 ret = pthread_join(test_threads[i], NULL);
1153                 if (ret) {
1154                         errno = ret;
1155                         perror("pthread_join");
1156                         abort();
1157                 }
1158         }
1159
1160         for (i = 0; i < CPU_SETSIZE; i++) {
1161                 struct percpu_memcpy_buffer_node item;
1162
1163                 if (!CPU_ISSET(i, &allowed_cpus))
1164                         continue;
1165
1166                 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1167                         sum += item.data1;
1168                         sum += item.data2;
1169                 }
1170                 free(buffer.c[i].array);
1171         }
1172
1173         /*
1174          * All entries should now be accounted for (unless some external
1175          * actor is interfering with our allowed affinity while this
1176          * test is running).
1177          */
1178         assert(sum == expected_sum);
1179 }
1180
1181 static void test_signal_interrupt_handler(int signo)
1182 {
1183         signals_delivered++;
1184 }
1185
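/* Install test_signal_interrupt_handler as the SIGUSR1 handler. */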
1186 static int set_signal_handler(void)
1187 {
1188         int ret = 0;
1189         struct sigaction sa;
1190         sigset_t sigset;
1191
1192         ret = sigemptyset(&sigset);
1193         if (ret < 0) {
1194                 perror("sigemptyset");
1195                 return ret;
1196         }
1197
1198         sa.sa_handler = test_signal_interrupt_handler;
1199         sa.sa_mask = sigset;
1200         sa.sa_flags = 0;
1201         ret = sigaction(SIGUSR1, &sa, NULL);
1202         if (ret < 0) {
1203                 perror("sigaction");
1204                 return ret;
1205         }
1206
1207         printf_verbose("Signal handler set for SIGUSR1\n");
1208
1209         return ret;
1210 }
1211
1212 /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
1213 #ifdef TEST_MEMBARRIER
1214 struct test_membarrier_thread_args {
1215         int stop;
1216         intptr_t percpu_list_ptr;
1217 };
1218
1219 /* Worker threads modify data in their "active" percpu lists. */
1220 void *test_membarrier_worker_thread(void *arg)
1221 {
1222         struct test_membarrier_thread_args *args =
1223                 (struct test_membarrier_thread_args *)arg;
1224         const int iters = opt_reps;
1225         int i;
1226
1227         if (rseq_register_current_thread()) {
1228                 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1229                         errno, strerror(errno));
1230                 abort();
1231         }
1232
1233         /* Wait for initialization. */
1234         while (!atomic_load(&args->percpu_list_ptr)) {}
1235
1236         for (i = 0; i < iters; ++i) {
1237                 int ret;
1238
1239                 do {
1240                         int cpu = get_current_cpu_id();
1241
1242                         ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1243                                 &args->percpu_list_ptr,
1244                                 sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1245                 } while (rseq_unlikely(ret));
1246         }
1247
1248         if (rseq_unregister_current_thread()) {
1249                 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1250                         errno, strerror(errno));
1251                 abort();
1252         }
1253         return NULL;
1254 }
1255
1256 void test_membarrier_init_percpu_list(struct percpu_list *list)
1257 {
1258         int i;
1259
1260         memset(list, 0, sizeof(*list));
1261         for (i = 0; i < CPU_SETSIZE; i++) {
1262                 struct percpu_list_node *node;
1263
1264                 node = malloc(sizeof(*node));
1265                 assert(node);
1266                 node->data = 0;
1267                 node->next = NULL;
1268                 list->c[i].head = node;
1269         }
1270 }
1271
1272 void test_membarrier_free_percpu_list(struct percpu_list *list)
1273 {
1274         int i;
1275
1276         for (i = 0; i < CPU_SETSIZE; i++)
1277                 free(list->c[i].head);
1278 }
1279
1280 /*
1281  * The manager thread swaps per-cpu lists that worker threads see,
1282  * and validates that there are no unexpected modifications.
1283  */
1284 void *test_membarrier_manager_thread(void *arg)
1285 {
1286         struct test_membarrier_thread_args *args =
1287                 (struct test_membarrier_thread_args *)arg;
1288         struct percpu_list list_a, list_b;
1289         intptr_t expect_a = 0, expect_b = 0;
1290         int cpu_a = 0, cpu_b = 0;
1291
1292         if (rseq_register_current_thread()) {
1293                 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1294                         errno, strerror(errno));
1295                 abort();
1296         }
1297
1298         /* Init lists. */
1299         test_membarrier_init_percpu_list(&list_a);
1300         test_membarrier_init_percpu_list(&list_b);
1301
1302         atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
1303
1304         while (!atomic_load(&args->stop)) {
1305                 /* list_a is "active". */
1306                 cpu_a = rand() % CPU_SETSIZE;
1307                 /*
1308                  * As list_b is "inactive", we should never see changes
1309                  * to list_b.
1310                  */
1311                 if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
1312                         fprintf(stderr, "Membarrier test failed\n");
1313                         abort();
1314                 }
1315
1316                 /* Make list_b "active". */
1317                 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
1318                 if (rseq_membarrier_expedited(cpu_a) &&
1319                                 errno != ENXIO /* missing CPU */) {
1320                         perror("sys_membarrier");
1321                         abort();
1322                 }
1323                 /*
1324                  * Cpu A should now only modify list_b, so the values
1325                  * in list_a should be stable.
1326                  */
1327                 expect_a = atomic_load(&list_a.c[cpu_a].head->data);
1328
1329                 cpu_b = rand() % CPU_SETSIZE;
1330                 /*
1331                  * As list_a is "inactive", we should never see changes
1332                  * to list_a.
1333                  */
1334                 if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
1335                         fprintf(stderr, "Membarrier test failed\n");
1336                         abort();
1337                 }
1338
1339                 /* Make list_a "active". */
1340                 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
1341                 if (rseq_membarrier_expedited(cpu_b) &&
1342                                 errno != ENXIO /* missing CPU */) {
1343                         perror("sys_membarrier");
1344                         abort();
1345                 }
1346                 /* Remember a value from list_b. */
1347                 expect_b = atomic_load(&list_b.c[cpu_b].head->data);
1348         }
1349
1350         test_membarrier_free_percpu_list(&list_a);
1351         test_membarrier_free_percpu_list(&list_b);
1352
1353         if (rseq_unregister_current_thread()) {
1354                 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1355                         errno, strerror(errno));
1356                 abort();
1357         }
1358         return NULL;
1359 }
1360
1361 void test_membarrier(void)
1362 {
1363         const int num_threads = opt_threads;
1364         struct test_membarrier_thread_args thread_args;
1365         pthread_t worker_threads[num_threads];
1366         pthread_t manager_thread;
1367         int i, ret;
1368
1369         if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1370                 perror("sys_membarrier");
1371                 abort();
1372         }
1373
1374         thread_args.stop = 0;
1375         thread_args.percpu_list_ptr = 0;
1376         ret = pthread_create(&manager_thread, NULL,
1377                         test_membarrier_manager_thread, &thread_args);
1378         if (ret) {
1379                 errno = ret;
1380                 perror("pthread_create");
1381                 abort();
1382         }
1383
1384         for (i = 0; i < num_threads; i++) {
1385                 ret = pthread_create(&worker_threads[i], NULL,
1386                                 test_membarrier_worker_thread, &thread_args);
1387                 if (ret) {
1388                         errno = ret;
1389                         perror("pthread_create");
1390                         abort();
1391                 }
1392         }
1393
1394
1395         for (i = 0; i < num_threads; i++) {
1396                 ret = pthread_join(worker_threads[i], NULL);
1397                 if (ret) {
1398                         errno = ret;
1399                         perror("pthread_join");
1400                         abort();
1401                 }
1402         }
1403
1404         atomic_store(&thread_args.stop, 1);
1405         ret = pthread_join(manager_thread, NULL);
1406         if (ret) {
1407                 errno = ret;
1408                 perror("pthread_join");
1409                 abort();
1410         }
1411 }
1412 #else /* TEST_MEMBARRIER */
1413 void test_membarrier(void)
1414 {
1415         fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
1416                         "Skipping membarrier test.\n");
1417 }
1418 #endif
1419
1420 static void show_usage(int argc, char **argv)
1421 {
1422         printf("Usage : %s <OPTIONS>\n",
1423                 argv[0]);
1424         printf("OPTIONS:\n");
1425         printf("        [-1 loops] Number of loops for delay injection 1\n");
1426         printf("        [-2 loops] Number of loops for delay injection 2\n");
1427         printf("        [-3 loops] Number of loops for delay injection 3\n");
1428         printf("        [-4 loops] Number of loops for delay injection 4\n");
1429         printf("        [-5 loops] Number of loops for delay injection 5\n");
1430         printf("        [-6 loops] Number of loops for delay injection 6\n");
1431         printf("        [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1432         printf("        [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1433         printf("        [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1434         printf("        [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1435         printf("        [-y] Yield\n");
1436         printf("        [-k] Kill thread with signal\n");
1437         printf("        [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1438         printf("        [-t N] Number of threads (default 200)\n");
1439         printf("        [-r N] Number of repetitions per thread (default 5000)\n");
1440         printf("        [-d] Disable rseq system call (no initialization)\n");
1441         printf("        [-D M] Disable rseq for each M threads\n");
1442         printf("        [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1443         printf("        [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1444         printf("        [-v] Verbose output.\n");
1445         printf("        [-h] Show this help.\n");
1446         printf("\n");
1447 }
1448
1449 int main(int argc, char **argv)
1450 {
1451         int i;
1452
1453         for (i = 1; i < argc; i++) {
1454                 if (argv[i][0] != '-')
1455                         continue;
1456                 switch (argv[i][1]) {
1457                 case '1':
1458                 case '2':
1459                 case '3':
1460                 case '4':
1461                 case '5':
1462                 case '6':
1463                 case '7':
1464                 case '8':
1465                 case '9':
1466                         if (argc < i + 2) {
1467                                 show_usage(argc, argv);
1468                                 goto error;
1469                         }
1470                         loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1471                         i++;
1472                         break;
1473                 case 'm':
1474                         if (argc < i + 2) {
1475                                 show_usage(argc, argv);
1476                                 goto error;
1477                         }
1478                         opt_modulo = atol(argv[i + 1]);
1479                         if (opt_modulo < 0) {
1480                                 show_usage(argc, argv);
1481                                 goto error;
1482                         }
1483                         i++;
1484                         break;
1485                 case 's':
1486                         if (argc < i + 2) {
1487                                 show_usage(argc, argv);
1488                                 goto error;
1489                         }
1490                         opt_sleep = atol(argv[i + 1]);
1491                         if (opt_sleep < 0) {
1492                                 show_usage(argc, argv);
1493                                 goto error;
1494                         }
1495                         i++;
1496                         break;
1497                 case 'y':
1498                         opt_yield = 1;
1499                         break;
1500                 case 'k':
1501                         opt_signal = 1;
1502                         break;
1503                 case 'd':
1504                         opt_disable_rseq = 1;
1505                         break;
1506                 case 'D':
1507                         if (argc < i + 2) {
1508                                 show_usage(argc, argv);
1509                                 goto error;
1510                         }
1511                         opt_disable_mod = atol(argv[i + 1]);
1512                         if (opt_disable_mod < 0) {
1513                                 show_usage(argc, argv);
1514                                 goto error;
1515                         }
1516                         i++;
1517                         break;
1518                 case 't':
1519                         if (argc < i + 2) {
1520                                 show_usage(argc, argv);
1521                                 goto error;
1522                         }
1523                         opt_threads = atol(argv[i + 1]);
1524                         if (opt_threads < 0) {
1525                                 show_usage(argc, argv);
1526                                 goto error;
1527                         }
1528                         i++;
1529                         break;
1530                 case 'r':
1531                         if (argc < i + 2) {
1532                                 show_usage(argc, argv);
1533                                 goto error;
1534                         }
1535                         opt_reps = atoll(argv[i + 1]);
1536                         if (opt_reps < 0) {
1537                                 show_usage(argc, argv);
1538                                 goto error;
1539                         }
1540                         i++;
1541                         break;
1542                 case 'h':
1543                         show_usage(argc, argv);
1544                         goto end;
1545                 case 'T':
1546                         if (argc < i + 2) {
1547                                 show_usage(argc, argv);
1548                                 goto error;
1549                         }
1550                         opt_test = *argv[i + 1];
1551                         switch (opt_test) {
1552                         case 's':
1553                         case 'l':
1554                         case 'i':
1555                         case 'b':
1556                         case 'm':
1557                         case 'r':
1558                                 break;
1559                         default:
1560                                 show_usage(argc, argv);
1561                                 goto error;
1562                         }
1563                         i++;
1564                         break;
1565                 case 'v':
1566                         verbose = 1;
1567                         break;
1568                 case 'M':
1569                         opt_mo = RSEQ_MO_RELEASE;
1570                         break;
1571                 default:
1572                         show_usage(argc, argv);
1573                         goto error;
1574                 }
1575         }
1576
1577         loop_cnt_1 = loop_cnt[1];
1578         loop_cnt_2 = loop_cnt[2];
1579         loop_cnt_3 = loop_cnt[3];
1580         loop_cnt_4 = loop_cnt[4];
1581         loop_cnt_5 = loop_cnt[5];
1582         loop_cnt_6 = loop_cnt[6];
1583
1584         if (set_signal_handler())
1585                 goto error;
1586
1587         if (!opt_disable_rseq && rseq_register_current_thread())
1588                 goto error;
1589         if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1590                 fprintf(stderr, "Error: cpu id getter unavailable\n");
1591                 goto error;
1592         }
1593         switch (opt_test) {
1594         case 's':
1595                 printf_verbose("spinlock\n");
1596                 test_percpu_spinlock();
1597                 break;
1598         case 'l':
1599                 printf_verbose("linked list\n");
1600                 test_percpu_list();
1601                 break;
1602         case 'b':
1603                 printf_verbose("buffer\n");
1604                 test_percpu_buffer();
1605                 break;
1606         case 'm':
1607                 printf_verbose("memcpy buffer\n");
1608                 test_percpu_memcpy_buffer();
1609                 break;
1610         case 'i':
1611                 printf_verbose("counter increment\n");
1612                 test_percpu_inc();
1613                 break;
1614         case 'r':
1615                 printf_verbose("membarrier\n");
1616                 test_membarrier();
1617                 break;
1618         }
1619         if (!opt_disable_rseq && rseq_unregister_current_thread())
1620                 abort();
1621 end:
1622         return 0;
1623
1624 error:
1625         return -1;
1626 }