]>
Commit | Line | Data |
---|---|---|
7f904d7e TG |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. | |
fad3917e JDB |
3 | */ |
/* One-line description of this tool; usage() prints it under "DOCUMENTATION:". */
static const char *__doc__ =
	" XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
6 | ||
7 | #include <errno.h> | |
8 | #include <signal.h> | |
9 | #include <stdio.h> | |
10 | #include <stdlib.h> | |
11 | #include <stdbool.h> | |
12 | #include <string.h> | |
13 | #include <unistd.h> | |
14 | #include <locale.h> | |
15 | #include <sys/resource.h> | |
6a098154 | 16 | #include <sys/sysinfo.h> |
fad3917e JDB |
17 | #include <getopt.h> |
18 | #include <net/if.h> | |
19 | #include <time.h> | |
f9e6bfdb JDB |
20 | #include <linux/limits.h> |
21 | ||
fad3917e JDB |
22 | #include <arpa/inet.h> |
23 | #include <linux/if_link.h> | |
24 | ||
/* Number of xdp_progs defined in the companion _kern.c object */
#define MAX_PROG	6
fad3917e | 27 | |
2bf3e2ef | 28 | #include <bpf/bpf.h> |
7cf245a3 | 29 | #include <bpf/libbpf.h> |
fad3917e JDB |
30 | |
31 | #include "bpf_util.h" | |
32 | ||
33 | static int ifindex = -1; | |
34 | static char ifname_buf[IF_NAMESIZE]; | |
35 | static char *ifname; | |
3b7a8ec2 | 36 | static __u32 prog_id; |
fad3917e | 37 | |
743e568c | 38 | static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; |
6a098154 | 39 | static int n_cpus; |
151936bf DL |
40 | |
/* Index of every BPF map this tool looks up; also the index into map_fds[]. */
enum map_type {
	CPU_MAP = 0,
	RX_CNT,
	REDIRECT_ERR_CNT,
	CPUMAP_ENQUEUE_CNT,
	CPUMAP_KTHREAD_CNT,
	CPUS_AVAILABLE,
	CPUS_COUNT,
	CPUS_ITERATOR,
	EXCEPTION_CNT,
};

/* Map names as used when looking the maps up in the loaded object. */
static const char *const map_type_strings[] = {
	[CPU_MAP]		= "cpu_map",
	[RX_CNT]		= "rx_cnt",
	[REDIRECT_ERR_CNT]	= "redirect_err_cnt",
	[CPUMAP_ENQUEUE_CNT]	= "cpumap_enqueue_cnt",
	[CPUMAP_KTHREAD_CNT]	= "cpumap_kthread_cnt",
	[CPUS_AVAILABLE]	= "cpus_available",
	[CPUS_COUNT]		= "cpus_count",
	[CPUS_ITERATOR]		= "cpus_iterator",
	[EXCEPTION_CNT]		= "exception_cnt",
};
fad3917e | 64 | |
/* Capacity of tp_links[]: maximum number of tracepoint programs attached. */
#define NUM_TP 5
/* Number of entries in map_fds[]; one per enum map_type value. */
#define NUM_MAP 9

/*
 * Attached tracepoint links and the map file descriptors. All are private to
 * this file, so they get internal linkage like the other globals; static
 * storage is zero-initialized, so no explicit (non-standard "{}") initializer
 * is needed.
 */
static struct bpf_link *tp_links[NUM_TP];
static int map_fds[NUM_MAP];
/* Number of valid entries at the front of tp_links[]. */
static int tp_cnt;
70 | ||
fad3917e JDB |
/* Process exit codes used by this tool */
enum {
	EXIT_OK		 = 0,
	EXIT_FAIL	 = 1,
	EXIT_FAIL_OPTION = 2,
	EXIT_FAIL_XDP	 = 3,
	EXIT_FAIL_BPF	 = 4,
	EXIT_FAIL_MEM	 = 5,
};
78 | ||
/*
 * Command line options. Every entry has a NULL flag pointer, so getopt_long()
 * returns the character in the last column; main() switches on that value and
 * usage() prints it as the short option.
 */
static const struct option long_options[] = {
	/* general */
	{ "help",		no_argument,		NULL, 'h' },
	{ "dev",		required_argument,	NULL, 'd' },
	{ "skb-mode",		no_argument,		NULL, 'S' },
	{ "sec",		required_argument,	NULL, 's' },
	{ "progname",		required_argument,	NULL, 'p' },
	{ "qsize",		required_argument,	NULL, 'q' },
	{ "cpu",		required_argument,	NULL, 'c' },
	{ "stress-mode",	no_argument,		NULL, 'x' },
	{ "no-separators",	no_argument,		NULL, 'z' },
	{ "force",		no_argument,		NULL, 'F' },
	/* program attached to the cpumap entries */
	{ "mprog-disable",	no_argument,		NULL, 'n' },
	{ "mprog-name",		required_argument,	NULL, 'e' },
	{ "mprog-filename",	required_argument,	NULL, 'f' },
	{ "redirect-device",	required_argument,	NULL, 'r' },
	{ "redirect-map",	required_argument,	NULL, 'm' },
	/* terminator */
	{ 0, 0, NULL, 0 }
};
97 | ||
98 | static void int_exit(int sig) | |
99 | { | |
3b7a8ec2 MF |
100 | __u32 curr_prog_id = 0; |
101 | ||
102 | if (ifindex > -1) { | |
103 | if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { | |
104 | printf("bpf_get_link_xdp_id failed\n"); | |
105 | exit(EXIT_FAIL); | |
106 | } | |
107 | if (prog_id == curr_prog_id) { | |
108 | fprintf(stderr, | |
109 | "Interrupted: Removing XDP program on ifindex:%d device:%s\n", | |
110 | ifindex, ifname); | |
111 | bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); | |
112 | } else if (!curr_prog_id) { | |
113 | printf("couldn't find a prog id on a given iface\n"); | |
114 | } else { | |
115 | printf("program on interface changed, not removing\n"); | |
116 | } | |
117 | } | |
f9e6bfdb JDB |
118 | /* Detach tracepoints */ |
119 | while (tp_cnt) | |
120 | bpf_link__destroy(tp_links[--tp_cnt]); | |
121 | ||
fad3917e JDB |
122 | exit(EXIT_OK); |
123 | } | |
124 | ||
bbaf6029 MF |
125 | static void print_avail_progs(struct bpf_object *obj) |
126 | { | |
127 | struct bpf_program *pos; | |
128 | ||
129 | bpf_object__for_each_program(pos, obj) { | |
130 | if (bpf_program__is_xdp(pos)) | |
698584df | 131 | printf(" %s\n", bpf_program__section_name(pos)); |
bbaf6029 MF |
132 | } |
133 | } | |
134 | ||
135 | static void usage(char *argv[], struct bpf_object *obj) | |
fad3917e JDB |
136 | { |
137 | int i; | |
138 | ||
139 | printf("\nDOCUMENTATION:\n%s\n", __doc__); | |
140 | printf("\n"); | |
141 | printf(" Usage: %s (options-see-below)\n", argv[0]); | |
142 | printf(" Listing options:\n"); | |
143 | for (i = 0; long_options[i].name != 0; i++) { | |
144 | printf(" --%-12s", long_options[i].name); | |
145 | if (long_options[i].flag != NULL) | |
146 | printf(" flag (internal value:%d)", | |
147 | *long_options[i].flag); | |
148 | else | |
149 | printf(" short-option: -%c", | |
150 | long_options[i].val); | |
151 | printf("\n"); | |
152 | } | |
bbaf6029 MF |
153 | printf("\n Programs to be used for --progname:\n"); |
154 | print_avail_progs(obj); | |
fad3917e JDB |
155 | printf("\n"); |
156 | } | |
157 | ||
158 | /* gettime returns the current time of day in nanoseconds. | |
159 | * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC) | |
160 | * clock_gettime (ns) => 9ns (CLOCK_MONOTONIC_COARSE) | |
161 | */ | |
162 | #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ | |
163 | static __u64 gettime(void) | |
164 | { | |
165 | struct timespec t; | |
166 | int res; | |
167 | ||
168 | res = clock_gettime(CLOCK_MONOTONIC, &t); | |
169 | if (res < 0) { | |
170 | fprintf(stderr, "Error with gettimeofday! (%i)\n", res); | |
171 | exit(EXIT_FAIL); | |
172 | } | |
173 | return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; | |
174 | } | |
175 | ||
176 | /* Common stats data record shared with _kern.c */ | |
177 | struct datarec { | |
178 | __u64 processed; | |
179 | __u64 dropped; | |
180 | __u64 issue; | |
ce4dade7 LB |
181 | __u64 xdp_pass; |
182 | __u64 xdp_drop; | |
183 | __u64 xdp_redirect; | |
fad3917e JDB |
184 | }; |
185 | struct record { | |
186 | __u64 timestamp; | |
187 | struct datarec total; | |
188 | struct datarec *cpu; | |
189 | }; | |
190 | struct stats_record { | |
191 | struct record rx_cnt; | |
192 | struct record redir_err; | |
193 | struct record kthread; | |
194 | struct record exception; | |
6a098154 | 195 | struct record enq[]; |
fad3917e JDB |
196 | }; |
197 | ||
198 | static bool map_collect_percpu(int fd, __u32 key, struct record *rec) | |
199 | { | |
200 | /* For percpu maps, userspace gets a value per possible CPU */ | |
201 | unsigned int nr_cpus = bpf_num_possible_cpus(); | |
202 | struct datarec values[nr_cpus]; | |
ce4dade7 LB |
203 | __u64 sum_xdp_redirect = 0; |
204 | __u64 sum_xdp_pass = 0; | |
205 | __u64 sum_xdp_drop = 0; | |
fad3917e JDB |
206 | __u64 sum_processed = 0; |
207 | __u64 sum_dropped = 0; | |
208 | __u64 sum_issue = 0; | |
209 | int i; | |
210 | ||
211 | if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { | |
212 | fprintf(stderr, | |
213 | "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); | |
214 | return false; | |
215 | } | |
216 | /* Get time as close as possible to reading map contents */ | |
217 | rec->timestamp = gettime(); | |
218 | ||
219 | /* Record and sum values from each CPU */ | |
220 | for (i = 0; i < nr_cpus; i++) { | |
221 | rec->cpu[i].processed = values[i].processed; | |
222 | sum_processed += values[i].processed; | |
223 | rec->cpu[i].dropped = values[i].dropped; | |
224 | sum_dropped += values[i].dropped; | |
225 | rec->cpu[i].issue = values[i].issue; | |
226 | sum_issue += values[i].issue; | |
ce4dade7 LB |
227 | rec->cpu[i].xdp_pass = values[i].xdp_pass; |
228 | sum_xdp_pass += values[i].xdp_pass; | |
229 | rec->cpu[i].xdp_drop = values[i].xdp_drop; | |
230 | sum_xdp_drop += values[i].xdp_drop; | |
231 | rec->cpu[i].xdp_redirect = values[i].xdp_redirect; | |
232 | sum_xdp_redirect += values[i].xdp_redirect; | |
fad3917e JDB |
233 | } |
234 | rec->total.processed = sum_processed; | |
235 | rec->total.dropped = sum_dropped; | |
236 | rec->total.issue = sum_issue; | |
ce4dade7 LB |
237 | rec->total.xdp_pass = sum_xdp_pass; |
238 | rec->total.xdp_drop = sum_xdp_drop; | |
239 | rec->total.xdp_redirect = sum_xdp_redirect; | |
fad3917e JDB |
240 | return true; |
241 | } | |
242 | ||
243 | static struct datarec *alloc_record_per_cpu(void) | |
244 | { | |
245 | unsigned int nr_cpus = bpf_num_possible_cpus(); | |
246 | struct datarec *array; | |
fad3917e | 247 | |
6903cdae | 248 | array = calloc(nr_cpus, sizeof(struct datarec)); |
fad3917e JDB |
249 | if (!array) { |
250 | fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); | |
251 | exit(EXIT_FAIL_MEM); | |
252 | } | |
253 | return array; | |
254 | } | |
255 | ||
256 | static struct stats_record *alloc_stats_record(void) | |
257 | { | |
258 | struct stats_record *rec; | |
6a098154 | 259 | int i, size; |
fad3917e | 260 | |
6a098154 LB |
261 | size = sizeof(*rec) + n_cpus * sizeof(struct record); |
262 | rec = malloc(size); | |
fad3917e JDB |
263 | if (!rec) { |
264 | fprintf(stderr, "Mem alloc error\n"); | |
265 | exit(EXIT_FAIL_MEM); | |
266 | } | |
6903cdae | 267 | memset(rec, 0, size); |
fad3917e JDB |
268 | rec->rx_cnt.cpu = alloc_record_per_cpu(); |
269 | rec->redir_err.cpu = alloc_record_per_cpu(); | |
270 | rec->kthread.cpu = alloc_record_per_cpu(); | |
271 | rec->exception.cpu = alloc_record_per_cpu(); | |
6a098154 | 272 | for (i = 0; i < n_cpus; i++) |
fad3917e JDB |
273 | rec->enq[i].cpu = alloc_record_per_cpu(); |
274 | ||
275 | return rec; | |
276 | } | |
277 | ||
278 | static void free_stats_record(struct stats_record *r) | |
279 | { | |
280 | int i; | |
281 | ||
6a098154 | 282 | for (i = 0; i < n_cpus; i++) |
fad3917e JDB |
283 | free(r->enq[i].cpu); |
284 | free(r->exception.cpu); | |
285 | free(r->kthread.cpu); | |
286 | free(r->redir_err.cpu); | |
287 | free(r->rx_cnt.cpu); | |
288 | free(r); | |
289 | } | |
290 | ||
291 | static double calc_period(struct record *r, struct record *p) | |
292 | { | |
293 | double period_ = 0; | |
294 | __u64 period = 0; | |
295 | ||
296 | period = r->timestamp - p->timestamp; | |
297 | if (period > 0) | |
298 | period_ = ((double) period / NANOSEC_PER_SEC); | |
299 | ||
300 | return period_; | |
301 | } | |
302 | ||
303 | static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_) | |
304 | { | |
305 | __u64 packets = 0; | |
306 | __u64 pps = 0; | |
307 | ||
308 | if (period_ > 0) { | |
309 | packets = r->processed - p->processed; | |
310 | pps = packets / period_; | |
311 | } | |
312 | return pps; | |
313 | } | |
314 | ||
315 | static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_) | |
316 | { | |
317 | __u64 packets = 0; | |
318 | __u64 pps = 0; | |
319 | ||
320 | if (period_ > 0) { | |
321 | packets = r->dropped - p->dropped; | |
322 | pps = packets / period_; | |
323 | } | |
324 | return pps; | |
325 | } | |
326 | ||
327 | static __u64 calc_errs_pps(struct datarec *r, | |
328 | struct datarec *p, double period_) | |
329 | { | |
330 | __u64 packets = 0; | |
331 | __u64 pps = 0; | |
332 | ||
333 | if (period_ > 0) { | |
334 | packets = r->issue - p->issue; | |
335 | pps = packets / period_; | |
336 | } | |
337 | return pps; | |
338 | } | |
339 | ||
ce4dade7 LB |
340 | static void calc_xdp_pps(struct datarec *r, struct datarec *p, |
341 | double *xdp_pass, double *xdp_drop, | |
342 | double *xdp_redirect, double period_) | |
343 | { | |
344 | *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0; | |
345 | if (period_ > 0) { | |
346 | *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_; | |
347 | *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_; | |
348 | *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_; | |
349 | } | |
350 | } | |
351 | ||
fad3917e JDB |
352 | static void stats_print(struct stats_record *stats_rec, |
353 | struct stats_record *stats_prev, | |
ce4dade7 | 354 | char *prog_name, char *mprog_name, int mprog_fd) |
fad3917e JDB |
355 | { |
356 | unsigned int nr_cpus = bpf_num_possible_cpus(); | |
357 | double pps = 0, drop = 0, err = 0; | |
ce4dade7 | 358 | bool mprog_enabled = false; |
fad3917e JDB |
359 | struct record *rec, *prev; |
360 | int to_cpu; | |
361 | double t; | |
362 | int i; | |
363 | ||
ce4dade7 LB |
364 | if (mprog_fd > 0) |
365 | mprog_enabled = true; | |
366 | ||
fad3917e | 367 | /* Header */ |
bbaf6029 | 368 | printf("Running XDP/eBPF prog_name:%s\n", prog_name); |
fad3917e JDB |
369 | printf("%-15s %-7s %-14s %-11s %-9s\n", |
370 | "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info"); | |
371 | ||
372 | /* XDP rx_cnt */ | |
373 | { | |
374 | char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n"; | |
375 | char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n"; | |
376 | char *errstr = ""; | |
377 | ||
378 | rec = &stats_rec->rx_cnt; | |
379 | prev = &stats_prev->rx_cnt; | |
380 | t = calc_period(rec, prev); | |
381 | for (i = 0; i < nr_cpus; i++) { | |
382 | struct datarec *r = &rec->cpu[i]; | |
383 | struct datarec *p = &prev->cpu[i]; | |
384 | ||
385 | pps = calc_pps(r, p, t); | |
386 | drop = calc_drop_pps(r, p, t); | |
387 | err = calc_errs_pps(r, p, t); | |
388 | if (err > 0) | |
389 | errstr = "cpu-dest/err"; | |
390 | if (pps > 0) | |
391 | printf(fmt_rx, "XDP-RX", | |
392 | i, pps, drop, err, errstr); | |
393 | } | |
394 | pps = calc_pps(&rec->total, &prev->total, t); | |
395 | drop = calc_drop_pps(&rec->total, &prev->total, t); | |
396 | err = calc_errs_pps(&rec->total, &prev->total, t); | |
397 | printf(fm2_rx, "XDP-RX", "total", pps, drop); | |
398 | } | |
399 | ||
400 | /* cpumap enqueue stats */ | |
6a098154 | 401 | for (to_cpu = 0; to_cpu < n_cpus; to_cpu++) { |
fad3917e JDB |
402 | char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n"; |
403 | char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n"; | |
404 | char *errstr = ""; | |
405 | ||
406 | rec = &stats_rec->enq[to_cpu]; | |
407 | prev = &stats_prev->enq[to_cpu]; | |
408 | t = calc_period(rec, prev); | |
409 | for (i = 0; i < nr_cpus; i++) { | |
410 | struct datarec *r = &rec->cpu[i]; | |
411 | struct datarec *p = &prev->cpu[i]; | |
412 | ||
413 | pps = calc_pps(r, p, t); | |
414 | drop = calc_drop_pps(r, p, t); | |
415 | err = calc_errs_pps(r, p, t); | |
416 | if (err > 0) { | |
417 | errstr = "bulk-average"; | |
418 | err = pps / err; /* calc average bulk size */ | |
419 | } | |
420 | if (pps > 0) | |
421 | printf(fmt, "cpumap-enqueue", | |
422 | i, to_cpu, pps, drop, err, errstr); | |
423 | } | |
424 | pps = calc_pps(&rec->total, &prev->total, t); | |
425 | if (pps > 0) { | |
426 | drop = calc_drop_pps(&rec->total, &prev->total, t); | |
427 | err = calc_errs_pps(&rec->total, &prev->total, t); | |
428 | if (err > 0) { | |
429 | errstr = "bulk-average"; | |
430 | err = pps / err; /* calc average bulk size */ | |
431 | } | |
432 | printf(fm2, "cpumap-enqueue", | |
433 | "sum", to_cpu, pps, drop, err, errstr); | |
434 | } | |
435 | } | |
436 | ||
437 | /* cpumap kthread stats */ | |
438 | { | |
439 | char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n"; | |
440 | char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n"; | |
441 | char *e_str = ""; | |
442 | ||
443 | rec = &stats_rec->kthread; | |
444 | prev = &stats_prev->kthread; | |
445 | t = calc_period(rec, prev); | |
446 | for (i = 0; i < nr_cpus; i++) { | |
447 | struct datarec *r = &rec->cpu[i]; | |
448 | struct datarec *p = &prev->cpu[i]; | |
449 | ||
450 | pps = calc_pps(r, p, t); | |
451 | drop = calc_drop_pps(r, p, t); | |
452 | err = calc_errs_pps(r, p, t); | |
453 | if (err > 0) | |
454 | e_str = "sched"; | |
455 | if (pps > 0) | |
456 | printf(fmt_k, "cpumap_kthread", | |
457 | i, pps, drop, err, e_str); | |
458 | } | |
459 | pps = calc_pps(&rec->total, &prev->total, t); | |
460 | drop = calc_drop_pps(&rec->total, &prev->total, t); | |
461 | err = calc_errs_pps(&rec->total, &prev->total, t); | |
462 | if (err > 0) | |
463 | e_str = "sched-sum"; | |
464 | printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str); | |
465 | } | |
466 | ||
467 | /* XDP redirect err tracepoints (very unlikely) */ | |
468 | { | |
469 | char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n"; | |
470 | char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n"; | |
471 | ||
472 | rec = &stats_rec->redir_err; | |
473 | prev = &stats_prev->redir_err; | |
474 | t = calc_period(rec, prev); | |
475 | for (i = 0; i < nr_cpus; i++) { | |
476 | struct datarec *r = &rec->cpu[i]; | |
477 | struct datarec *p = &prev->cpu[i]; | |
478 | ||
479 | pps = calc_pps(r, p, t); | |
480 | drop = calc_drop_pps(r, p, t); | |
481 | if (pps > 0) | |
482 | printf(fmt_err, "redirect_err", i, pps, drop); | |
483 | } | |
484 | pps = calc_pps(&rec->total, &prev->total, t); | |
485 | drop = calc_drop_pps(&rec->total, &prev->total, t); | |
486 | printf(fm2_err, "redirect_err", "total", pps, drop); | |
487 | } | |
488 | ||
489 | /* XDP general exception tracepoints */ | |
490 | { | |
491 | char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n"; | |
492 | char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n"; | |
493 | ||
494 | rec = &stats_rec->exception; | |
495 | prev = &stats_prev->exception; | |
496 | t = calc_period(rec, prev); | |
497 | for (i = 0; i < nr_cpus; i++) { | |
498 | struct datarec *r = &rec->cpu[i]; | |
499 | struct datarec *p = &prev->cpu[i]; | |
500 | ||
501 | pps = calc_pps(r, p, t); | |
502 | drop = calc_drop_pps(r, p, t); | |
503 | if (pps > 0) | |
504 | printf(fmt_err, "xdp_exception", i, pps, drop); | |
505 | } | |
506 | pps = calc_pps(&rec->total, &prev->total, t); | |
507 | drop = calc_drop_pps(&rec->total, &prev->total, t); | |
508 | printf(fm2_err, "xdp_exception", "total", pps, drop); | |
509 | } | |
510 | ||
ce4dade7 LB |
511 | /* CPUMAP attached XDP program that runs on remote/destination CPU */ |
512 | if (mprog_enabled) { | |
513 | char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n"; | |
514 | char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n"; | |
515 | double xdp_pass, xdp_drop, xdp_redirect; | |
516 | ||
517 | printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name); | |
518 | printf("%-15s %-7s %-14s %-11s %-9s\n", | |
519 | "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir"); | |
520 | ||
521 | rec = &stats_rec->kthread; | |
522 | prev = &stats_prev->kthread; | |
523 | t = calc_period(rec, prev); | |
524 | for (i = 0; i < nr_cpus; i++) { | |
525 | struct datarec *r = &rec->cpu[i]; | |
526 | struct datarec *p = &prev->cpu[i]; | |
527 | ||
528 | calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, | |
529 | &xdp_redirect, t); | |
530 | if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0) | |
531 | printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop, | |
532 | xdp_redirect); | |
533 | } | |
534 | calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop, | |
535 | &xdp_redirect, t); | |
536 | printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect); | |
537 | } | |
538 | ||
fad3917e JDB |
539 | printf("\n"); |
540 | fflush(stdout); | |
541 | } | |
542 | ||
543 | static void stats_collect(struct stats_record *rec) | |
544 | { | |
545 | int fd, i; | |
546 | ||
151936bf | 547 | fd = map_fds[RX_CNT]; |
fad3917e JDB |
548 | map_collect_percpu(fd, 0, &rec->rx_cnt); |
549 | ||
151936bf | 550 | fd = map_fds[REDIRECT_ERR_CNT]; |
fad3917e JDB |
551 | map_collect_percpu(fd, 1, &rec->redir_err); |
552 | ||
151936bf | 553 | fd = map_fds[CPUMAP_ENQUEUE_CNT]; |
6a098154 | 554 | for (i = 0; i < n_cpus; i++) |
fad3917e JDB |
555 | map_collect_percpu(fd, i, &rec->enq[i]); |
556 | ||
151936bf | 557 | fd = map_fds[CPUMAP_KTHREAD_CNT]; |
fad3917e JDB |
558 | map_collect_percpu(fd, 0, &rec->kthread); |
559 | ||
151936bf | 560 | fd = map_fds[EXCEPTION_CNT]; |
fad3917e JDB |
561 | map_collect_percpu(fd, 0, &rec->exception); |
562 | } | |
563 | ||
564 | ||
/* Exchange the two stats_record pointers that @a and @b point at */
static inline void swap(struct stats_record **a, struct stats_record **b)
{
	struct stats_record *first = *a;
	struct stats_record *second = *b;

	*a = second;
	*b = first;
}
574 | ||
ce4dade7 | 575 | static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, |
fad3917e JDB |
576 | __u32 avail_idx, bool new) |
577 | { | |
578 | __u32 curr_cpus_count = 0; | |
579 | __u32 key = 0; | |
580 | int ret; | |
581 | ||
582 | /* Add a CPU entry to cpumap, as this allocate a cpu entry in | |
583 | * the kernel for the cpu. | |
584 | */ | |
151936bf | 585 | ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0); |
fad3917e JDB |
586 | if (ret) { |
587 | fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret); | |
588 | exit(EXIT_FAIL_BPF); | |
589 | } | |
590 | ||
591 | /* Inform bpf_prog's that a new CPU is available to select | |
592 | * from via some control maps. | |
593 | */ | |
151936bf | 594 | ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0); |
fad3917e JDB |
595 | if (ret) { |
596 | fprintf(stderr, "Add to avail CPUs failed\n"); | |
597 | exit(EXIT_FAIL_BPF); | |
598 | } | |
599 | ||
600 | /* When not replacing/updating existing entry, bump the count */ | |
151936bf | 601 | ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count); |
fad3917e JDB |
602 | if (ret) { |
603 | fprintf(stderr, "Failed reading curr cpus_count\n"); | |
604 | exit(EXIT_FAIL_BPF); | |
605 | } | |
606 | if (new) { | |
607 | curr_cpus_count++; | |
151936bf | 608 | ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key, |
bbaf6029 | 609 | &curr_cpus_count, 0); |
fad3917e JDB |
610 | if (ret) { |
611 | fprintf(stderr, "Failed write curr cpus_count\n"); | |
612 | exit(EXIT_FAIL_BPF); | |
613 | } | |
614 | } | |
615 | /* map_fd[7] = cpus_iterator */ | |
ce4dade7 | 616 | printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n", |
fad3917e | 617 | new ? "Add-new":"Replace", cpu, avail_idx, |
ce4dade7 | 618 | value->qsize, value->bpf_prog.fd, curr_cpus_count); |
fad3917e JDB |
619 | |
620 | return 0; | |
621 | } | |
622 | ||
623 | /* CPUs are zero-indexed. Thus, add a special sentinel default value | |
624 | * in map cpus_available to mark CPU index'es not configured | |
625 | */ | |
626 | static void mark_cpus_unavailable(void) | |
627 | { | |
6a098154 | 628 | __u32 invalid_cpu = n_cpus; |
fad3917e JDB |
629 | int ret, i; |
630 | ||
6a098154 | 631 | for (i = 0; i < n_cpus; i++) { |
151936bf | 632 | ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i, |
bbaf6029 | 633 | &invalid_cpu, 0); |
fad3917e JDB |
634 | if (ret) { |
635 | fprintf(stderr, "Failed marking CPU unavailable\n"); | |
636 | exit(EXIT_FAIL_BPF); | |
637 | } | |
638 | } | |
639 | } | |
640 | ||
641 | /* Stress cpumap management code by concurrently changing underlying cpumap */ | |
ce4dade7 | 642 | static void stress_cpumap(struct bpf_cpumap_val *value) |
fad3917e JDB |
643 | { |
644 | /* Changing qsize will cause kernel to free and alloc a new | |
645 | * bpf_cpu_map_entry, with an associated/complicated tear-down | |
646 | * procedure. | |
647 | */ | |
ce4dade7 LB |
648 | value->qsize = 1024; |
649 | create_cpu_entry(1, value, 0, false); | |
650 | value->qsize = 8; | |
651 | create_cpu_entry(1, value, 0, false); | |
652 | value->qsize = 16000; | |
653 | create_cpu_entry(1, value, 0, false); | |
fad3917e JDB |
654 | } |
655 | ||
bbaf6029 | 656 | static void stats_poll(int interval, bool use_separators, char *prog_name, |
ce4dade7 | 657 | char *mprog_name, struct bpf_cpumap_val *value, |
fad3917e JDB |
658 | bool stress_mode) |
659 | { | |
660 | struct stats_record *record, *prev; | |
ce4dade7 | 661 | int mprog_fd; |
fad3917e JDB |
662 | |
663 | record = alloc_stats_record(); | |
664 | prev = alloc_stats_record(); | |
665 | stats_collect(record); | |
666 | ||
667 | /* Trick to pretty printf with thousands separators use %' */ | |
668 | if (use_separators) | |
669 | setlocale(LC_NUMERIC, "en_US"); | |
670 | ||
671 | while (1) { | |
672 | swap(&prev, &record); | |
ce4dade7 | 673 | mprog_fd = value->bpf_prog.fd; |
fad3917e | 674 | stats_collect(record); |
ce4dade7 | 675 | stats_print(record, prev, prog_name, mprog_name, mprog_fd); |
fad3917e JDB |
676 | sleep(interval); |
677 | if (stress_mode) | |
ce4dade7 | 678 | stress_cpumap(value); |
fad3917e JDB |
679 | } |
680 | ||
681 | free_stats_record(record); | |
682 | free_stats_record(prev); | |
683 | } | |
684 | ||
151936bf | 685 | static int init_tracepoints(struct bpf_object *obj) |
f9e6bfdb JDB |
686 | { |
687 | struct bpf_program *prog; | |
f9e6bfdb | 688 | |
151936bf DL |
689 | bpf_object__for_each_program(prog, obj) { |
690 | if (bpf_program__is_tracepoint(prog) != true) | |
691 | continue; | |
f9e6bfdb | 692 | |
151936bf DL |
693 | tp_links[tp_cnt] = bpf_program__attach(prog); |
694 | if (libbpf_get_error(tp_links[tp_cnt])) { | |
695 | tp_links[tp_cnt] = NULL; | |
696 | return -EINVAL; | |
697 | } | |
698 | tp_cnt++; | |
f9e6bfdb JDB |
699 | } |
700 | ||
151936bf | 701 | return 0; |
f9e6bfdb JDB |
702 | } |
703 | ||
bbaf6029 MF |
704 | static int init_map_fds(struct bpf_object *obj) |
705 | { | |
151936bf DL |
706 | enum map_type type; |
707 | ||
708 | for (type = 0; type < NUM_MAP; type++) { | |
709 | map_fds[type] = | |
710 | bpf_object__find_map_fd_by_name(obj, | |
711 | map_type_strings[type]); | |
712 | ||
713 | if (map_fds[type] < 0) | |
714 | return -ENOENT; | |
715 | } | |
bbaf6029 MF |
716 | |
717 | return 0; | |
718 | } | |
719 | ||
ce4dade7 LB |
720 | static int load_cpumap_prog(char *file_name, char *prog_name, |
721 | char *redir_interface, char *redir_map) | |
722 | { | |
723 | struct bpf_prog_load_attr prog_load_attr = { | |
724 | .prog_type = BPF_PROG_TYPE_XDP, | |
725 | .expected_attach_type = BPF_XDP_CPUMAP, | |
726 | .file = file_name, | |
727 | }; | |
728 | struct bpf_program *prog; | |
729 | struct bpf_object *obj; | |
730 | int fd; | |
731 | ||
732 | if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd)) | |
733 | return -1; | |
734 | ||
735 | if (fd < 0) { | |
736 | fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", | |
737 | strerror(errno)); | |
738 | return fd; | |
739 | } | |
740 | ||
741 | if (redir_interface && redir_map) { | |
742 | int err, map_fd, ifindex_out, key = 0; | |
743 | ||
744 | map_fd = bpf_object__find_map_fd_by_name(obj, redir_map); | |
745 | if (map_fd < 0) | |
746 | return map_fd; | |
747 | ||
748 | ifindex_out = if_nametoindex(redir_interface); | |
749 | if (!ifindex_out) | |
750 | return -1; | |
751 | ||
752 | err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0); | |
753 | if (err < 0) | |
754 | return err; | |
755 | } | |
756 | ||
757 | prog = bpf_object__find_program_by_title(obj, prog_name); | |
758 | if (!prog) { | |
759 | fprintf(stderr, "bpf_object__find_program_by_title failed\n"); | |
760 | return EXIT_FAIL; | |
761 | } | |
762 | ||
763 | return bpf_program__fd(prog); | |
764 | } | |
765 | ||
fad3917e JDB |
766 | int main(int argc, char **argv) |
767 | { | |
c66dca98 | 768 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; |
bbaf6029 | 769 | char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; |
ce4dade7 LB |
770 | char *mprog_filename = "xdp_redirect_kern.o"; |
771 | char *redir_interface = NULL, *redir_map = NULL; | |
772 | char *mprog_name = "xdp_redirect_dummy"; | |
773 | bool mprog_disable = false; | |
bbaf6029 MF |
774 | struct bpf_prog_load_attr prog_load_attr = { |
775 | .prog_type = BPF_PROG_TYPE_UNSPEC, | |
776 | }; | |
3b7a8ec2 MF |
777 | struct bpf_prog_info info = {}; |
778 | __u32 info_len = sizeof(info); | |
ce4dade7 | 779 | struct bpf_cpumap_val value; |
fad3917e JDB |
780 | bool use_separators = true; |
781 | bool stress_mode = false; | |
bbaf6029 MF |
782 | struct bpf_program *prog; |
783 | struct bpf_object *obj; | |
151936bf | 784 | int err = EXIT_FAIL; |
fad3917e | 785 | char filename[256]; |
fad3917e JDB |
786 | int added_cpus = 0; |
787 | int longindex = 0; | |
788 | int interval = 2; | |
fad3917e | 789 | int add_cpu = -1; |
151936bf | 790 | int opt, prog_fd; |
a4e76f1b | 791 | int *cpu, i; |
fad3917e | 792 | __u32 qsize; |
fad3917e | 793 | |
6a098154 LB |
794 | n_cpus = get_nprocs_conf(); |
795 | ||
fad3917e JDB |
796 | /* Notice: choosing he queue size is very important with the |
797 | * ixgbe driver, because it's driver page recycling trick is | |
798 | * dependend on pages being returned quickly. The number of | |
799 | * out-standing packets in the system must be less-than 2x | |
800 | * RX-ring size. | |
801 | */ | |
802 | qsize = 128+64; | |
803 | ||
804 | snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | |
bbaf6029 | 805 | prog_load_attr.file = filename; |
fad3917e JDB |
806 | |
807 | if (setrlimit(RLIMIT_MEMLOCK, &r)) { | |
808 | perror("setrlimit(RLIMIT_MEMLOCK)"); | |
809 | return 1; | |
810 | } | |
811 | ||
bbaf6029 | 812 | if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) |
151936bf | 813 | return err; |
fad3917e | 814 | |
bbaf6029 MF |
815 | if (prog_fd < 0) { |
816 | fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", | |
817 | strerror(errno)); | |
151936bf | 818 | return err; |
bbaf6029 | 819 | } |
151936bf DL |
820 | |
821 | if (init_tracepoints(obj) < 0) { | |
822 | fprintf(stderr, "ERR: bpf_program__attach failed\n"); | |
823 | return err; | |
824 | } | |
825 | ||
bbaf6029 MF |
826 | if (init_map_fds(obj) < 0) { |
827 | fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n"); | |
151936bf | 828 | return err; |
fad3917e | 829 | } |
fad3917e JDB |
830 | mark_cpus_unavailable(); |
831 | ||
a4e76f1b LB |
832 | cpu = malloc(n_cpus * sizeof(int)); |
833 | if (!cpu) { | |
834 | fprintf(stderr, "failed to allocate cpu array\n"); | |
151936bf | 835 | return err; |
a4e76f1b LB |
836 | } |
837 | memset(cpu, 0, n_cpus * sizeof(int)); | |
838 | ||
fad3917e | 839 | /* Parse commands line args */ |
ce4dade7 | 840 | while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:", |
fad3917e JDB |
841 | long_options, &longindex)) != -1) { |
842 | switch (opt) { | |
843 | case 'd': | |
844 | if (strlen(optarg) >= IF_NAMESIZE) { | |
845 | fprintf(stderr, "ERR: --dev name too long\n"); | |
846 | goto error; | |
847 | } | |
848 | ifname = (char *)&ifname_buf; | |
849 | strncpy(ifname, optarg, IF_NAMESIZE); | |
850 | ifindex = if_nametoindex(ifname); | |
851 | if (ifindex == 0) { | |
852 | fprintf(stderr, | |
853 | "ERR: --dev name unknown err(%d):%s\n", | |
854 | errno, strerror(errno)); | |
855 | goto error; | |
856 | } | |
857 | break; | |
858 | case 's': | |
859 | interval = atoi(optarg); | |
860 | break; | |
861 | case 'S': | |
862 | xdp_flags |= XDP_FLAGS_SKB_MODE; | |
863 | break; | |
fad3917e JDB |
864 | case 'x': |
865 | stress_mode = true; | |
866 | break; | |
867 | case 'z': | |
868 | use_separators = false; | |
869 | break; | |
870 | case 'p': | |
871 | /* Selecting eBPF prog to load */ | |
bbaf6029 | 872 | prog_name = optarg; |
fad3917e | 873 | break; |
ce4dade7 LB |
874 | case 'n': |
875 | mprog_disable = true; | |
876 | break; | |
877 | case 'f': | |
878 | mprog_filename = optarg; | |
879 | break; | |
880 | case 'e': | |
881 | mprog_name = optarg; | |
882 | break; | |
883 | case 'r': | |
884 | redir_interface = optarg; | |
885 | break; | |
886 | case 'm': | |
887 | redir_map = optarg; | |
888 | break; | |
fad3917e JDB |
889 | case 'c': |
890 | /* Add multiple CPUs */ | |
891 | add_cpu = strtoul(optarg, NULL, 0); | |
6a098154 | 892 | if (add_cpu >= n_cpus) { |
fad3917e JDB |
893 | fprintf(stderr, |
894 | "--cpu nr too large for cpumap err(%d):%s\n", | |
895 | errno, strerror(errno)); | |
896 | goto error; | |
897 | } | |
a4e76f1b | 898 | cpu[added_cpus++] = add_cpu; |
fad3917e JDB |
899 | break; |
900 | case 'q': | |
901 | qsize = atoi(optarg); | |
902 | break; | |
743e568c MF |
903 | case 'F': |
904 | xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; | |
905 | break; | |
fad3917e JDB |
906 | case 'h': |
907 | error: | |
908 | default: | |
a4e76f1b | 909 | free(cpu); |
bbaf6029 | 910 | usage(argv, obj); |
fad3917e JDB |
911 | return EXIT_FAIL_OPTION; |
912 | } | |
913 | } | |
d50ecc46 THJ |
914 | |
915 | if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) | |
916 | xdp_flags |= XDP_FLAGS_DRV_MODE; | |
917 | ||
fad3917e JDB |
918 | /* Required option */ |
919 | if (ifindex == -1) { | |
920 | fprintf(stderr, "ERR: required option --dev missing\n"); | |
bbaf6029 | 921 | usage(argv, obj); |
a4e76f1b LB |
922 | err = EXIT_FAIL_OPTION; |
923 | goto out; | |
fad3917e JDB |
924 | } |
925 | /* Required option */ | |
926 | if (add_cpu == -1) { | |
927 | fprintf(stderr, "ERR: required option --cpu missing\n"); | |
928 | fprintf(stderr, " Specify multiple --cpu option to add more\n"); | |
bbaf6029 | 929 | usage(argv, obj); |
a4e76f1b LB |
930 | err = EXIT_FAIL_OPTION; |
931 | goto out; | |
fad3917e JDB |
932 | } |
933 | ||
ce4dade7 LB |
934 | value.bpf_prog.fd = 0; |
935 | if (!mprog_disable) | |
936 | value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name, | |
937 | redir_interface, redir_map); | |
938 | if (value.bpf_prog.fd < 0) { | |
939 | err = value.bpf_prog.fd; | |
940 | goto out; | |
941 | } | |
942 | value.qsize = qsize; | |
943 | ||
a4e76f1b | 944 | for (i = 0; i < added_cpus; i++) |
ce4dade7 | 945 | create_cpu_entry(cpu[i], &value, i, true); |
a4e76f1b | 946 | |
817b89be | 947 | /* Remove XDP program when program is interrupted or killed */ |
fad3917e | 948 | signal(SIGINT, int_exit); |
817b89be | 949 | signal(SIGTERM, int_exit); |
fad3917e | 950 | |
bbaf6029 MF |
951 | prog = bpf_object__find_program_by_title(obj, prog_name); |
952 | if (!prog) { | |
953 | fprintf(stderr, "bpf_object__find_program_by_title failed\n"); | |
a4e76f1b | 954 | goto out; |
bbaf6029 MF |
955 | } |
956 | ||
957 | prog_fd = bpf_program__fd(prog); | |
958 | if (prog_fd < 0) { | |
959 | fprintf(stderr, "bpf_program__fd failed\n"); | |
a4e76f1b | 960 | goto out; |
bbaf6029 MF |
961 | } |
962 | ||
963 | if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { | |
fad3917e | 964 | fprintf(stderr, "link set xdp fd failed\n"); |
a4e76f1b LB |
965 | err = EXIT_FAIL_XDP; |
966 | goto out; | |
fad3917e JDB |
967 | } |
968 | ||
3b7a8ec2 MF |
969 | err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); |
970 | if (err) { | |
971 | printf("can't get prog info - %s\n", strerror(errno)); | |
a4e76f1b | 972 | goto out; |
3b7a8ec2 MF |
973 | } |
974 | prog_id = info.id; | |
975 | ||
ce4dade7 LB |
976 | stats_poll(interval, use_separators, prog_name, mprog_name, |
977 | &value, stress_mode); | |
151936bf DL |
978 | |
979 | err = EXIT_OK; | |
a4e76f1b LB |
980 | out: |
981 | free(cpu); | |
982 | return err; | |
fad3917e | 983 | } |