]> Git Repo - J-linux.git/blob - tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
Merge tag 'vfs-6.13-rc7.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
[J-linux.git] / tools / testing / selftests / bpf / prog_tests / migrate_reuseport.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Check if we can migrate child sockets.
4  *
5  *   1. call listen() for 4 server sockets.
6  *   2. call connect() for 25 client sockets.
7  *   3. call listen() for 1 server socket. (migration target)
8  *   4. update a map to migrate all child sockets
9  *        to the last server socket (migrate_map[cookie] = 4)
10  *   5. call shutdown() for first 4 server sockets
11  *        and migrate the requests in the accept queue
12  *        to the last server socket.
13  *   6. call listen() for the second server socket.
14  *   7. call shutdown() for the last server
15  *        and migrate the requests in the accept queue
16  *        to the second server socket.
17  *   8. call listen() for the last server.
18  *   9. call shutdown() for the second server
19  *        and migrate the requests in the accept queue
20  *        to the last server socket.
21  *  10. call accept() for the last server socket.
22  *
23  * Author: Kuniyuki Iwashima <[email protected]>
24  */
25
26 #include <bpf/bpf.h>
27 #include <bpf/libbpf.h>
28
29 #include "test_progs.h"
30 #include "test_migrate_reuseport.skel.h"
31 #include "network_helpers.h"
32
33 #ifndef TCP_FASTOPEN_CONNECT
34 #define TCP_FASTOPEN_CONNECT 30
35 #endif
36
37 #define IFINDEX_LO 1
38
39 #define NR_SERVERS 5
40 #define NR_CLIENTS (NR_SERVERS * 5)
41 #define MIGRATED_TO (NR_SERVERS - 1)
42
43 /* fastopenq->max_qlen and sk->sk_max_ack_backlog */
44 #define QLEN (NR_CLIENTS * 5)
45
46 #define MSG "Hello World\0"
47 #define MSGLEN 12
48
49 static struct migrate_reuseport_test_case {
50         const char *name;
51         __s64 servers[NR_SERVERS];
52         __s64 clients[NR_CLIENTS];
53         struct sockaddr_storage addr;
54         socklen_t addrlen;
55         int family;
56         int state;
57         bool drop_ack;
58         bool expire_synack_timer;
59         bool fastopen;
60         struct bpf_link *link;
61 } test_cases[] = {
62         {
63                 .name = "IPv4 TCP_ESTABLISHED  inet_csk_listen_stop",
64                 .family = AF_INET,
65                 .state = BPF_TCP_ESTABLISHED,
66                 .drop_ack = false,
67                 .expire_synack_timer = false,
68                 .fastopen = false,
69         },
70         {
71                 .name = "IPv4 TCP_SYN_RECV     inet_csk_listen_stop",
72                 .family = AF_INET,
73                 .state = BPF_TCP_SYN_RECV,
74                 .drop_ack = true,
75                 .expire_synack_timer = false,
76                 .fastopen = true,
77         },
78         {
79                 .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
80                 .family = AF_INET,
81                 .state = BPF_TCP_NEW_SYN_RECV,
82                 .drop_ack = true,
83                 .expire_synack_timer = true,
84                 .fastopen = false,
85         },
86         {
87                 .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
88                 .family = AF_INET,
89                 .state = BPF_TCP_NEW_SYN_RECV,
90                 .drop_ack = true,
91                 .expire_synack_timer = false,
92                 .fastopen = false,
93         },
94         {
95                 .name = "IPv6 TCP_ESTABLISHED  inet_csk_listen_stop",
96                 .family = AF_INET6,
97                 .state = BPF_TCP_ESTABLISHED,
98                 .drop_ack = false,
99                 .expire_synack_timer = false,
100                 .fastopen = false,
101         },
102         {
103                 .name = "IPv6 TCP_SYN_RECV     inet_csk_listen_stop",
104                 .family = AF_INET6,
105                 .state = BPF_TCP_SYN_RECV,
106                 .drop_ack = true,
107                 .expire_synack_timer = false,
108                 .fastopen = true,
109         },
110         {
111                 .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
112                 .family = AF_INET6,
113                 .state = BPF_TCP_NEW_SYN_RECV,
114                 .drop_ack = true,
115                 .expire_synack_timer = true,
116                 .fastopen = false,
117         },
118         {
119                 .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
120                 .family = AF_INET6,
121                 .state = BPF_TCP_NEW_SYN_RECV,
122                 .drop_ack = true,
123                 .expire_synack_timer = false,
124                 .fastopen = false,
125         }
126 };
127
128 static void init_fds(__s64 fds[], int len)
129 {
130         int i;
131
132         for (i = 0; i < len; i++)
133                 fds[i] = -1;
134 }
135
136 static void close_fds(__s64 fds[], int len)
137 {
138         int i;
139
140         for (i = 0; i < len; i++) {
141                 if (fds[i] != -1) {
142                         close(fds[i]);
143                         fds[i] = -1;
144                 }
145         }
146 }
147
148 static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
149 {
150         int err = 0, fd, len;
151
152         fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
153         if (!ASSERT_NEQ(fd, -1, "open"))
154                 return -1;
155
156         if (restore) {
157                 len = write(fd, buf, *saved_len);
158                 if (!ASSERT_EQ(len, *saved_len, "write - restore"))
159                         err = -1;
160         } else {
161                 *saved_len = read(fd, buf, size);
162                 if (!ASSERT_GE(*saved_len, 1, "read")) {
163                         err = -1;
164                         goto close;
165                 }
166
167                 err = lseek(fd, 0, SEEK_SET);
168                 if (!ASSERT_OK(err, "lseek"))
169                         goto close;
170
171                 /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
172                  *  TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
173                  */
174                 len = write(fd, "519", 3);
175                 if (!ASSERT_EQ(len, 3, "write - setup"))
176                         err = -1;
177         }
178
179 close:
180         close(fd);
181
182         return err;
183 }
184
185 static int drop_ack(struct migrate_reuseport_test_case *test_case,
186                     struct test_migrate_reuseport *skel)
187 {
188         if (test_case->family == AF_INET)
189                 skel->bss->server_port = ((struct sockaddr_in *)
190                                           &test_case->addr)->sin_port;
191         else
192                 skel->bss->server_port = ((struct sockaddr_in6 *)
193                                           &test_case->addr)->sin6_port;
194
195         test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
196                                                   IFINDEX_LO);
197         if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
198                 return -1;
199
200         return 0;
201 }
202
203 static int pass_ack(struct migrate_reuseport_test_case *test_case)
204 {
205         int err;
206
207         err = bpf_link__destroy(test_case->link);
208         if (!ASSERT_OK(err, "bpf_link__destroy"))
209                 return -1;
210
211         test_case->link = NULL;
212
213         return 0;
214 }
215
216 static int start_servers(struct migrate_reuseport_test_case *test_case,
217                          struct test_migrate_reuseport *skel)
218 {
219         int i, err, prog_fd, reuseport = 1, qlen = QLEN;
220
221         prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
222
223         make_sockaddr(test_case->family,
224                       test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
225                       &test_case->addr, &test_case->addrlen);
226
227         for (i = 0; i < NR_SERVERS; i++) {
228                 test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
229                                                IPPROTO_TCP);
230                 if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
231                         return -1;
232
233                 err = setsockopt(test_case->servers[i], SOL_SOCKET,
234                                  SO_REUSEPORT, &reuseport, sizeof(reuseport));
235                 if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
236                         return -1;
237
238                 err = bind(test_case->servers[i],
239                            (struct sockaddr *)&test_case->addr,
240                            test_case->addrlen);
241                 if (!ASSERT_OK(err, "bind"))
242                         return -1;
243
244                 if (i == 0) {
245                         err = setsockopt(test_case->servers[i], SOL_SOCKET,
246                                          SO_ATTACH_REUSEPORT_EBPF,
247                                          &prog_fd, sizeof(prog_fd));
248                         if (!ASSERT_OK(err,
249                                        "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
250                                 return -1;
251
252                         err = getsockname(test_case->servers[i],
253                                           (struct sockaddr *)&test_case->addr,
254                                           &test_case->addrlen);
255                         if (!ASSERT_OK(err, "getsockname"))
256                                 return -1;
257                 }
258
259                 if (test_case->fastopen) {
260                         err = setsockopt(test_case->servers[i],
261                                          SOL_TCP, TCP_FASTOPEN,
262                                          &qlen, sizeof(qlen));
263                         if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
264                                 return -1;
265                 }
266
267                 /* All requests will be tied to the first four listeners */
268                 if (i != MIGRATED_TO) {
269                         err = listen(test_case->servers[i], qlen);
270                         if (!ASSERT_OK(err, "listen"))
271                                 return -1;
272                 }
273         }
274
275         return 0;
276 }
277
278 static int start_clients(struct migrate_reuseport_test_case *test_case)
279 {
280         char buf[MSGLEN] = MSG;
281         int i, err;
282
283         for (i = 0; i < NR_CLIENTS; i++) {
284                 test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
285                                                IPPROTO_TCP);
286                 if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
287                         return -1;
288
289                 /* The attached XDP program drops only the final ACK, so
290                  * clients will transition to TCP_ESTABLISHED immediately.
291                  */
292                 err = settimeo(test_case->clients[i], 100);
293                 if (!ASSERT_OK(err, "settimeo"))
294                         return -1;
295
296                 if (test_case->fastopen) {
297                         int fastopen = 1;
298
299                         err = setsockopt(test_case->clients[i], IPPROTO_TCP,
300                                          TCP_FASTOPEN_CONNECT, &fastopen,
301                                          sizeof(fastopen));
302                         if (!ASSERT_OK(err,
303                                        "setsockopt - TCP_FASTOPEN_CONNECT"))
304                                 return -1;
305                 }
306
307                 err = connect(test_case->clients[i],
308                               (struct sockaddr *)&test_case->addr,
309                               test_case->addrlen);
310                 if (!ASSERT_OK(err, "connect"))
311                         return -1;
312
313                 err = write(test_case->clients[i], buf, MSGLEN);
314                 if (!ASSERT_EQ(err, MSGLEN, "write"))
315                         return -1;
316         }
317
318         return 0;
319 }
320
321 static int update_maps(struct migrate_reuseport_test_case *test_case,
322                        struct test_migrate_reuseport *skel)
323 {
324         int i, err, migrated_to = MIGRATED_TO;
325         int reuseport_map_fd, migrate_map_fd;
326         __u64 value;
327
328         reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
329         migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
330
331         for (i = 0; i < NR_SERVERS; i++) {
332                 value = (__u64)test_case->servers[i];
333                 err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
334                                           BPF_NOEXIST);
335                 if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
336                         return -1;
337
338                 err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
339                 if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
340                         return -1;
341
342                 err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
343                                           BPF_NOEXIST);
344                 if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
345                         return -1;
346         }
347
348         return 0;
349 }
350
351 static int migrate_dance(struct migrate_reuseport_test_case *test_case)
352 {
353         int i, err;
354
355         /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
356          * to the last listener based on eBPF.
357          */
358         for (i = 0; i < MIGRATED_TO; i++) {
359                 err = shutdown(test_case->servers[i], SHUT_RDWR);
360                 if (!ASSERT_OK(err, "shutdown"))
361                         return -1;
362         }
363
364         /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
365         if (test_case->state == BPF_TCP_NEW_SYN_RECV)
366                 return 0;
367
368         /* Note that we use the second listener instead of the
369          * first one here.
370          *
371          * The fist listener is bind()ed with port 0 and,
372          * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
373          * calling listen() again will bind() the first listener
374          * on a new ephemeral port and detach it from the existing
375          * reuseport group.  (See: __inet_bind(), tcp_set_state())
376          *
377          * OTOH, the second one is bind()ed with a specific port,
378          * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
379          * resurrect the listener on the existing reuseport group.
380          */
381         err = listen(test_case->servers[1], QLEN);
382         if (!ASSERT_OK(err, "listen"))
383                 return -1;
384
385         /* Migrate from the last listener to the second one.
386          *
387          * All listeners were detached out of the reuseport_map,
388          * so migration will be done by kernel random pick from here.
389          */
390         err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
391         if (!ASSERT_OK(err, "shutdown"))
392                 return -1;
393
394         /* Back to the existing reuseport group */
395         err = listen(test_case->servers[MIGRATED_TO], QLEN);
396         if (!ASSERT_OK(err, "listen"))
397                 return -1;
398
399         /* Migrate back to the last one from the second one */
400         err = shutdown(test_case->servers[1], SHUT_RDWR);
401         if (!ASSERT_OK(err, "shutdown"))
402                 return -1;
403
404         return 0;
405 }
406
407 static void count_requests(struct migrate_reuseport_test_case *test_case,
408                            struct test_migrate_reuseport *skel)
409 {
410         struct sockaddr_storage addr;
411         socklen_t len = sizeof(addr);
412         int err, cnt = 0, client;
413         char buf[MSGLEN];
414
415         err = settimeo(test_case->servers[MIGRATED_TO], 4000);
416         if (!ASSERT_OK(err, "settimeo"))
417                 goto out;
418
419         for (; cnt < NR_CLIENTS; cnt++) {
420                 client = accept(test_case->servers[MIGRATED_TO],
421                                 (struct sockaddr *)&addr, &len);
422                 if (!ASSERT_NEQ(client, -1, "accept"))
423                         goto out;
424
425                 memset(buf, 0, MSGLEN);
426                 read(client, &buf, MSGLEN);
427                 close(client);
428
429                 if (!ASSERT_STREQ(buf, MSG, "read"))
430                         goto out;
431         }
432
433 out:
434         ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
435
436         switch (test_case->state) {
437         case BPF_TCP_ESTABLISHED:
438                 cnt = skel->bss->migrated_at_close;
439                 break;
440         case BPF_TCP_SYN_RECV:
441                 cnt = skel->bss->migrated_at_close_fastopen;
442                 break;
443         case BPF_TCP_NEW_SYN_RECV:
444                 if (test_case->expire_synack_timer)
445                         cnt = skel->bss->migrated_at_send_synack;
446                 else
447                         cnt = skel->bss->migrated_at_recv_ack;
448                 break;
449         default:
450                 cnt = 0;
451         }
452
453         ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
454 }
455
456 static void run_test(struct migrate_reuseport_test_case *test_case,
457                      struct test_migrate_reuseport *skel)
458 {
459         int err, saved_len;
460         char buf[16];
461
462         skel->bss->migrated_at_close = 0;
463         skel->bss->migrated_at_close_fastopen = 0;
464         skel->bss->migrated_at_send_synack = 0;
465         skel->bss->migrated_at_recv_ack = 0;
466
467         init_fds(test_case->servers, NR_SERVERS);
468         init_fds(test_case->clients, NR_CLIENTS);
469
470         if (test_case->fastopen) {
471                 memset(buf, 0, sizeof(buf));
472
473                 err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
474                 if (!ASSERT_OK(err, "setup_fastopen - setup"))
475                         return;
476         }
477
478         err = start_servers(test_case, skel);
479         if (!ASSERT_OK(err, "start_servers"))
480                 goto close_servers;
481
482         if (test_case->drop_ack) {
483                 /* Drop the final ACK of the 3-way handshake and stick the
484                  * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
485                  */
486                 err = drop_ack(test_case, skel);
487                 if (!ASSERT_OK(err, "drop_ack"))
488                         goto close_servers;
489         }
490
491         /* Tie requests to the first four listeners */
492         err = start_clients(test_case);
493         if (!ASSERT_OK(err, "start_clients"))
494                 goto close_clients;
495
496         err = listen(test_case->servers[MIGRATED_TO], QLEN);
497         if (!ASSERT_OK(err, "listen"))
498                 goto close_clients;
499
500         err = update_maps(test_case, skel);
501         if (!ASSERT_OK(err, "fill_maps"))
502                 goto close_clients;
503
504         /* Migrate the requests in the accept queue only.
505          * TCP_NEW_SYN_RECV requests are not migrated at this point.
506          */
507         err = migrate_dance(test_case);
508         if (!ASSERT_OK(err, "migrate_dance"))
509                 goto close_clients;
510
511         if (test_case->expire_synack_timer) {
512                 /* Wait for SYN+ACK timers to expire so that
513                  * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
514                  */
515                 sleep(1);
516         }
517
518         if (test_case->link) {
519                 /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
520                 err = pass_ack(test_case);
521                 if (!ASSERT_OK(err, "pass_ack"))
522                         goto close_clients;
523         }
524
525         count_requests(test_case, skel);
526
527 close_clients:
528         close_fds(test_case->clients, NR_CLIENTS);
529
530         if (test_case->link) {
531                 err = pass_ack(test_case);
532                 ASSERT_OK(err, "pass_ack - clean up");
533         }
534
535 close_servers:
536         close_fds(test_case->servers, NR_SERVERS);
537
538         if (test_case->fastopen) {
539                 err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
540                 ASSERT_OK(err, "setup_fastopen - restore");
541         }
542 }
543
544 void serial_test_migrate_reuseport(void)
545 {
546         struct test_migrate_reuseport *skel;
547         int i;
548
549         skel = test_migrate_reuseport__open_and_load();
550         if (!ASSERT_OK_PTR(skel, "open_and_load"))
551                 return;
552
553         for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
554                 test__start_subtest(test_cases[i].name);
555                 run_test(&test_cases[i], skel);
556         }
557
558         test_migrate_reuseport__destroy(skel);
559 }
This page took 0.06522 seconds and 4 git commands to generate.