1 // SPDX-License-Identifier: GPL-2.0
3 * Check if we can migrate child sockets.
5 * 1. call listen() for 4 server sockets.
6 * 2. call connect() for 25 client sockets.
7 * 3. call listen() for 1 server socket. (migration target)
8 * 4. update a map to migrate all child sockets
9 * to the last server socket (migrate_map[cookie] = 4)
10 * 5. call shutdown() for first 4 server sockets
11 * and migrate the requests in the accept queue
12 * to the last server socket.
13 * 6. call listen() for the second server socket.
14 * 7. call shutdown() for the last server
15 * and migrate the requests in the accept queue
16 * to the second server socket.
17 * 8. call listen() for the last server.
18 * 9. call shutdown() for the second server
19 * and migrate the requests in the accept queue
20 * to the last server socket.
21 * 10. call accept() for the last server socket.
27 #include <bpf/libbpf.h>
29 #include "test_progs.h"
30 #include "test_migrate_reuseport.skel.h"
31 #include "network_helpers.h"
33 #ifndef TCP_FASTOPEN_CONNECT
34 #define TCP_FASTOPEN_CONNECT 30
40 #define NR_CLIENTS (NR_SERVERS * 5)
41 #define MIGRATED_TO (NR_SERVERS - 1)
43 /* fastopenq->max_qlen and sk->sk_max_ack_backlog */
44 #define QLEN (NR_CLIENTS * 5)
46 #define MSG "Hello World\0"
/* Per-subtest parameters: the address family, the TCP state the child
 * sockets are in when migrated, and whether migration is triggered by
 * the SYN+ACK retransmit timer expiring (reqsk_timer_handler() path)
 * rather than by closing the listener or completing the handshake.
 */
49 static struct migrate_reuseport_test_case {
51 	__s64 servers[NR_SERVERS]; /* listener fds, -1 while unused */
52 	__s64 clients[NR_CLIENTS]; /* client fds, -1 while unused */
53 	struct sockaddr_storage addr; /* loopback addr the servers bind to */
58 	bool expire_synack_timer; /* wait for reqsk timer to do migration */
60 	struct bpf_link *link; /* XDP link dropping the final ACK, or NULL */
63 		.name = "IPv4 TCP_ESTABLISHED inet_csk_listen_stop",
65 		.state = BPF_TCP_ESTABLISHED,
67 		.expire_synack_timer = false,
71 		.name = "IPv4 TCP_SYN_RECV inet_csk_listen_stop",
73 		.state = BPF_TCP_SYN_RECV,
75 		.expire_synack_timer = false,
79 		.name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
81 		.state = BPF_TCP_NEW_SYN_RECV,
83 		.expire_synack_timer = true,
87 		.name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
89 		.state = BPF_TCP_NEW_SYN_RECV,
91 		.expire_synack_timer = false,
95 		.name = "IPv6 TCP_ESTABLISHED inet_csk_listen_stop",
97 		.state = BPF_TCP_ESTABLISHED,
99 		.expire_synack_timer = false,
103 		.name = "IPv6 TCP_SYN_RECV inet_csk_listen_stop",
105 		.state = BPF_TCP_SYN_RECV,
107 		.expire_synack_timer = false,
111 		.name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
113 		.state = BPF_TCP_NEW_SYN_RECV,
115 		.expire_synack_timer = true,
119 		.name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
121 		.state = BPF_TCP_NEW_SYN_RECV,
123 		.expire_synack_timer = false,
/* Reset every slot of an fd array before use.
 * NOTE(review): the loop body is not visible in this excerpt — presumably
 * it sets each slot to -1 so close_fds() can skip never-opened slots.
 */
128 static void init_fds(__s64 fds[], int len)
132 	for (i = 0; i < len; i++)
/* Close all valid fds in the array; used for both the server and
 * client fd arrays during test teardown.
 */
136 static void close_fds(__s64 fds[], int len)
140 	for (i = 0; i < len; i++) {
/* Save and override, or restore, the net.ipv4.tcp_fastopen sysctl.
 *
 * restore == false: read the current value into @buf (length stored in
 * *@saved_len), then write "519" to enable TFO on both ends without
 * requiring cookies.
 * restore == true: write the previously saved value in @buf back.
 *
 * Returns 0 on success, non-zero on failure (via the error paths not
 * visible in this excerpt).
 */
148 static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
150 	int err = 0, fd, len;
152 	fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
153 	if (!ASSERT_NEQ(fd, -1, "open"))
157 	len = write(fd, buf, *saved_len);
158 	if (!ASSERT_EQ(len, *saved_len, "write - restore"))
161 	*saved_len = read(fd, buf, size);
162 	if (!ASSERT_GE(*saved_len, 1, "read")) {
	/* rewind so the setup value overwrites what we just read */
167 	err = lseek(fd, 0, SEEK_SET);
168 	if (!ASSERT_OK(err, "lseek"))
171 	/* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
172 	 * TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
174 	len = write(fd, "519", 3);
175 	if (!ASSERT_EQ(len, 3, "write - setup"))
/* Attach the XDP program that drops the client's final ACK of the
 * 3-way handshake, keeping requests stuck in TCP_SYN_RECV /
 * TCP_NEW_SYN_RECV.  The server's bound port is published to the BPF
 * program through skel->bss->server_port so only this test's traffic
 * is matched.  Returns 0 on success.
 */
185 static int drop_ack(struct migrate_reuseport_test_case *test_case,
186 		    struct test_migrate_reuseport *skel)
188 	if (test_case->family == AF_INET)
189 		skel->bss->server_port = ((struct sockaddr_in *)
190 					  &test_case->addr)->sin_port;
192 		skel->bss->server_port = ((struct sockaddr_in6 *)
193 					  &test_case->addr)->sin6_port;
	/* attach to loopback; the link is kept so pass_ack() can detach it */
195 	test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
197 	if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
/* Detach the ACK-dropping XDP program so the 3-way handshakes can
 * complete.  Clears test_case->link so teardown does not destroy the
 * link twice.  Returns 0 on success.
 */
203 static int pass_ack(struct migrate_reuseport_test_case *test_case)
207 	err = bpf_link__destroy(test_case->link);
208 	if (!ASSERT_OK(err, "bpf_link__destroy"))
211 	test_case->link = NULL;
/* Create NR_SERVERS reuseport listeners bound to the same loopback
 * address/port.  Each socket gets SO_REUSEPORT plus the
 * migrate_reuseport BPF program via SO_ATTACH_REUSEPORT_EBPF; TFO
 * queues are enabled when the test case requires fastopen.  All but
 * the last (migration-target) socket are put into the listening state
 * here.  Returns 0 on success.
 */
216 static int start_servers(struct migrate_reuseport_test_case *test_case,
217 			 struct test_migrate_reuseport *skel)
219 	int i, err, prog_fd, reuseport = 1, qlen = QLEN;
221 	prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
	/* port 0: the kernel picks an ephemeral port on the first bind() */
223 	make_sockaddr(test_case->family,
224 		      test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
225 		      &test_case->addr, &test_case->addrlen);
227 	for (i = 0; i < NR_SERVERS; i++) {
228 		test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
230 		if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
233 		err = setsockopt(test_case->servers[i], SOL_SOCKET,
234 				 SO_REUSEPORT, &reuseport, sizeof(reuseport));
235 		if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
238 		err = bind(test_case->servers[i],
239 			   (struct sockaddr *)&test_case->addr,
241 		if (!ASSERT_OK(err, "bind"))
245 		err = setsockopt(test_case->servers[i], SOL_SOCKET,
246 				 SO_ATTACH_REUSEPORT_EBPF,
247 				 &prog_fd, sizeof(prog_fd));
249 			       "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
		/* learn the ephemeral port chosen for the first socket so
		 * the remaining sockets bind to the very same address
		 */
252 		err = getsockname(test_case->servers[i],
253 				  (struct sockaddr *)&test_case->addr,
254 				  &test_case->addrlen);
255 		if (!ASSERT_OK(err, "getsockname"))
259 		if (test_case->fastopen) {
260 			err = setsockopt(test_case->servers[i],
261 					 SOL_TCP, TCP_FASTOPEN,
262 					 &qlen, sizeof(qlen));
263 			if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
267 		/* All requests will be tied to the first four listeners */
268 		if (i != MIGRATED_TO) {
269 			err = listen(test_case->servers[i], qlen);
270 			if (!ASSERT_OK(err, "listen"))
/* Create NR_CLIENTS connections to the reuseport group and send one
 * MSG each.  With fastopen enabled, TCP_FASTOPEN_CONNECT defers the
 * SYN until write(), carrying the data in the SYN.  Returns 0 on
 * success.
 */
278 static int start_clients(struct migrate_reuseport_test_case *test_case)
280 	char buf[MSGLEN] = MSG;
283 	for (i = 0; i < NR_CLIENTS; i++) {
284 		test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
286 		if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
289 		/* The attached XDP program drops only the final ACK, so
290 		 * clients will transition to TCP_ESTABLISHED immediately.
292 		err = settimeo(test_case->clients[i], 100);
293 		if (!ASSERT_OK(err, "settimeo"))
296 		if (test_case->fastopen) {
299 			err = setsockopt(test_case->clients[i], IPPROTO_TCP,
300 					 TCP_FASTOPEN_CONNECT, &fastopen,
303 				       "setsockopt - TCP_FASTOPEN_CONNECT"))
307 		err = connect(test_case->clients[i],
308 			      (struct sockaddr *)&test_case->addr,
310 		if (!ASSERT_OK(err, "connect"))
313 		err = write(test_case->clients[i], buf, MSGLEN);
314 		if (!ASSERT_EQ(err, MSGLEN, "write"))
/* Populate the BPF maps that drive migration: reuseport_map maps index
 * i to server socket i, and migrate_map maps each socket's cookie
 * (read back from reuseport_map) to MIGRATED_TO, directing all child
 * sockets to the last listener.  Returns 0 on success.
 */
321 static int update_maps(struct migrate_reuseport_test_case *test_case,
322 		       struct test_migrate_reuseport *skel)
324 	int i, err, migrated_to = MIGRATED_TO;
325 	int reuseport_map_fd, migrate_map_fd;
328 	reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
329 	migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
331 	for (i = 0; i < NR_SERVERS; i++) {
332 		value = (__u64)test_case->servers[i];
333 		err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
335 		if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
		/* lookup returns the socket cookie, which keys migrate_map */
338 		err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
339 		if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
342 		err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
344 		if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
/* Shut down and re-listen the listeners in a fixed order so the
 * accept-queue requests are migrated several times: first to the last
 * listener (BPF-selected), then to the second one (kernel random pick,
 * since the listeners left the reuseport_map), then back to the last
 * one.  Steps 5-9 of the scenario in the file header.  Returns 0 on
 * success.
 */
351 static int migrate_dance(struct migrate_reuseport_test_case *test_case)
355 	/* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
356 	 * to the last listener based on eBPF.
358 	for (i = 0; i < MIGRATED_TO; i++) {
359 		err = shutdown(test_case->servers[i], SHUT_RDWR);
360 		if (!ASSERT_OK(err, "shutdown"))
364 	/* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
365 	if (test_case->state == BPF_TCP_NEW_SYN_RECV)
368 	/* Note that we use the second listener instead of the
371 	 * The first listener is bind()ed with port 0 and,
372 	 * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
373 	 * calling listen() again will bind() the first listener
374 	 * on a new ephemeral port and detach it from the existing
375 	 * reuseport group. (See: __inet_bind(), tcp_set_state())
377 	 * OTOH, the second one is bind()ed with a specific port,
378 	 * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
379 	 * resurrect the listener on the existing reuseport group.
381 	err = listen(test_case->servers[1], QLEN);
382 	if (!ASSERT_OK(err, "listen"))
385 	/* Migrate from the last listener to the second one.
387 	 * All listeners were detached out of the reuseport_map,
388 	 * so migration will be done by kernel random pick from here.
390 	err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
391 	if (!ASSERT_OK(err, "shutdown"))
394 	/* Back to the existing reuseport group */
395 	err = listen(test_case->servers[MIGRATED_TO], QLEN);
396 	if (!ASSERT_OK(err, "listen"))
399 	/* Migrate back to the last one from the second one */
400 	err = shutdown(test_case->servers[1], SHUT_RDWR);
401 	if (!ASSERT_OK(err, "shutdown"))
/* Verify migration from both sides: accept() all NR_CLIENTS child
 * sockets on the final listener and check each carries MSG, then read
 * the BPF program's per-hook migration counter matching this test
 * case's state and confirm it also saw NR_CLIENTS migrations.
 */
407 static void count_requests(struct migrate_reuseport_test_case *test_case,
408 			   struct test_migrate_reuseport *skel)
410 	struct sockaddr_storage addr;
411 	socklen_t len = sizeof(addr);
412 	int err, cnt = 0, client;
	/* generous timeout: requests may still be finishing the handshake */
415 	err = settimeo(test_case->servers[MIGRATED_TO], 4000);
416 	if (!ASSERT_OK(err, "settimeo"))
419 	for (; cnt < NR_CLIENTS; cnt++) {
420 		client = accept(test_case->servers[MIGRATED_TO],
421 				(struct sockaddr *)&addr, &len);
422 		if (!ASSERT_NEQ(client, -1, "accept"))
425 		memset(buf, 0, MSGLEN);
426 		read(client, &buf, MSGLEN);
429 		if (!ASSERT_STREQ(buf, MSG, "read"))
434 	ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
	/* pick the BPF counter that corresponds to the migration path
	 * exercised by this test case
	 */
436 	switch (test_case->state) {
437 	case BPF_TCP_ESTABLISHED:
438 		cnt = skel->bss->migrated_at_close;
440 	case BPF_TCP_SYN_RECV:
441 		cnt = skel->bss->migrated_at_close_fastopen;
443 	case BPF_TCP_NEW_SYN_RECV:
444 		if (test_case->expire_synack_timer)
445 			cnt = skel->bss->migrated_at_send_synack;
447 			cnt = skel->bss->migrated_at_recv_ack;
453 	ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
/* Drive one test case end to end: reset the BPF counters, set up
 * fastopen and the ACK-dropping XDP program as required, start
 * servers and clients, fill the maps, perform the migration dance,
 * verify the counts, and finally tear everything down (restoring the
 * fastopen sysctl and detaching the XDP link).
 */
456 static void run_test(struct migrate_reuseport_test_case *test_case,
457 		     struct test_migrate_reuseport *skel)
	/* counters are shared across subtests; start from a clean slate */
462 	skel->bss->migrated_at_close = 0;
463 	skel->bss->migrated_at_close_fastopen = 0;
464 	skel->bss->migrated_at_send_synack = 0;
465 	skel->bss->migrated_at_recv_ack = 0;
467 	init_fds(test_case->servers, NR_SERVERS);
468 	init_fds(test_case->clients, NR_CLIENTS);
470 	if (test_case->fastopen) {
471 		memset(buf, 0, sizeof(buf));
473 		err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
474 		if (!ASSERT_OK(err, "setup_fastopen - setup"))
478 	err = start_servers(test_case, skel);
479 	if (!ASSERT_OK(err, "start_servers"))
482 	if (test_case->drop_ack) {
483 		/* Drop the final ACK of the 3-way handshake and stick the
484 		 * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
486 		err = drop_ack(test_case, skel);
487 		if (!ASSERT_OK(err, "drop_ack"))
491 	/* Tie requests to the first four listeners */
492 	err = start_clients(test_case);
493 	if (!ASSERT_OK(err, "start_clients"))
	/* open the migration target only after the requests are queued */
496 	err = listen(test_case->servers[MIGRATED_TO], QLEN);
497 	if (!ASSERT_OK(err, "listen"))
500 	err = update_maps(test_case, skel);
501 	if (!ASSERT_OK(err, "fill_maps"))
504 	/* Migrate the requests in the accept queue only.
505 	 * TCP_NEW_SYN_RECV requests are not migrated at this point.
507 	err = migrate_dance(test_case);
508 	if (!ASSERT_OK(err, "migrate_dance"))
511 	if (test_case->expire_synack_timer) {
512 		/* Wait for SYN+ACK timers to expire so that
513 		 * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
518 	if (test_case->link) {
519 		/* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
520 		err = pass_ack(test_case);
521 		if (!ASSERT_OK(err, "pass_ack"))
525 	count_requests(test_case, skel);
528 	close_fds(test_case->clients, NR_CLIENTS);
	/* error-path cleanup: detach XDP if an earlier step bailed out */
530 	if (test_case->link) {
531 		err = pass_ack(test_case);
532 		ASSERT_OK(err, "pass_ack - clean up");
536 	close_fds(test_case->servers, NR_SERVERS);
538 	if (test_case->fastopen) {
539 		err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
540 		ASSERT_OK(err, "setup_fastopen - restore");
/* Test entry point (serial: it toggles a global sysctl and attaches
 * XDP to loopback, so it must not run concurrently with other tests).
 * Loads the skeleton once and runs every case in test_cases as a
 * subtest.
 */
544 void serial_test_migrate_reuseport(void)
546 	struct test_migrate_reuseport *skel;
549 	skel = test_migrate_reuseport__open_and_load();
550 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
553 	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
554 		test__start_subtest(test_cases[i].name);
555 		run_test(&test_cases[i], skel);
558 	test_migrate_reuseport__destroy(skel);