1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
5 #include <sys/socket.h>
7 #include <sys/select.h>
8 #include <netinet/in.h>
21 #include <sys/types.h>
22 #include <sys/sendfile.h>
24 #include <linux/netlink.h>
25 #include <linux/socket.h>
26 #include <linux/sock_diag.h>
27 #include <linux/bpf.h>
28 #include <linux/if_link.h>
29 #include <linux/tls.h>
36 #include <bpf/libbpf.h>
39 #include "cgroup_helpers.h"
42 static void running_handler(int a);
51 /* randomly selected ports for testing on lo */
55 #define BPF_SOCKMAP_FILENAME "test_sockmap_kern.bpf.o"
56 #define BPF_SOCKHASH_FILENAME "test_sockhash_kern.bpf.o"
57 #define CG_PATH "/sockmap"
60 int s1, s2, c1, c2, p1, p2;
65 struct bpf_map *maps[9];
82 int txmsg_ktls_skb_drop;
83 int txmsg_ktls_skb_redir;
87 int txmsg_omit_skb_parser;
/* Long options accepted by getopt_long(). Entries whose third field is a
 * non-NULL int pointer store the fourth field (1) into that int when the
 * option is seen; the remaining entries hand back the character given in
 * the fourth field so the option switch can dispatch on it.
 */
89 static const struct option long_options[] = {
90 {"help", no_argument, NULL, 'h' },
91 {"cgroup", required_argument, NULL, 'c' },
92 {"rate", required_argument, NULL, 'r' },
93 {"verbose", optional_argument, NULL, 'v' },
94 {"iov_count", required_argument, NULL, 'i' },
95 {"length", required_argument, NULL, 'l' },
96 {"test", required_argument, NULL, 't' },
97 {"data_test", no_argument, NULL, 'd' },
/* Flag-style options: each sets the named global to 1. */
98 {"txmsg", no_argument, &txmsg_pass, 1 },
99 {"txmsg_redir", no_argument, &txmsg_redir, 1 },
100 {"txmsg_drop", no_argument, &txmsg_drop, 1 },
/* Numeric txmsg parameters, delivered through the option character. */
101 {"txmsg_apply", required_argument, NULL, 'a'},
102 {"txmsg_cork", required_argument, NULL, 'k'},
103 {"txmsg_start", required_argument, NULL, 's'},
104 {"txmsg_end", required_argument, NULL, 'e'},
105 {"txmsg_start_push", required_argument, NULL, 'p'},
106 {"txmsg_end_push", required_argument, NULL, 'q'},
107 {"txmsg_start_pop", required_argument, NULL, 'w'},
108 {"txmsg_pop", required_argument, NULL, 'x'},
109 {"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
110 {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 },
111 {"ktls", no_argument, &ktls, 1 },
112 {"peek", no_argument, &peek_flag, 1 },
113 {"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1},
/* Comma-separated test filters (see check_whitelist/check_blacklist). */
114 {"whitelist", required_argument, NULL, 'n' },
115 {"blacklist", required_argument, NULL, 'b' },
134 struct sockmap_options {
140 bool check_recved_len;
153 void (*tester)(int cg_fd, struct sockmap_options *opt);
156 static void test_start(void)
161 static void test_fail(void)
166 static void test_pass(void)
/* Set the global txmsg_* knobs listed below back to 0. Presumably invoked
 * between tests so one test's settings do not leak into the next — confirm
 * against the callers.
 */
171 static void test_reset(void)
173 txmsg_start = txmsg_end = 0;
174 txmsg_start_pop = txmsg_pop = 0;
175 txmsg_start_push = txmsg_end_push = 0;
176 txmsg_pass = txmsg_drop = txmsg_redir = 0;
177 txmsg_apply = txmsg_cork = 0;
178 txmsg_ingress = txmsg_redir_skb = 0;
179 txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
180 txmsg_omit_skb_parser = 0;
184 static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
187 env.subtest = t->title;
188 env.prepend = o->prepend;
191 env.fail_last = env.fail_cnt;
/* Print the one-line result for the subtest that just ran. */
196 static void test_end_subtest(void)
/* Failures recorded since test_start_subtest() saved env.fail_last. */
198 int error = env.fail_cnt - env.fail_last;
/* strcmp() yields 0 when env.type is the sockmap object file name. */
199 int type = strcmp(env.type, BPF_SOCKMAP_FILENAME);
204 fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n",
205 env.test_num, env.subtest_num,
206 !type ? "sockmap" : "sockhash",
208 env.subtest, error ? "FAIL" : "OK");
211 static void test_print_results(void)
213 fprintf(stdout, "Pass: %d Fail: %d\n",
214 env.succ_cnt, env.fail_cnt);
/* Print a usage banner followed by one line per entry of long_options.
 * argv[0] is used as the program name.
 */
217 static void usage(char *argv[])
221 printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
222 printf(" options:\n");
/* Walk the table until the entry whose name is 0 (the table terminator). */
223 for (i = 0; long_options[i].name != 0; i++) {
224 printf(" --%-12s", long_options[i].name);
/* Flag-style options show the flag's current value; the others show
 * their option character.
 */
225 if (long_options[i].flag != NULL)
226 printf(" flag (internal value:%d)\n",
227 *long_options[i].flag);
229 printf(" -%c\n", long_options[i].val);
234 char *sock_to_string(int s)
/* Enable kernel TLS on socket s: attach the "tls" ULP, install TLS 1.2
 * AES-GCM-128 crypto info for both TX and RX, then set SO_SNDBUF and
 * SO_RCVBUF to so_buf. Every failing setsockopt() is reported on stderr
 * together with sock_to_string(s).
 */
252 static int sockmap_init_ktls(int verbose, int s)
254 struct tls12_crypto_info_aes_gcm_128 tls_tx = {
256 .version = TLS_1_2_VERSION,
257 .cipher_type = TLS_CIPHER_AES_GCM_128,
260 struct tls12_crypto_info_aes_gcm_128 tls_rx = {
262 .version = TLS_1_2_VERSION,
263 .cipher_type = TLS_CIPHER_AES_GCM_128,
/* Buffer size requested for both the send and the receive side. */
266 int so_buf = 6553500;
/* NOTE(review): the level is the bare literal 6, presumably the TCP
 * protocol level (IPPROTO_TCP / SOL_TCP) — confirm and consider the named
 * constant. sizeof("tls") includes the terminating NUL byte.
 */
269 err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls"));
271 fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n", sock_to_string(s), err);
274 err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx));
276 fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n", sock_to_string(s), err);
279 err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx));
281 fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n", sock_to_string(s), err);
284 err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf));
286 fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n", sock_to_string(s), err);
289 err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf));
291 fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n", sock_to_string(s), err);
296 fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s));
/* Create the four TCP test sockets (s1/s2 are bound and listen, c1/c2
 * connect to them over 127.0.0.1) and accept the server-side peers into
 * p1/p2. All six descriptors are file-scope globals.
 */
299 static int sockmap_init_sockets(int verbose)
302 struct sockaddr_in addr;
303 int *fds[4] = {&s1, &s2, &c1, &c2};
305 s1 = s2 = p1 = p2 = c1 = c2 = 0;
/* Open all four sockets. NOTE(review): the message below names s1 even
 * though the loop covers s1, s2, c1 and c2.
 */
308 for (i = 0; i < 4; i++) {
309 *fds[i] = socket(AF_INET, SOCK_STREAM, 0);
311 perror("socket s1 failed()");
/* Only fds[0] and fds[1], i.e. the listeners s1 and s2, get SO_REUSEADDR. */
317 for (i = 0; i < 2; i++) {
318 err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
319 (char *)&one, sizeof(one));
321 perror("setsockopt failed()");
326 /* Non-blocking sockets */
/* As above, only the listeners s1 and s2 are switched to non-blocking. */
327 for (i = 0; i < 2; i++) {
328 err = ioctl(*fds[i], FIONBIO, (char *)&one);
330 perror("ioctl s1 failed()");
335 /* Bind server sockets */
336 memset(&addr, 0, sizeof(struct sockaddr_in));
337 addr.sin_family = AF_INET;
338 addr.sin_addr.s_addr = inet_addr("127.0.0.1");
340 addr.sin_port = htons(S1_PORT);
341 err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
343 perror("bind s1 failed()");
347 addr.sin_port = htons(S2_PORT);
348 err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
350 perror("bind s2 failed()");
354 /* Listen server sockets */
/* NOTE(review): listen() does not take addr, so the two sin_port
 * assignments in this section have no effect on the calls.
 */
355 addr.sin_port = htons(S1_PORT);
356 err = listen(s1, 32);
358 perror("listen s1 failed()");
362 addr.sin_port = htons(S2_PORT);
363 err = listen(s2, 32);
/* NOTE(review): this follows listen(s2, ...) but the label says s1 —
 * looks like a copy-paste slip.
 */
365 perror("listen s1 failed()");
369 /* Initiate Connect */
/* EINPROGRESS is tolerated for both connects and not treated as failure. */
370 addr.sin_port = htons(S1_PORT);
371 err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
372 if (err < 0 && errno != EINPROGRESS) {
373 perror("connect c1 failed()");
377 addr.sin_port = htons(S2_PORT);
378 err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
379 if (err < 0 && errno != EINPROGRESS) {
380 perror("connect c2 failed()");
382 } else if (err < 0) {
386 /* Accept Connections */
387 p1 = accept(s1, NULL, NULL);
389 perror("accept s1 failed()");
393 p2 = accept(s2, NULL, NULL);
/* NOTE(review): this follows accept(s2, ...) but the label says s1 —
 * looks like a copy-paste slip.
 */
395 perror("accept s1 failed()");
400 printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
401 printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
410 struct timespec start;
414 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
416 struct sockmap_options *opt)
418 bool drop = opt->drop_expected;
425 perror("create file for sendpage");
428 for (i = 0; i < iov_length * cnt; i++, k++)
429 fwrite(&k, sizeof(char), 1, file);
431 fseek(file, 0, SEEK_SET);
435 clock_gettime(CLOCK_MONOTONIC, &s->start);
436 for (i = 0; i < cnt; i++) {
440 sent = sendfile(fd, fp, NULL, iov_length);
442 if (!drop && sent < 0) {
443 perror("sendpage loop error");
446 } else if (drop && sent >= 0) {
447 printf("sendpage loop error expected: %i errno %i\n",
454 s->bytes_sent += sent;
456 clock_gettime(CLOCK_MONOTONIC, &s->end);
461 static void msg_free_iov(struct msghdr *msg)
465 for (i = 0; i < msg->msg_iovlen; i++)
466 free(msg->msg_iov[i].iov_base);
472 static int msg_alloc_iov(struct msghdr *msg,
473 int iov_count, int iov_length,
474 bool data, bool xmit)
480 iov = calloc(iov_count, sizeof(struct iovec));
484 for (i = 0; i < iov_count; i++) {
485 unsigned char *d = calloc(iov_length, sizeof(char));
488 fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
492 iov[i].iov_len = iov_length;
497 for (j = 0; j < iov_length; j++)
503 msg->msg_iovlen = iov_count;
507 for (i--; i >= 0 ; i--)
508 free(msg->msg_iov[i].iov_base);
/* Walk the iovecs of msg and check the received payload bytes. When
 * txmsg_ktls_skb is set, the first iovec must hold at least 4 bytes and
 * begin with the marker "PASS"; checking then resumes at index 4.
 */
512 static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
514 int i, j = 0, bytes_cnt = 0;
517 for (i = 0; i < msg->msg_iovlen; i++) {
518 unsigned char *d = msg->msg_iov[i].iov_base;
520 /* Special case test for skb ingress + ktls */
521 if (i == 0 && txmsg_ktls_skb) {
522 if (msg->msg_iov[i].iov_len < 4)
524 if (memcmp(d, "PASS", 4) != 0) {
526 "detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
527 i, 0, d[0], d[1], d[2], d[3]);
530 j = 4; /* advance index past PASS header */
/* The scan stops at the end of the iovec or once size reaches zero.
 * NOTE(review): the diagnostic below prints d[j+1]; when j is the last
 * index of the iovec this reads one byte beyond iov_len — confirm the
 * buffer is larger than iov_len or guard the access.
 */
533 for (; j < msg->msg_iov[i].iov_len && size; j++) {
536 "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
537 i, j, d[j], k - 1, d[j+1], k);
541 if (bytes_cnt == chunk_sz) {
551 static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
552 struct msg_stats *s, bool tx,
553 struct sockmap_options *opt)
555 struct msghdr msg = {0}, msg_peek = {0};
556 int err, i, flags = MSG_NOSIGNAL;
557 bool drop = opt->drop_expected;
558 bool data = opt->data_test;
559 int iov_alloc_length = iov_length;
561 if (!tx && opt->check_recved_len)
562 iov_alloc_length *= 2;
564 err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, data, tx);
568 err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx);
574 clock_gettime(CLOCK_MONOTONIC, &s->start);
575 for (i = 0; i < cnt; i++) {
579 sent = sendmsg(fd, &msg, flags);
581 if (!drop && sent < 0) {
582 if (opt->tx_wait_mem && errno == EACCES) {
586 perror("sendmsg loop error");
588 } else if (drop && sent >= 0) {
590 "sendmsg loop error expected: %i errno %i\n",
596 s->bytes_sent += sent;
598 clock_gettime(CLOCK_MONOTONIC, &s->end);
600 int slct, recvp = 0, recv, max_fd = fd;
601 float total_bytes, txmsg_pop_total;
602 int fd_flags = O_NONBLOCK;
603 struct timeval timeout;
607 /* Account for pop bytes noting each iteration of apply will
608 * call msg_pop_data helper so we need to account for this
609 * by calculating the number of apply iterations. Note user
610 * of the tool can create cases where no data is sent by
611 * manipulating pop/push/pull/etc. For example txmsg_apply 1
612 * with txmsg_pop 1 will try to apply 1B at a time but each
613 * iteration will then pop 1B so no data will ever be sent.
614 * This is really only useful for testing edge cases in code
617 total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
619 txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
621 txmsg_pop_total = txmsg_pop * cnt;
622 total_bytes -= txmsg_pop_total;
623 err = clock_gettime(CLOCK_MONOTONIC, &s->start);
625 perror("recv start time");
626 while (s->bytes_recvd < total_bytes) {
629 timeout.tv_usec = 300000;
639 slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
642 clock_gettime(CLOCK_MONOTONIC, &s->end);
646 fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total);
648 clock_gettime(CLOCK_MONOTONIC, &s->end);
652 if (opt->tx_wait_mem) {
655 slct = select(max_fd + 1, NULL, NULL, &w, &timeout);
664 recvp = recvmsg(fd, &msg_peek, flags);
666 if (errno != EWOULDBLOCK) {
667 clock_gettime(CLOCK_MONOTONIC, &s->end);
674 recv = recvmsg(fd, &msg, flags);
676 if (errno != EWOULDBLOCK) {
677 clock_gettime(CLOCK_MONOTONIC, &s->end);
678 perror("recv failed()");
683 s->bytes_recvd += recv;
685 if (opt->check_recved_len && s->bytes_recvd > total_bytes) {
687 fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n",
688 s->bytes_recvd, total_bytes);
693 int chunk_sz = opt->sendpage ?
695 iov_length * iov_count;
697 errno = msg_verify_data(&msg, recv, chunk_sz);
699 perror("data verify msg failed");
703 errno = msg_verify_data(&msg_peek,
707 perror("data verify msg_peek failed");
713 clock_gettime(CLOCK_MONOTONIC, &s->end);
717 msg_free_iov(&msg_peek);
721 msg_free_iov(&msg_peek);
725 static float giga = 1000000000;
/* Bytes sent per second, using only the whole-second part of the interval
 * (tv_nsec is ignored). The visible callers only call this when
 * end.tv_sec - start.tv_sec is non-zero, which avoids dividing by zero.
 * NOTE(review): if bytes_sent is an integer type the division truncates
 * before the conversion to float — confirm this is acceptable.
 */
727 static inline float sentBps(struct msg_stats s)
729 return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
/* Bytes received per second, using only the whole-second part of the
 * interval (tv_nsec is ignored). The visible callers only call this when
 * end.tv_sec - start.tv_sec is non-zero, which avoids dividing by zero.
 * NOTE(review): if bytes_recvd is an integer type the division truncates
 * before the conversion to float — confirm this is acceptable.
 */
732 static inline float recvdBps(struct msg_stats s)
734 return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
737 static int sendmsg_test(struct sockmap_options *opt)
739 float sent_Bps = 0, recvd_Bps = 0;
740 int rx_fd, txpid, rxpid, err = 0;
741 struct msg_stats s = {0};
742 int iov_count = opt->iov_count;
743 int iov_buf = opt->iov_length;
744 int rx_status, tx_status;
755 /* Redirecting into non-TLS socket which sends into a TLS
756 * socket is not a valid test. So in this case lets not
757 * enable kTLS but still run the test.
759 if (!txmsg_redir || txmsg_ingress) {
760 err = sockmap_init_ktls(opt->verbose, rx_fd);
764 err = sockmap_init_ktls(opt->verbose, c1);
769 if (opt->tx_wait_mem) {
770 struct timeval timeout;
771 int rxtx_buf_len = 1024;
776 err = setsockopt(c2, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(struct timeval));
777 err |= setsockopt(c2, SOL_SOCKET, SO_SNDBUFFORCE, &rxtx_buf_len, sizeof(int));
778 err |= setsockopt(p2, SOL_SOCKET, SO_RCVBUFFORCE, &rxtx_buf_len, sizeof(int));
780 perror("setsockopt failed()");
787 if (txmsg_pop || txmsg_start_pop)
788 iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
789 if (opt->drop_expected || txmsg_ktls_skb_drop)
792 if (!iov_buf) /* zero bytes sent case */
797 err = msg_loop(rx_fd, iov_count, iov_buf,
798 cnt, &s, false, opt);
799 if (opt->verbose > 1)
801 "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
802 iov_count, iov_buf, cnt, err);
803 if (s.end.tv_sec - s.start.tv_sec) {
804 sent_Bps = sentBps(s);
805 recvd_Bps = recvdBps(s);
807 if (opt->verbose > 1)
809 "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
810 s.bytes_sent, sent_Bps, sent_Bps/giga,
811 s.bytes_recvd, recvd_Bps, recvd_Bps/giga,
812 peek_flag ? "(peek_msg)" : "");
813 if (err && txmsg_cork)
816 } else if (rxpid == -1) {
817 perror("msg_loop_rx");
821 if (opt->tx_wait_mem)
827 err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
829 err = msg_loop(c1, iov_count, iov_buf,
834 "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
835 iov_count, iov_buf, cnt, err);
836 if (s.end.tv_sec - s.start.tv_sec) {
837 sent_Bps = sentBps(s);
838 recvd_Bps = recvdBps(s);
840 if (opt->verbose > 1)
842 "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
843 s.bytes_sent, sent_Bps, sent_Bps/giga,
844 s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
846 } else if (txpid == -1) {
847 perror("msg_loop_tx");
851 assert(waitpid(rxpid, &rx_status, 0) == rxpid);
852 assert(waitpid(txpid, &tx_status, 0) == txpid);
853 if (WIFEXITED(rx_status)) {
854 err = WEXITSTATUS(rx_status);
856 fprintf(stderr, "rx thread exited with err %d.\n", err);
860 if (WIFEXITED(tx_status)) {
861 err = WEXITSTATUS(tx_status);
863 fprintf(stderr, "tx thread exited with err %d.\n", err);
869 static int forever_ping_pong(int rate, struct sockmap_options *opt)
871 struct timeval timeout;
872 char buf[1024] = {0};
878 /* Ping/Pong data from client to server */
879 sc = send(c1, buf, sizeof(buf), 0);
881 perror("send failed()");
886 int s, rc, i, max_fd = p2;
896 s = select(max_fd + 1, &w, NULL, NULL, &timeout);
901 fprintf(stderr, "unexpected timeout\n");
905 for (i = 0; i <= max_fd && s > 0; ++i) {
906 if (!FD_ISSET(i, &w))
911 rc = recv(i, buf, sizeof(buf), 0);
913 if (errno != EWOULDBLOCK) {
914 perror("recv failed()");
924 sc = send(i, buf, rc, 0);
926 perror("send failed()");
/* Run one test variant: attach the BPF programs to the sock maps and the
 * cgroup, create the test sockets, load the txmsg_* settings into the
 * control maps, dispatch to the selected test, then detach the programs
 * and clear the maps.
 *
 * options: per-run settings; base, sendpage and drop_expected are written
 *          here.
 * cg_fd:   cgroup descriptor used for the BPF_CGROUP_SOCK_OPS attachment.
 * test:    one of PING_PONG, SENDMSG, SENDPAGE, BASE, BASE_SENDPAGE.
 */
953 static int run_options(struct sockmap_options *options, int cg_fd, int test)
955 int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
957 /* If base test skip BPF setup */
958 if (test == BASE || test == BASE_SENDPAGE)
961 /* Attach programs to sockmap */
/* The stream parser is attached only when txmsg_omit_skb_parser is 0. */
962 if (!txmsg_omit_skb_parser) {
963 err = bpf_prog_attach(prog_fd[0], map_fd[0],
964 BPF_SK_SKB_STREAM_PARSER, 0);
967 "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
968 prog_fd[0], map_fd[0], err, strerror(errno));
973 err = bpf_prog_attach(prog_fd[1], map_fd[0],
974 BPF_SK_SKB_STREAM_VERDICT, 0);
976 fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
977 err, strerror(errno));
981 /* Attach programs to TLS sockmap */
982 if (txmsg_ktls_skb) {
983 if (!txmsg_omit_skb_parser) {
984 err = bpf_prog_attach(prog_fd[0], map_fd[8],
985 BPF_SK_SKB_STREAM_PARSER, 0);
988 "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
989 prog_fd[0], map_fd[8], err, strerror(errno));
994 err = bpf_prog_attach(prog_fd[2], map_fd[8],
995 BPF_SK_SKB_STREAM_VERDICT, 0);
997 fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
998 err, strerror(errno));
1003 /* Attach to cgroups */
1004 err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
1006 fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
1007 err, strerror(errno));
1012 err = sockmap_init_sockets(options->verbose);
1014 fprintf(stderr, "ERROR: test socket failed: %d\n", err);
1018 /* Attach txmsg program to sockmap */
/* Choose the sk_msg program from the txmsg_* mode flags; tx_prog_fd keeps
 * its initial -1 when no mode is selected.
 */
1020 tx_prog_fd = prog_fd[4];
1021 else if (txmsg_redir)
1022 tx_prog_fd = prog_fd[5];
1023 else if (txmsg_apply)
1024 tx_prog_fd = prog_fd[6];
1025 else if (txmsg_cork)
1026 tx_prog_fd = prog_fd[7];
1027 else if (txmsg_drop)
1028 tx_prog_fd = prog_fd[8];
1033 int redir_fd, i = 0;
1035 err = bpf_prog_attach(tx_prog_fd,
1036 map_fd[1], BPF_SK_MSG_VERDICT, 0);
1039 "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
1040 err, strerror(errno));
/* NOTE(review): the next four error format strings end in "(%s\n" and lack
 * the closing ")" that the other messages in this function have.
 */
1044 err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
1047 "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
1048 err, strerror(errno));
1057 err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
1060 "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
1061 err, strerror(errno));
1066 err = bpf_map_update_elem(map_fd[3],
1067 &i, &txmsg_apply, BPF_ANY);
1070 "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
1071 err, strerror(errno));
1077 err = bpf_map_update_elem(map_fd[4],
1078 &i, &txmsg_cork, BPF_ANY);
1081 "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
1082 err, strerror(errno));
/* The start/end, push and pop parameters below are all stored in map_fd[5]. */
1088 err = bpf_map_update_elem(map_fd[5],
1089 &i, &txmsg_start, BPF_ANY);
1092 "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
1093 err, strerror(errno));
1100 err = bpf_map_update_elem(map_fd[5],
1101 &i, &txmsg_end, BPF_ANY);
1104 "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
1105 err, strerror(errno));
1110 if (txmsg_start_push) {
1112 err = bpf_map_update_elem(map_fd[5],
1113 &i, &txmsg_start_push, BPF_ANY);
1116 "ERROR: bpf_map_update_elem (txmsg_start_push): %d (%s)\n",
1117 err, strerror(errno));
1122 if (txmsg_end_push) {
1124 err = bpf_map_update_elem(map_fd[5],
1125 &i, &txmsg_end_push, BPF_ANY);
1128 "ERROR: bpf_map_update_elem %i@%i (txmsg_end_push): %d (%s)\n",
1129 txmsg_end_push, i, err, strerror(errno));
1134 if (txmsg_start_pop) {
1136 err = bpf_map_update_elem(map_fd[5],
1137 &i, &txmsg_start_pop, BPF_ANY);
1140 "ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop): %d (%s)\n",
1141 txmsg_start_pop, i, err, strerror(errno));
/* NOTE(review): the result of this update is discarded, unlike the checked
 * update of the same value just above — confirm this is deliberate.
 */
1146 bpf_map_update_elem(map_fd[5],
1147 &i, &txmsg_start_pop, BPF_ANY);
1152 err = bpf_map_update_elem(map_fd[5],
1153 &i, &txmsg_pop, BPF_ANY);
1156 "ERROR: bpf_map_update_elem %i@%i (txmsg_pop): %d (%s)\n",
1157 txmsg_pop, i, err, strerror(errno));
/* NOTE(review): result discarded here as well. */
1162 bpf_map_update_elem(map_fd[5],
1163 &i, &txmsg_pop, BPF_ANY);
1167 if (txmsg_ingress) {
1168 int in = BPF_F_INGRESS;
1171 err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
1174 "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1175 err, strerror(errno));
1178 err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
1181 "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
1182 err, strerror(errno));
1184 err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
1187 "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
1188 err, strerror(errno));
1192 err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
1195 "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
1196 err, strerror(errno));
1200 if (txmsg_ktls_skb) {
1201 int ingress = BPF_F_INGRESS;
/* NOTE(review): the socket stored here is p2, while the error label below
 * says "c1 sockmap" — confirm the label.
 */
1204 err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
1207 "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1208 err, strerror(errno));
1211 if (txmsg_ktls_skb_redir) {
1213 err = bpf_map_update_elem(map_fd[7],
1214 &i, &ingress, BPF_ANY);
1217 "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1218 err, strerror(errno));
1222 if (txmsg_ktls_skb_drop) {
/* The same variable i is passed as both key and value. */
1224 err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
1228 if (txmsg_redir_skb) {
1229 int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
1231 int ingress = BPF_F_INGRESS;
1234 err = bpf_map_update_elem(map_fd[7],
1235 &i, &ingress, BPF_ANY);
1238 "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1239 err, strerror(errno));
1243 err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
1246 "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1247 err, strerror(errno));
1252 if (skb_use_parser) {
1254 err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY);
1258 options->drop_expected = true;
/* Dispatch on the test type. Every variant except PING_PONG goes through
 * sendmsg_test() and differs only in the base/sendpage flags.
 */
1260 if (test == PING_PONG)
1261 err = forever_ping_pong(options->rate, options);
1262 else if (test == SENDMSG) {
1263 options->base = false;
1264 options->sendpage = false;
1265 err = sendmsg_test(options);
1266 } else if (test == SENDPAGE) {
1267 options->base = false;
1268 options->sendpage = true;
1269 err = sendmsg_test(options);
1270 } else if (test == BASE) {
1271 options->base = true;
1272 options->sendpage = false;
1273 err = sendmsg_test(options);
1274 } else if (test == BASE_SENDPAGE) {
1275 options->base = true;
1276 options->sendpage = true;
1277 err = sendmsg_test(options);
1279 fprintf(stderr, "unknown test\n");
1281 /* Detach and zero all the maps */
/* The detach calls are issued unconditionally and their results ignored. */
1282 bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
1283 bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
1284 bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
1285 bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
1286 bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
1288 if (tx_prog_fd >= 0)
1289 bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
/* NOTE(review): this loop covers map_fd[0] through map_fd[7]; map_fd[8],
 * which the TLS path above writes to, is not cleared here — confirm that
 * is intended.
 */
1291 for (i = 0; i < 8; i++) {
1293 bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1294 while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
1295 bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1309 static char *test_to_str(int test)
/* Append src to the NUL-terminated string in dst without exceeding
 * dst_cap bytes in total (terminator included).
 *
 * dst:     destination buffer; must already hold a NUL-terminated string.
 * src:     string to append; it is cut short when it does not fit.
 * dst_cap: total capacity of dst in bytes.
 */
1320 static void append_str(char *dst, const char *src, size_t dst_cap)
/* Bytes still unused in dst, counting the slot of the current terminator. */
1322 size_t avail = dst_cap - strlen(dst);
1324 if (avail <= 1) /* just zero byte could be written */
1327 strncat(dst, src, avail - 1); /* strncat() adds + 1 for zero byte */
1330 #define OPTSTRING 60
/* Build a comma-separated summary of the active txmsg settings in options.
 * The buffer is cleared and bounded with OPTSTRING, so the caller must
 * supply at least OPTSTRING bytes. Text that does not fit is cut short by
 * append_str().
 */
1331 static void test_options(char *options)
/* Scratch buffer for the entries that embed a number. */
1333 char tstr[OPTSTRING];
1335 memset(options, 0, OPTSTRING);
1338 append_str(options, "pass,", OPTSTRING);
1340 append_str(options, "redir,", OPTSTRING);
1342 append_str(options, "drop,", OPTSTRING);
1344 snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
1345 append_str(options, tstr, OPTSTRING);
1348 snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
1349 append_str(options, tstr, OPTSTRING);
1352 snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
1353 append_str(options, tstr, OPTSTRING);
1356 snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
1357 append_str(options, tstr, OPTSTRING);
/* The pop entry is emitted only when txmsg_start_pop is non-zero and shows
 * the pair (start, start + txmsg_pop).
 */
1359 if (txmsg_start_pop) {
1360 snprintf(tstr, OPTSTRING, "pop (%d,%d),",
1361 txmsg_start_pop, txmsg_start_pop + txmsg_pop);
1362 append_str(options, tstr, OPTSTRING);
1365 append_str(options, "ingress,", OPTSTRING);
1366 if (txmsg_redir_skb)
1367 append_str(options, "redir_skb,", OPTSTRING);
1369 append_str(options, "ktls_skb,", OPTSTRING);
1371 append_str(options, "ktls,", OPTSTRING);
1373 append_str(options, "peek,", OPTSTRING);
/* Run a single test through run_options(), print PASS or FAILED for it and
 * bump the passed/failed counters.
 *
 * cgrp: cgroup descriptor forwarded to run_options().
 * test: test type; SENDPAGE turns opt->sendpage on, anything else off.
 * opt:  per-run settings; sendpage and drop_expected are overwritten here.
 */
1376 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
/* NOTE(review): no NULL check of this allocation and no matching free()
 * are visible before test_options() writes into the buffer — confirm both.
 */
1378 char *options = calloc(OPTSTRING, sizeof(char));
1381 if (test == SENDPAGE)
1382 opt->sendpage = true;
1384 opt->sendpage = false;
1387 opt->drop_expected = true;
1389 opt->drop_expected = false;
1391 test_options(options);
1395 " [TEST %i]: (%i, %i, %i, %s, %s): ",
1396 test_cnt, opt->rate, opt->iov_count, opt->iov_length,
1397 test_to_str(test), options);
1400 err = run_options(opt, cgrp, test);
/* A zero result from run_options() counts as a pass. */
1402 fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED");
1404 !err ? passed++ : failed++;
1409 static void test_exec(int cgrp, struct sockmap_options *opt)
1411 int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME);
1416 err = __test_exec(cgrp, SENDMSG, opt);
1421 err = __test_exec(cgrp, SENDPAGE, opt);
1427 static void test_send_one(struct sockmap_options *opt, int cgrp)
1429 opt->iov_length = 1;
1432 test_exec(cgrp, opt);
1434 opt->iov_length = 1;
1435 opt->iov_count = 1024;
1437 test_exec(cgrp, opt);
1439 opt->iov_length = 1024;
1442 test_exec(cgrp, opt);
1446 static void test_send_many(struct sockmap_options *opt, int cgrp)
1448 opt->iov_length = 3;
1451 test_exec(cgrp, opt);
1455 opt->iov_length = 5;
1456 test_exec(cgrp, opt);
1459 static void test_send_large(struct sockmap_options *opt, int cgrp)
1461 opt->iov_length = 256;
1462 opt->iov_count = 1024;
1464 test_exec(cgrp, opt);
1467 static void test_send(struct sockmap_options *opt, int cgrp)
1469 test_send_one(opt, cgrp);
1470 test_send_many(opt, cgrp);
1471 test_send_large(opt, cgrp);
1475 static void test_txmsg_pass(int cgrp, struct sockmap_options *opt)
1477 /* Test small and large iov_count values with pass/redir/apply/cork */
1479 test_send(opt, cgrp);
1482 static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
1485 test_send(opt, cgrp);
1488 static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt)
1491 opt->tx_wait_mem = true;
1492 test_send_large(opt, cgrp);
1493 opt->tx_wait_mem = false;
1496 static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
1499 test_send(opt, cgrp);
1502 static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
1504 txmsg_pass = txmsg_drop = 0;
1505 txmsg_ingress = txmsg_redir = 1;
1506 test_send(opt, cgrp);
1509 static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
1511 bool data = opt->data_test;
1514 opt->data_test = true;
1517 txmsg_pass = txmsg_drop = 0;
1518 txmsg_ingress = txmsg_redir = 0;
1522 /* Using data verification so ensure iov layout is
1523 * expected from test receiver side. e.g. has enough
1524 * bytes to write test code.
1526 opt->iov_length = 100;
1529 test_exec(cgrp, opt);
1531 txmsg_ktls_skb_drop = 1;
1532 test_exec(cgrp, opt);
1534 txmsg_ktls_skb_drop = 0;
1535 txmsg_ktls_skb_redir = 1;
1536 test_exec(cgrp, opt);
1537 txmsg_ktls_skb_redir = 0;
1539 /* Tests that omit skb_parser */
1540 txmsg_omit_skb_parser = 1;
1543 test_exec(cgrp, opt);
1545 txmsg_ktls_skb_drop = 1;
1546 test_exec(cgrp, opt);
1547 txmsg_ktls_skb_drop = 0;
1549 txmsg_ktls_skb_redir = 1;
1550 test_exec(cgrp, opt);
1553 test_exec(cgrp, opt);
1554 txmsg_omit_skb_parser = 0;
1556 opt->data_test = data;
1560 /* Test cork with hung data. This tests poor usage patterns where
1561 * cork can leave data on the ring if user program is buggy and
1562 * doesn't flush them somehow. They do take some time however
1563 * because they wait for a timeout. Test pass, redir and cork with
1564 * apply logic. Use cork size of 4097 with send_large to avoid
1565 * aligning cork size with send size.
1567 static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
1573 test_send_large(opt, cgrp);
1579 test_send_large(opt, cgrp);
1585 test_send_large(opt, cgrp);
1588 static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
1590 /* Test basic start/end */
1593 test_send(opt, cgrp);
1598 test_send_large(opt, cgrp);
1600 /* Test pull + redirect */
1604 test_send(opt, cgrp);
1606 /* Test pull + cork */
1611 test_send_many(opt, cgrp);
1613 /* Test pull + cork + redirect */
1618 test_send_many(opt, cgrp);
1621 static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
1623 /* Test basic pop */
1624 txmsg_start_pop = 1;
1626 test_send_many(opt, cgrp);
1628 /* Test pop with >4k */
1629 txmsg_start_pop = 4096;
1631 test_send_large(opt, cgrp);
1633 /* Test pop + redirect */
1635 txmsg_start_pop = 1;
1637 test_send_many(opt, cgrp);
1639 /* Test pop + cork */
1642 txmsg_start_pop = 1;
1644 test_send_many(opt, cgrp);
1646 /* Test pop + redirect + cork */
1649 txmsg_start_pop = 1;
1651 test_send_many(opt, cgrp);
1654 static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
1656 /* Test basic push */
1657 txmsg_start_push = 1;
1659 test_send(opt, cgrp);
1661 /* Test push 4kB >4k */
1662 txmsg_start_push = 4096;
1663 txmsg_end_push = 4096;
1664 test_send_large(opt, cgrp);
1666 /* Test push + redirect */
1668 txmsg_start_push = 1;
1670 test_send_many(opt, cgrp);
1672 /* Test push + cork */
1675 txmsg_start_push = 1;
1677 test_send_many(opt, cgrp);
1680 static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
1682 txmsg_start_push = 1;
1683 txmsg_end_push = 10;
1684 txmsg_start_pop = 5;
1686 test_send_large(opt, cgrp);
1689 static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
1696 test_send_one(opt, cgrp);
1703 test_send_one(opt, cgrp);
1710 test_send_one(opt, cgrp);
1717 test_send_large(opt, cgrp);
1724 test_send_large(opt, cgrp);
1731 test_send_large(opt, cgrp);
1734 static void test_txmsg_cork(int cgrp, struct sockmap_options *opt)
1740 test_send(opt, cgrp);
1746 test_send(opt, cgrp);
1749 static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt)
1752 skb_use_parser = 512;
1754 skb_use_parser = 570;
1755 opt->iov_length = 256;
1758 test_exec(cgrp, opt);
1761 static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt)
1765 skb_use_parser = 10;
1766 opt->iov_length = 20;
1769 opt->check_recved_len = true;
1770 test_exec(cgrp, opt);
1771 opt->check_recved_len = false;
1774 char *map_names[] = {
1786 int prog_attach_type[] = {
1787 BPF_SK_SKB_STREAM_PARSER,
1788 BPF_SK_SKB_STREAM_VERDICT,
1789 BPF_SK_SKB_STREAM_VERDICT,
1790 BPF_CGROUP_SOCK_OPS,
1801 BPF_PROG_TYPE_SK_SKB,
1802 BPF_PROG_TYPE_SK_SKB,
1803 BPF_PROG_TYPE_SK_SKB,
1804 BPF_PROG_TYPE_SOCK_OPS,
1805 BPF_PROG_TYPE_SK_MSG,
1806 BPF_PROG_TYPE_SK_MSG,
1807 BPF_PROG_TYPE_SK_MSG,
1808 BPF_PROG_TYPE_SK_MSG,
1809 BPF_PROG_TYPE_SK_MSG,
1810 BPF_PROG_TYPE_SK_MSG,
1811 BPF_PROG_TYPE_SK_MSG,
/* Open the BPF object file bpf_file, assign a program type and expected
 * attach type to each program, load the object, and record the program
 * and map descriptors in the prog_fd[], maps[] and map_fd[] globals.
 */
1814 static int populate_progs(char *bpf_file)
1816 struct bpf_program *prog;
1817 struct bpf_object *obj;
1821 obj = bpf_object__open(bpf_file);
1822 err = libbpf_get_error(obj);
1826 libbpf_strerror(err, err_buf, sizeof(err_buf));
1827 printf("Unable to load eBPF objects in file '%s' : %s\n",
/* Types are taken from prog_type[] and prog_attach_type[] by position, so
 * those tables must follow the order in which the object yields its
 * programs.
 */
1832 bpf_object__for_each_program(prog, obj) {
1833 bpf_program__set_type(prog, prog_type[i]);
1834 bpf_program__set_expected_attach_type(prog,
1835 prog_attach_type[i]);
/* NOTE(review): the load result is stored in the loop counter i; no check
 * of it is visible before i is used as the prog_fd[] index below —
 * confirm it is checked and reset.
 */
1839 i = bpf_object__load(obj);
1841 bpf_object__for_each_program(prog, obj) {
1842 prog_fd[i] = bpf_program__fd(prog);
/* Resolve each map by its name in map_names[]. NOTE(review): maps[i] is
 * handed to bpf_map__fd() with no visible NULL check; a missing map is
 * presumably caught by the negative descriptor test below — confirm.
 */
1846 for (i = 0; i < ARRAY_SIZE(map_fd); i++) {
1847 maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
1848 map_fd[i] = bpf_map__fd(maps[i]);
1849 if (map_fd[i] < 0) {
1850 fprintf(stderr, "load_bpf_file: (%i) %s\n",
1851 map_fd[i], strerror(errno));
1859 struct _test test[] = {
1860 {"txmsg test passthrough", test_txmsg_pass},
1861 {"txmsg test redirect", test_txmsg_redir},
1862 {"txmsg test redirect wait send mem", test_txmsg_redir_wait_sndmem},
1863 {"txmsg test drop", test_txmsg_drop},
1864 {"txmsg test ingress redirect", test_txmsg_ingress_redir},
1865 {"txmsg test skb", test_txmsg_skb},
1866 {"txmsg test apply", test_txmsg_apply},
1867 {"txmsg test cork", test_txmsg_cork},
1868 {"txmsg test hanging corks", test_txmsg_cork_hangs},
1869 {"txmsg test push_data", test_txmsg_push},
1870 {"txmsg test pull-data", test_txmsg_pull},
1871 {"txmsg test pop-data", test_txmsg_pop},
1872 {"txmsg test push/pop data", test_txmsg_push_pop},
1873 {"txmsg test ingress parser", test_txmsg_ingress_parser},
1874 {"txmsg test ingress parser2", test_txmsg_ingress_parser2},
/* Match test t against the comma-separated opt->whitelist. An entry
 * matches when it is a substring of opt->prepend (if set), of opt->map or
 * of t->title.
 */
1877 static int check_whitelist(struct _test *t, struct sockmap_options *opt)
1881 if (!opt->whitelist)
/* Work on a copy because strtok() writes into the string it splits.
 * NOTE(review): no free(ptr) is visible — confirm the copy is released on
 * every return path. strtok() also keeps hidden static state.
 */
1883 ptr = strdup(opt->whitelist);
1886 entry = strtok(ptr, ",");
1888 if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1889 strstr(opt->map, entry) != 0 ||
1890 strstr(t->title, entry) != 0)
1892 entry = strtok(NULL, ",");
/* Match test t against the comma-separated opt->blacklist. An entry
 * matches when it is a substring of opt->prepend (if set), of opt->map or
 * of t->title.
 */
1897 static int check_blacklist(struct _test *t, struct sockmap_options *opt)
1901 if (!opt->blacklist)
/* Work on a copy because strtok() writes into the string it splits.
 * NOTE(review): no free(ptr) is visible — confirm the copy is released on
 * every return path. strtok() also keeps hidden static state.
 */
1903 ptr = strdup(opt->blacklist);
1906 entry = strtok(ptr, ",");
1908 if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1909 strstr(opt->map, entry) != 0 ||
1910 strstr(t->title, entry) != 0)
1912 entry = strtok(NULL, ",");
1917 static int __test_selftests(int cg_fd, struct sockmap_options *opt)
1921 err = populate_progs(opt->map);
1923 fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
1927 /* Tests basic commands and APIs */
1928 for (i = 0; i < ARRAY_SIZE(test); i++) {
1929 struct _test t = test[i];
1931 if (check_whitelist(&t, opt) != 0)
1933 if (check_blacklist(&t, opt) == 0)
1936 test_start_subtest(&t, opt);
1937 t.tester(cg_fd, opt);
1944 static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt)
1946 opt->map = BPF_SOCKMAP_FILENAME;
1947 __test_selftests(cg_fd, opt);
1950 static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
1952 opt->map = BPF_SOCKHASH_FILENAME;
1953 __test_selftests(cg_fd, opt);
1956 static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
1958 opt->map = BPF_SOCKHASH_FILENAME;
1959 opt->prepend = "ktls";
1961 __test_selftests(cg_fd, opt);
1965 static int test_selftest(int cg_fd, struct sockmap_options *opt)
1968 test_selftests_sockmap(cg_fd, opt);
1969 test_selftests_sockhash(cg_fd, opt);
1970 test_selftests_ktls(cg_fd, opt);
1971 test_print_results();
/* Entry point: parse the command line, obtain a cgroup descriptor, then
 * either run the built-in selftests (the default) or load bpf_file and run
 * the single test picked with --test.
 */
1975 int main(int argc, char **argv)
1977 int iov_count = 1, length = 1024, rate = 1;
1978 struct sockmap_options options = {0};
1979 int opt, longindex, err, cg_fd = 0;
1980 char *bpf_file = BPF_SOCKMAP_FILENAME;
1981 int test = SELFTESTS;
1982 bool cg_created = 0;
/* NOTE(review): the short-option string has no a, k, s, e, w or x, so the
 * long options that return those characters cannot be given in short
 * form. It also lists "v:" (argument required) while --verbose is declared
 * optional_argument — confirm both are intended.
 */
1984 while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:",
1985 long_options, &longindex)) != -1) {
/* NOTE(review): atoi() cannot report malformed input; every numeric
 * option below silently becomes 0 on a bad argument.
 */
1988 txmsg_start = atoi(optarg);
1991 txmsg_end = atoi(optarg);
1994 txmsg_start_push = atoi(optarg);
1997 txmsg_end_push = atoi(optarg);
2000 txmsg_start_pop = atoi(optarg);
2003 txmsg_pop = atoi(optarg);
2006 txmsg_apply = atoi(optarg);
2009 txmsg_cork = atoi(optarg);
/* NOTE(review): O_RDONLY is passed in the mode position of open(), not
 * OR-ed into the flags. Presumably O_DIRECTORY | O_RDONLY was meant; the
 * call likely behaves the same only because O_RDONLY is 0 — confirm.
 */
2012 cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
2015 "ERROR: (%i) open cg path failed: %s\n",
2021 rate = atoi(optarg);
2024 options.verbose = 1;
2026 options.verbose = atoi(optarg);
2029 iov_count = atoi(optarg);
2032 length = atoi(optarg);
2035 options.data_test = true;
2038 if (strcmp(optarg, "ping") == 0) {
2040 } else if (strcmp(optarg, "sendmsg") == 0) {
2042 } else if (strcmp(optarg, "base") == 0) {
2044 } else if (strcmp(optarg, "base_sendpage") == 0) {
2045 test = BASE_SENDPAGE;
2046 } else if (strcmp(optarg, "sendpage") == 0) {
/* The filter strings are duplicated here and freed at the end of main(). */
2054 options.whitelist = strdup(optarg);
2055 if (!options.whitelist)
2059 options.blacklist = strdup(optarg);
2060 if (!options.blacklist)
2072 cg_fd = cgroup_setup_and_join(CG_PATH);
2078 /* Use libbpf 1.0 API mode */
2079 libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
2081 if (test == SELFTESTS) {
2082 err = test_selftest(cg_fd, &options);
2086 err = populate_progs(bpf_file);
2088 fprintf(stderr, "populate program: (%s) %s\n",
2089 bpf_file, strerror(errno));
2095 signal(SIGINT, running_handler);
2097 options.iov_count = iov_count;
2098 options.iov_length = length;
2099 options.rate = rate;
2101 err = run_options(&options, cg_fd, test);
2103 if (options.whitelist)
2104 free(options.whitelist);
2105 if (options.blacklist)
2106 free(options.blacklist);
2108 cleanup_cgroup_environment();
2113 void running_handler(int a)