1 #include <linux/module.h>
2 #include <linux/errno.h>
3 #include <linux/socket.h>
4 #include <linux/skbuff.h>
6 #include <linux/icmp.h>
8 #include <linux/types.h>
9 #include <linux/kernel.h>
10 #include <net/genetlink.h>
14 #include <net/protocol.h>
16 #include <net/udp_tunnel.h>
18 #include <uapi/linux/fou.h>
19 #include <uapi/linux/genetlink.h>
28 struct list_head list;
32 #define FOU_F_REMCSUM_NOPARTIAL BIT(0)
38 struct udp_port_cfg udp_config;
41 static unsigned int fou_net_id;
44 struct list_head fou_list;
45 struct mutex fou_lock;
/* Return the struct fou pointer kept in the socket's sk_user_data
 * (fou_create() passes the fou as tunnel_cfg.sk_user_data when it sets
 * up the tunnel socket).
 */
48 static inline struct fou *fou_from_sock(struct sock *sk)
50 return sk->sk_user_data;
53 static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len)
55 /* Remove 'len' bytes from the packet (UDP header and
56 * FOU header if present).
58 if (fou->family == AF_INET)
59 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
61 ipv6_hdr(skb)->payload_len =
62 htons(ntohs(ipv6_hdr(skb)->payload_len) - len);
65 skb_postpull_rcsum(skb, udp_hdr(skb), len);
66 skb_reset_transport_header(skb);
67 return iptunnel_pull_offloads(skb);
70 static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
72 struct fou *fou = fou_from_sock(sk);
77 if (fou_recv_pull(skb, fou, sizeof(struct udphdr)))
80 return -fou->protocol;
87 static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
88 void *data, size_t hdrlen, u8 ipproto,
92 size_t start = ntohs(pd[0]);
93 size_t offset = ntohs(pd[1]);
94 size_t plen = sizeof(struct udphdr) + hdrlen +
95 max_t(size_t, offset + sizeof(u16), start);
97 if (skb->remcsum_offload)
100 if (!pskb_may_pull(skb, plen))
102 guehdr = (struct guehdr *)&udp_hdr(skb)[1];
104 skb_remcsum_process(skb, (void *)guehdr + hdrlen,
105 start, offset, nopartial);
110 static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
117 static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
119 struct fou *fou = fou_from_sock(sk);
120 size_t len, optlen, hdrlen;
121 struct guehdr *guehdr;
129 len = sizeof(struct udphdr) + sizeof(struct guehdr);
130 if (!pskb_may_pull(skb, len))
133 guehdr = (struct guehdr *)&udp_hdr(skb)[1];
135 switch (guehdr->version) {
136 case 0: /* Full GUE header present */
140 /* Direct encapsulation of IPv4 or IPv6 */
144 switch (((struct iphdr *)guehdr)->version) {
155 if (fou_recv_pull(skb, fou, sizeof(struct udphdr)))
161 default: /* Undefined version */
165 optlen = guehdr->hlen << 2;
168 if (!pskb_may_pull(skb, len))
171 /* guehdr may change after pull */
172 guehdr = (struct guehdr *)&udp_hdr(skb)[1];
174 if (validate_gue_flags(guehdr, optlen))
177 hdrlen = sizeof(struct guehdr) + optlen;
179 if (fou->family == AF_INET)
180 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
182 ipv6_hdr(skb)->payload_len =
183 htons(ntohs(ipv6_hdr(skb)->payload_len) - len);
185 /* Pull csum through the guehdr now. This can be used if
186 * there is a remote checksum offload.
188 skb_postpull_rcsum(skb, udp_hdr(skb), len);
192 if (guehdr->flags & GUE_FLAG_PRIV) {
193 __be32 flags = *(__be32 *)(data + doffset);
195 doffset += GUE_LEN_PRIV;
197 if (flags & GUE_PFLAG_REMCSUM) {
198 guehdr = gue_remcsum(skb, guehdr, data + doffset,
199 hdrlen, guehdr->proto_ctype,
201 FOU_F_REMCSUM_NOPARTIAL));
207 doffset += GUE_PLEN_REMCSUM;
211 if (unlikely(guehdr->control))
212 return gue_control_message(skb, guehdr);
214 proto_ctype = guehdr->proto_ctype;
215 __skb_pull(skb, sizeof(struct udphdr) + hdrlen);
216 skb_reset_transport_header(skb);
218 if (iptunnel_pull_offloads(skb))
228 static struct sk_buff *fou_gro_receive(struct sock *sk,
229 struct list_head *head,
232 u8 proto = fou_from_sock(sk)->protocol;
233 const struct net_offload **offloads;
234 const struct net_offload *ops;
235 struct sk_buff *pp = NULL;
237 /* We can clear the encap_mark for FOU as we are essentially doing
238 * one of two possible things. We are either adding an L4 tunnel
239 * header to the outer L3 tunnel header, or we are simply
240 * treating the GRE tunnel header as though it is a UDP protocol
241 * specific header such as VXLAN or GENEVE.
243 NAPI_GRO_CB(skb)->encap_mark = 0;
245 /* Flag this frame as already having an outer encap header */
246 NAPI_GRO_CB(skb)->is_fou = 1;
249 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
250 ops = rcu_dereference(offloads[proto]);
251 if (!ops || !ops->callbacks.gro_receive)
254 pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
262 static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
265 const struct net_offload *ops;
266 u8 proto = fou_from_sock(sk)->protocol;
268 const struct net_offload **offloads;
271 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
272 ops = rcu_dereference(offloads[proto]);
273 if (WARN_ON(!ops || !ops->callbacks.gro_complete))
276 err = ops->callbacks.gro_complete(skb, nhoff);
278 skb_set_inner_mac_header(skb, nhoff);
286 static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
287 struct guehdr *guehdr, void *data,
288 size_t hdrlen, struct gro_remcsum *grc,
292 size_t start = ntohs(pd[0]);
293 size_t offset = ntohs(pd[1]);
295 if (skb->remcsum_offload)
298 if (!NAPI_GRO_CB(skb)->csum_valid)
301 guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen,
302 start, offset, grc, nopartial);
304 skb->remcsum_offload = 1;
309 static struct sk_buff *gue_gro_receive(struct sock *sk,
310 struct list_head *head,
313 const struct net_offload **offloads;
314 const struct net_offload *ops;
315 struct sk_buff *pp = NULL;
317 struct guehdr *guehdr;
318 size_t len, optlen, hdrlen, off;
322 struct fou *fou = fou_from_sock(sk);
323 struct gro_remcsum grc;
326 skb_gro_remcsum_init(&grc);
328 off = skb_gro_offset(skb);
329 len = off + sizeof(*guehdr);
331 guehdr = skb_gro_header_fast(skb, off);
332 if (skb_gro_header_hard(skb, len)) {
333 guehdr = skb_gro_header_slow(skb, len, off);
334 if (unlikely(!guehdr))
338 switch (guehdr->version) {
342 switch (((struct iphdr *)guehdr)->version) {
344 proto = IPPROTO_IPIP;
347 proto = IPPROTO_IPV6;
357 optlen = guehdr->hlen << 2;
360 if (skb_gro_header_hard(skb, len)) {
361 guehdr = skb_gro_header_slow(skb, len, off);
362 if (unlikely(!guehdr))
366 if (unlikely(guehdr->control) || guehdr->version != 0 ||
367 validate_gue_flags(guehdr, optlen))
370 hdrlen = sizeof(*guehdr) + optlen;
372 /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
373 * this is needed if there is a remote checksum offload.
375 skb_gro_postpull_rcsum(skb, guehdr, hdrlen);
379 if (guehdr->flags & GUE_FLAG_PRIV) {
380 __be32 flags = *(__be32 *)(data + doffset);
382 doffset += GUE_LEN_PRIV;
384 if (flags & GUE_PFLAG_REMCSUM) {
385 guehdr = gue_gro_remcsum(skb, off, guehdr,
386 data + doffset, hdrlen, &grc,
388 FOU_F_REMCSUM_NOPARTIAL));
395 doffset += GUE_PLEN_REMCSUM;
399 skb_gro_pull(skb, hdrlen);
401 list_for_each_entry(p, head, list) {
402 const struct guehdr *guehdr2;
404 if (!NAPI_GRO_CB(p)->same_flow)
407 guehdr2 = (struct guehdr *)(p->data + off);
409 /* Compare base GUE header to be equal (covers
410 * hlen, version, proto_ctype, and flags).
412 if (guehdr->word != guehdr2->word) {
413 NAPI_GRO_CB(p)->same_flow = 0;
417 /* Check that the optional fields are the same. */
418 if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
419 guehdr->hlen << 2)) {
420 NAPI_GRO_CB(p)->same_flow = 0;
425 proto = guehdr->proto_ctype;
429 /* We can clear the encap_mark for GUE as we are essentially doing
430 * one of two possible things. We are either adding an L4 tunnel
431 * header to the outer L3 tunnel header, or we are simply
432 * treating the GRE tunnel header as though it is a UDP protocol
433 * specific header such as VXLAN or GENEVE.
435 NAPI_GRO_CB(skb)->encap_mark = 0;
437 /* Flag this frame as already having an outer encap header */
438 NAPI_GRO_CB(skb)->is_fou = 1;
441 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
442 ops = rcu_dereference(offloads[proto]);
443 if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
446 pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
452 skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
457 static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
459 const struct net_offload **offloads;
460 struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
461 const struct net_offload *ops;
462 unsigned int guehlen = 0;
466 switch (guehdr->version) {
468 proto = guehdr->proto_ctype;
469 guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
472 switch (((struct iphdr *)guehdr)->version) {
474 proto = IPPROTO_IPIP;
477 proto = IPPROTO_IPV6;
488 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
489 ops = rcu_dereference(offloads[proto]);
490 if (WARN_ON(!ops || !ops->callbacks.gro_complete))
493 err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
495 skb_set_inner_mac_header(skb, nhoff + guehlen);
502 static bool fou_cfg_cmp(struct fou *fou, struct fou_cfg *cfg)
504 struct sock *sk = fou->sock->sk;
505 struct udp_port_cfg *udp_cfg = &cfg->udp_config;
507 if (fou->family != udp_cfg->family ||
508 fou->port != udp_cfg->local_udp_port ||
509 sk->sk_dport != udp_cfg->peer_udp_port ||
510 sk->sk_bound_dev_if != udp_cfg->bind_ifindex)
513 if (fou->family == AF_INET) {
514 if (sk->sk_rcv_saddr != udp_cfg->local_ip.s_addr ||
515 sk->sk_daddr != udp_cfg->peer_ip.s_addr)
519 #if IS_ENABLED(CONFIG_IPV6)
521 if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, &udp_cfg->local_ip6) ||
522 ipv6_addr_cmp(&sk->sk_v6_daddr, &udp_cfg->peer_ip6))
532 static int fou_add_to_port_list(struct net *net, struct fou *fou,
535 struct fou_net *fn = net_generic(net, fou_net_id);
538 mutex_lock(&fn->fou_lock);
539 list_for_each_entry(fout, &fn->fou_list, list) {
540 if (fou_cfg_cmp(fout, cfg)) {
541 mutex_unlock(&fn->fou_lock);
546 list_add(&fou->list, &fn->fou_list);
547 mutex_unlock(&fn->fou_lock);
552 static void fou_release(struct fou *fou)
554 struct socket *sock = fou->sock;
556 list_del(&fou->list);
557 udp_tunnel_sock_release(sock);
562 static int fou_create(struct net *net, struct fou_cfg *cfg,
563 struct socket **sockp)
565 struct socket *sock = NULL;
566 struct fou *fou = NULL;
568 struct udp_tunnel_sock_cfg tunnel_cfg;
571 /* Open UDP socket */
572 err = udp_sock_create(net, &cfg->udp_config, &sock);
576 /* Allocate FOU port structure */
577 fou = kzalloc(sizeof(*fou), GFP_KERNEL);
585 fou->port = cfg->udp_config.local_udp_port;
586 fou->family = cfg->udp_config.family;
587 fou->flags = cfg->flags;
588 fou->type = cfg->type;
591 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
592 tunnel_cfg.encap_type = 1;
593 tunnel_cfg.sk_user_data = fou;
594 tunnel_cfg.encap_destroy = NULL;
596 /* Initialize for fou type */
598 case FOU_ENCAP_DIRECT:
599 tunnel_cfg.encap_rcv = fou_udp_recv;
600 tunnel_cfg.gro_receive = fou_gro_receive;
601 tunnel_cfg.gro_complete = fou_gro_complete;
602 fou->protocol = cfg->protocol;
605 tunnel_cfg.encap_rcv = gue_udp_recv;
606 tunnel_cfg.gro_receive = gue_gro_receive;
607 tunnel_cfg.gro_complete = gue_gro_complete;
614 setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
616 sk->sk_allocation = GFP_ATOMIC;
618 err = fou_add_to_port_list(net, fou, cfg);
630 udp_tunnel_sock_release(sock);
635 static int fou_destroy(struct net *net, struct fou_cfg *cfg)
637 struct fou_net *fn = net_generic(net, fou_net_id);
641 mutex_lock(&fn->fou_lock);
642 list_for_each_entry(fou, &fn->fou_list, list) {
643 if (fou_cfg_cmp(fou, cfg)) {
649 mutex_unlock(&fn->fou_lock);
654 static struct genl_family fou_nl_family;
/* Netlink attribute policy for the FOU generic netlink family.
 *
 * Scalar attributes are validated by NLA_* type. The two IPv6 address
 * attributes carry a raw struct in6_addr and are read with
 * nla_get_in6_addr(), so they are validated by length: .len must hold
 * sizeof(struct in6_addr). Storing that size in .type instead would make
 * the byte count be interpreted as an unrelated NLA_* attribute type and
 * leave the payload length unchecked against the size that is later read.
 */
656 static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
657 [FOU_ATTR_PORT] = { .type = NLA_U16, },
658 [FOU_ATTR_AF] = { .type = NLA_U8, },
659 [FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
660 [FOU_ATTR_TYPE] = { .type = NLA_U8, },
661 [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
662 [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, },
663 [FOU_ATTR_PEER_V4] = { .type = NLA_U32, },
664 [FOU_ATTR_LOCAL_V6] = { .len = sizeof(struct in6_addr), }, /* raw IPv6 address */
665 [FOU_ATTR_PEER_V6] = { .len = sizeof(struct in6_addr), }, /* raw IPv6 address */
666 [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, },
667 [FOU_ATTR_IFINDEX] = { .type = NLA_S32, },
670 static int parse_nl_config(struct genl_info *info,
673 bool has_local = false, has_peer = false;
678 memset(cfg, 0, sizeof(*cfg));
680 cfg->udp_config.family = AF_INET;
682 if (info->attrs[FOU_ATTR_AF]) {
683 u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);
689 cfg->udp_config.ipv6_v6only = 1;
692 return -EAFNOSUPPORT;
695 cfg->udp_config.family = family;
698 if (info->attrs[FOU_ATTR_PORT]) {
699 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
700 cfg->udp_config.local_udp_port = port;
703 if (info->attrs[FOU_ATTR_IPPROTO])
704 cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
706 if (info->attrs[FOU_ATTR_TYPE])
707 cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
709 if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
710 cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;
712 if (cfg->udp_config.family == AF_INET) {
713 if (info->attrs[FOU_ATTR_LOCAL_V4]) {
714 attr = info->attrs[FOU_ATTR_LOCAL_V4];
715 cfg->udp_config.local_ip.s_addr = nla_get_in_addr(attr);
719 if (info->attrs[FOU_ATTR_PEER_V4]) {
720 attr = info->attrs[FOU_ATTR_PEER_V4];
721 cfg->udp_config.peer_ip.s_addr = nla_get_in_addr(attr);
724 #if IS_ENABLED(CONFIG_IPV6)
726 if (info->attrs[FOU_ATTR_LOCAL_V6]) {
727 attr = info->attrs[FOU_ATTR_LOCAL_V6];
728 cfg->udp_config.local_ip6 = nla_get_in6_addr(attr);
732 if (info->attrs[FOU_ATTR_PEER_V6]) {
733 attr = info->attrs[FOU_ATTR_PEER_V6];
734 cfg->udp_config.peer_ip6 = nla_get_in6_addr(attr);
741 if (info->attrs[FOU_ATTR_PEER_PORT]) {
742 port = nla_get_be16(info->attrs[FOU_ATTR_PEER_PORT]);
743 cfg->udp_config.peer_udp_port = port;
749 if (info->attrs[FOU_ATTR_IFINDEX]) {
753 ifindex = nla_get_s32(info->attrs[FOU_ATTR_IFINDEX]);
755 cfg->udp_config.bind_ifindex = ifindex;
761 static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
763 struct net *net = genl_info_net(info);
767 err = parse_nl_config(info, &cfg);
771 return fou_create(net, &cfg, NULL);
774 static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
776 struct net *net = genl_info_net(info);
780 err = parse_nl_config(info, &cfg);
784 return fou_destroy(net, &cfg);
787 static int fou_fill_info(struct fou *fou, struct sk_buff *msg)
789 struct sock *sk = fou->sock->sk;
791 if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) ||
792 nla_put_be16(msg, FOU_ATTR_PORT, fou->port) ||
793 nla_put_be16(msg, FOU_ATTR_PEER_PORT, sk->sk_dport) ||
794 nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) ||
795 nla_put_u8(msg, FOU_ATTR_TYPE, fou->type) ||
796 nla_put_s32(msg, FOU_ATTR_IFINDEX, sk->sk_bound_dev_if))
799 if (fou->flags & FOU_F_REMCSUM_NOPARTIAL)
800 if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL))
803 if (fou->sock->sk->sk_family == AF_INET) {
804 if (nla_put_in_addr(msg, FOU_ATTR_LOCAL_V4, sk->sk_rcv_saddr))
807 if (nla_put_in_addr(msg, FOU_ATTR_PEER_V4, sk->sk_daddr))
809 #if IS_ENABLED(CONFIG_IPV6)
811 if (nla_put_in6_addr(msg, FOU_ATTR_LOCAL_V6,
812 &sk->sk_v6_rcv_saddr))
815 if (nla_put_in6_addr(msg, FOU_ATTR_PEER_V6, &sk->sk_v6_daddr))
823 static int fou_dump_info(struct fou *fou, u32 portid, u32 seq,
824 u32 flags, struct sk_buff *skb, u8 cmd)
828 hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd);
832 if (fou_fill_info(fou, skb) < 0)
833 goto nla_put_failure;
835 genlmsg_end(skb, hdr);
839 genlmsg_cancel(skb, hdr);
843 static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
845 struct net *net = genl_info_net(info);
846 struct fou_net *fn = net_generic(net, fou_net_id);
854 ret = parse_nl_config(info, &cfg);
857 port = cfg.udp_config.local_udp_port;
861 family = cfg.udp_config.family;
862 if (family != AF_INET && family != AF_INET6)
865 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
870 mutex_lock(&fn->fou_lock);
871 list_for_each_entry(fout, &fn->fou_list, list) {
872 if (fou_cfg_cmp(fout, &cfg)) {
873 ret = fou_dump_info(fout, info->snd_portid,
874 info->snd_seq, 0, msg,
879 mutex_unlock(&fn->fou_lock);
883 return genlmsg_reply(msg, info);
890 static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
892 struct net *net = sock_net(skb->sk);
893 struct fou_net *fn = net_generic(net, fou_net_id);
897 mutex_lock(&fn->fou_lock);
898 list_for_each_entry(fout, &fn->fou_list, list) {
899 if (idx++ < cb->args[0])
901 ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid,
902 cb->nlh->nlmsg_seq, NLM_F_MULTI,
907 mutex_unlock(&fn->fou_lock);
913 static const struct genl_ops fou_nl_ops[] = {
916 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
917 .doit = fou_nl_cmd_add_port,
918 .flags = GENL_ADMIN_PERM,
922 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
923 .doit = fou_nl_cmd_rm_port,
924 .flags = GENL_ADMIN_PERM,
928 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
929 .doit = fou_nl_cmd_get_port,
930 .dumpit = fou_nl_dump,
934 static struct genl_family fou_nl_family __ro_after_init = {
936 .name = FOU_GENL_NAME,
937 .version = FOU_GENL_VERSION,
938 .maxattr = FOU_ATTR_MAX,
939 .policy = fou_nl_policy,
941 .module = THIS_MODULE,
943 .n_ops = ARRAY_SIZE(fou_nl_ops),
/* Encapsulation header length for FOU: always the size of a UDP header.
 * The ip_tunnel_encap argument is not consulted.
 */
946 size_t fou_encap_hlen(struct ip_tunnel_encap *e)
948 return sizeof(struct udphdr);
950 EXPORT_SYMBOL(fou_encap_hlen);
952 size_t gue_encap_hlen(struct ip_tunnel_encap *e)
955 bool need_priv = false;
957 len = sizeof(struct udphdr) + sizeof(struct guehdr);
959 if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) {
960 len += GUE_PLEN_REMCSUM;
964 len += need_priv ? GUE_LEN_PRIV : 0;
968 EXPORT_SYMBOL(gue_encap_hlen);
970 int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
971 u8 *protocol, __be16 *sport, int type)
975 err = iptunnel_handle_offloads(skb, type);
979 *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
984 EXPORT_SYMBOL(__fou_build_header);
986 int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
987 u8 *protocol, __be16 *sport, int type)
989 struct guehdr *guehdr;
990 size_t hdrlen, optlen = 0;
992 bool need_priv = false;
995 if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
996 skb->ip_summed == CHECKSUM_PARTIAL) {
997 optlen += GUE_PLEN_REMCSUM;
998 type |= SKB_GSO_TUNNEL_REMCSUM;
1002 optlen += need_priv ? GUE_LEN_PRIV : 0;
1004 err = iptunnel_handle_offloads(skb, type);
1008 /* Get source port (based on flow hash) before skb_push */
1009 *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
1012 hdrlen = sizeof(struct guehdr) + optlen;
1014 skb_push(skb, hdrlen);
1016 guehdr = (struct guehdr *)skb->data;
1018 guehdr->control = 0;
1019 guehdr->version = 0;
1020 guehdr->hlen = optlen >> 2;
1022 guehdr->proto_ctype = *protocol;
1027 __be32 *flags = data;
1029 guehdr->flags |= GUE_FLAG_PRIV;
1031 data += GUE_LEN_PRIV;
1033 if (type & SKB_GSO_TUNNEL_REMCSUM) {
1034 u16 csum_start = skb_checksum_start_offset(skb);
1037 if (csum_start < hdrlen)
1040 csum_start -= hdrlen;
1041 pd[0] = htons(csum_start);
1042 pd[1] = htons(csum_start + skb->csum_offset);
1044 if (!skb_is_gso(skb)) {
1045 skb->ip_summed = CHECKSUM_NONE;
1046 skb->encapsulation = 0;
1049 *flags |= GUE_PFLAG_REMCSUM;
1050 data += GUE_PLEN_REMCSUM;
1057 EXPORT_SYMBOL(__gue_build_header);
1059 #ifdef CONFIG_NET_FOU_IP_TUNNELS
1061 static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
1062 struct flowi4 *fl4, u8 *protocol, __be16 sport)
1066 skb_push(skb, sizeof(struct udphdr));
1067 skb_reset_transport_header(skb);
1071 uh->dest = e->dport;
1073 uh->len = htons(skb->len);
1074 udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
1075 fl4->saddr, fl4->daddr, skb->len);
1077 *protocol = IPPROTO_UDP;
1080 static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
1081 u8 *protocol, struct flowi4 *fl4)
1083 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
1088 err = __fou_build_header(skb, e, protocol, &sport, type);
1092 fou_build_udp(skb, e, fl4, protocol, sport);
1097 static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
1098 u8 *protocol, struct flowi4 *fl4)
1100 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
1105 err = __gue_build_header(skb, e, protocol, &sport, type);
1109 fou_build_udp(skb, e, fl4, protocol, sport);
1114 static int gue_err_proto_handler(int proto, struct sk_buff *skb, u32 info)
1116 const struct net_protocol *ipprot = rcu_dereference(inet_protos[proto]);
1118 if (ipprot && ipprot->err_handler) {
1119 if (!ipprot->err_handler(skb, info))
1126 static int gue_err(struct sk_buff *skb, u32 info)
1128 int transport_offset = skb_transport_offset(skb);
1129 struct guehdr *guehdr;
1133 len = sizeof(struct udphdr) + sizeof(struct guehdr);
1134 if (!pskb_may_pull(skb, transport_offset + len))
1137 guehdr = (struct guehdr *)&udp_hdr(skb)[1];
1139 switch (guehdr->version) {
1140 case 0: /* Full GUE header present */
1143 /* Direct encapsulation of IPv4 or IPv6 */
1144 skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
1146 switch (((struct iphdr *)guehdr)->version) {
1148 ret = gue_err_proto_handler(IPPROTO_IPIP, skb, info);
1150 #if IS_ENABLED(CONFIG_IPV6)
1152 ret = gue_err_proto_handler(IPPROTO_IPV6, skb, info);
1160 default: /* Undefined version */
1164 if (guehdr->control)
1167 optlen = guehdr->hlen << 2;
1169 if (!pskb_may_pull(skb, transport_offset + len + optlen))
1172 guehdr = (struct guehdr *)&udp_hdr(skb)[1];
1173 if (validate_gue_flags(guehdr, optlen))
1176 /* Handling exceptions for direct UDP encapsulation in GUE would lead to
1177 * recursion. Besides, this kind of encapsulation can't even be
1178 * configured currently. Discard this.
1180 if (guehdr->proto_ctype == IPPROTO_UDP ||
1181 guehdr->proto_ctype == IPPROTO_UDPLITE)
1184 skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
1185 ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info);
1188 skb_set_transport_header(skb, transport_offset);
/* ip_tunnel encap callbacks registered for TUNNEL_ENCAP_FOU.
 * Note that the error handler is the shared gue_err().
 */
1193 static const struct ip_tunnel_encap_ops fou_iptun_ops = {
1194 .encap_hlen = fou_encap_hlen,
1195 .build_header = fou_build_header,
1196 .err_handler = gue_err,
/* ip_tunnel encap callbacks registered for TUNNEL_ENCAP_GUE. */
1199 static const struct ip_tunnel_encap_ops gue_iptun_ops = {
1200 .encap_hlen = gue_encap_hlen,
1201 .build_header = gue_build_header,
1202 .err_handler = gue_err,
1205 static int ip_tunnel_encap_add_fou_ops(void)
1209 ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
1211 pr_err("can't add fou ops\n");
1215 ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
1217 pr_err("can't add gue ops\n");
1218 ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
/* Unregister both ip_tunnel encap ops tables: the FOU ops first, then
 * the GUE ops.
 */
1225 static void ip_tunnel_encap_del_fou_ops(void)
1227 ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
1228 ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
1233 static int ip_tunnel_encap_add_fou_ops(void)
1238 static void ip_tunnel_encap_del_fou_ops(void)
1244 static __net_init int fou_init_net(struct net *net)
1246 struct fou_net *fn = net_generic(net, fou_net_id);
1248 INIT_LIST_HEAD(&fn->fou_list);
1249 mutex_init(&fn->fou_lock);
1253 static __net_exit void fou_exit_net(struct net *net)
1255 struct fou_net *fn = net_generic(net, fou_net_id);
1256 struct fou *fou, *next;
1258 /* Close all the FOU sockets */
1259 mutex_lock(&fn->fou_lock);
1260 list_for_each_entry_safe(fou, next, &fn->fou_list, list)
1262 mutex_unlock(&fn->fou_lock);
1265 static struct pernet_operations fou_net_ops = {
1266 .init = fou_init_net,
1267 .exit = fou_exit_net,
1269 .size = sizeof(struct fou_net),
1272 static int __init fou_init(void)
1276 ret = register_pernet_device(&fou_net_ops);
1280 ret = genl_register_family(&fou_nl_family);
1284 ret = ip_tunnel_encap_add_fou_ops();
1288 genl_unregister_family(&fou_nl_family);
1290 unregister_pernet_device(&fou_net_ops);
/* Module exit: tear down in the reverse of the order used by fou_init():
 * remove the tunnel encap ops, unregister the generic netlink family,
 * then unregister the per-network-namespace operations.
 */
1295 static void __exit fou_fini(void)
1297 ip_tunnel_encap_del_fou_ops();
1298 genl_unregister_family(&fou_nl_family);
1299 unregister_pernet_device(&fou_net_ops);
1302 module_init(fou_init);
1303 module_exit(fou_fini);
1305 MODULE_LICENSE("GPL");