1 // SPDX-License-Identifier: GPL-2.0-only
/*
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */
8 #include <asm/unaligned.h>
9 #include <linux/kernel.h>
10 #include <linux/netlink.h>
11 #include <linux/netfilter.h>
12 #include <linux/netfilter/nf_tables.h>
13 #include <linux/sctp.h>
14 #include <net/netfilter/nf_tables_core.h>
15 #include <net/netfilter/nf_tables.h>
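/* Expression private data. The field names mirror the accesses made by the
 * eval/init helpers below; the u8 widths are assumed to match the upstream
 * nft_exthdr layout rather than being derived from this code.
 */
struct nft_exthdr {
	u8			type;
	u8			offset;
	u8			len;
	u8			op;
	u8			dreg;
	u8			sreg;
	u8			flags;
};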
28 static unsigned int optlen(const u8 *opt, unsigned int offset)
30 /* Beware zero-length options: make finite progress */
if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0)
	return 1;
34 return opt[offset + 1];
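/* IPv6 flavour: locate the requested extension header with ipv6_find_hdr()
 * and either flag its presence (NFT_EXTHDR_F_PRESENT) or copy priv->len
 * bytes starting at priv->offset into the destination register.
 */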
37 static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
38 struct nft_regs *regs,
39 const struct nft_pktinfo *pkt)
41 struct nft_exthdr *priv = nft_expr_priv(expr);
u32 *dest = &regs->data[priv->dreg];
43 unsigned int offset = 0;
if (pkt->skb->protocol != htons(ETH_P_IPV6))
	goto err;
49 err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
	nft_reg_store8(dest, err >= 0);
	return;
} else if (err < 0) {
	goto err;
}

offset += priv->offset;
58 dest[priv->len / NFT_REG32_SIZE] = 0;
if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
	goto err;
return;
err:
	regs->verdict.code = NFT_BREAK;
/* Find the offset of the specified IPv4 option.
 *
 * If the target option is found, its offset is stored in *offset and the
 * option number is returned. Otherwise, a negative error is returned.
 *
 * If the first fragment doesn't contain the End of Options it is considered
 * invalid.
 */
74 static int ipv4_find_option(struct net *net, struct sk_buff *skb,
75 unsigned int *offset, int target)
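	/* Scratch space: a struct ip_options followed by up to 40 bytes of
	 * IPv4 options (60-byte maximum header minus the 20-byte fixed part),
	 * which is where __ip_options_compile() expects to find them inline.
	 */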
77 unsigned char optbuf[sizeof(struct ip_options) + 40];
78 struct ip_options *opt = (struct ip_options *)optbuf;
79 struct iphdr *iph, _iph;
85 iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
88 start = sizeof(struct iphdr);
90 optlen = iph->ihl * 4 - (int)sizeof(struct iphdr);
94 memset(opt, 0, sizeof(struct ip_options));
/* Copy the options since __ip_options_compile() modifies
 * the options.
 */
if (skb_copy_bits(skb, start, opt->__data, optlen))
	return -EBADMSG;
100 opt->optlen = optlen;
if (__ip_options_compile(net, opt, NULL, &info))
	return -EBADMSG;
110 found = target == IPOPT_SSRR ? opt->is_strictroute :
111 !opt->is_strictroute;
113 *offset = opt->srr + start;
118 *offset = opt->rr + start;
if (!opt->router_alert)
	break;
124 *offset = opt->router_alert + start;
130 return found ? target : -ENOENT;
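/* IPv4 flavour: same register contract as the IPv6 handler, but the option
 * is located with ipv4_find_option() above.
 */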
133 static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
134 struct nft_regs *regs,
135 const struct nft_pktinfo *pkt)
137 struct nft_exthdr *priv = nft_expr_priv(expr);
u32 *dest = &regs->data[priv->dreg];
139 struct sk_buff *skb = pkt->skb;
if (skb->protocol != htons(ETH_P_IP))
	goto err;
146 err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type);
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
	nft_reg_store8(dest, err >= 0);
	return;
} else if (err < 0) {
	goto err;
}

offset += priv->offset;
155 dest[priv->len / NFT_REG32_SIZE] = 0;
if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
	goto err;
return;
err:
	regs->verdict.code = NFT_BREAK;
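/* Return a pointer to the TCP header including its options, copied into
 * "buffer" when the skb is not linear. NULL is returned for non-TCP packets,
 * non-first fragments and implausible data offsets; on success *tcphdr_len
 * holds the full header length.
 */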
static struct tcphdr *
nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
165 unsigned int len, void *buffer, unsigned int *tcphdr_len)
if (pkt->tprot != IPPROTO_TCP || pkt->fragoff)
	return NULL;
172 tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
176 *tcphdr_len = __tcp_hdrlen(tcph);
if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
	return NULL;
180 return skb_header_pointer(pkt->skb, nft_thoff(pkt), *tcphdr_len, buffer);
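/* Match on a TCP option: walk the option list and either flag the option's
 * presence or copy priv->len bytes starting at priv->offset within it.
 */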
183 static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
184 struct nft_regs *regs,
185 const struct nft_pktinfo *pkt)
187 u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
188 struct nft_exthdr *priv = nft_expr_priv(expr);
189 unsigned int i, optl, tcphdr_len, offset;
u32 *dest = &regs->data[priv->dreg];
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
	goto err;

opt = (u8 *)tcph;
199 for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
200 optl = optlen(opt, i);
if (priv->type != opt[i])
	continue;
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
	goto err;
208 offset = i + priv->offset;
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
	nft_reg_store8(dest, true);
} else {
	dest[priv->len / NFT_REG32_SIZE] = 0;
	memcpy(dest, opt + offset, priv->len);
}
return;
err:
	if (priv->flags & NFT_EXTHDR_F_PRESENT)
		nft_reg_store8(dest, false);
	else
		regs->verdict.code = NFT_BREAK;
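/* Rewrite the value of an existing TCP option in place. Only 2- and 4-byte
 * values are handled; the checksum is patched with
 * inet_proto_csum_replace{2,4}(), and an MSS value is never raised because a
 * larger MSS advertised mid-stream can stall the connection. A rule such as
 * "tcp option maxseg size set rt mtu" ends up using this path.
 */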
226 static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
227 struct nft_regs *regs,
228 const struct nft_pktinfo *pkt)
230 u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
231 struct nft_exthdr *priv = nft_expr_priv(expr);
232 unsigned int i, optl, tcphdr_len, offset;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
	goto err;

opt = (u8 *)tcph;
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
	union {
		__be16 v16;
		__be32 v32;
	} old, new;
247 optl = optlen(opt, i);
if (priv->type != opt[i])
	continue;
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
	goto err;
if (skb_ensure_writable(pkt->skb,
			nft_thoff(pkt) + i + priv->len))
	goto err;

tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
			      &tcphdr_len);
if (!tcph)
	goto err;
offset = i + priv->offset;

switch (priv->len) {
case 2:
268 old.v16 = (__force __be16)get_unaligned((u16 *)(opt + offset));
269 new.v16 = (__force __be16)nft_reg_load16(
&regs->data[priv->sreg]);
switch (priv->type) {
case TCPOPT_MSS:
	/* an MSS increase can cause the connection to stall */
	if (ntohs(old.v16) <= ntohs(new.v16))
		return;
	break;
}
if (old.v16 == new.v16)
	return;
put_unaligned(new.v16, (__be16 *)(opt + offset));
inet_proto_csum_replace2(&tcph->check, pkt->skb,
			 old.v16, new.v16, false);
break;
case 4:
new.v32 = nft_reg_load_be32(&regs->data[priv->sreg]);
289 old.v32 = (__force __be32)get_unaligned((u32 *)(opt + offset));
if (old.v32 == new.v32)
	return;
put_unaligned(new.v32, (__be32 *)(opt + offset));
295 inet_proto_csum_replace4(&tcph->check, pkt->skb,
296 old.v32, new.v32, false);
err:
	regs->verdict.code = NFT_BREAK;
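/* Remove a TCP option by overwriting it with NOPs, folding every replaced
 * byte into the checksum. If the header cannot be made writable the packet
 * is dropped rather than let through with the option still present.
 */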
310 static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
311 struct nft_regs *regs,
312 const struct nft_pktinfo *pkt)
314 u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
315 struct nft_exthdr *priv = nft_expr_priv(expr);
316 unsigned int i, tcphdr_len, optl;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
	goto err;
if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
	goto drop;
opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!opt)
	goto err;
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
	unsigned int j;
333 optl = optlen(opt, i);
if (priv->type != opt[i])
	continue;
if (i + optl > tcphdr_len)
	goto drop;
for (j = 0; j < optl; ++j) {
	u16 n = TCPOPT_NOP;
	u16 o = opt[i + j];

	if ((i + j) % 2 == 0) {
		o <<= 8;
		n <<= 8;
	}
	inet_proto_csum_replace2(&tcph->check, pkt->skb, htons(o),
				 htons(n), false);
}
memset(opt + i, TCPOPT_NOP, optl);
return;
/* Option not found: let rule evaluation continue. This allows
 * multiple option removals per rule.
 */
return;
err:
	regs->verdict.code = NFT_BREAK;
	return;
drop:
363 /* can't remove, no choice but to drop */
364 regs->verdict.code = NF_DROP;
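/* Walk the SCTP chunk list for a chunk of the requested type and either flag
 * its presence or copy (part of) it into the destination register.
 */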
367 static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
368 struct nft_regs *regs,
369 const struct nft_pktinfo *pkt)
371 unsigned int offset = nft_thoff(pkt) + sizeof(struct sctphdr);
372 struct nft_exthdr *priv = nft_expr_priv(expr);
u32 *dest = &regs->data[priv->dreg];
374 const struct sctp_chunkhdr *sch;
375 struct sctp_chunkhdr _sch;
377 if (pkt->tprot != IPPROTO_SCTP)
381 sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch);
if (!sch || !sch->length)
	break;
385 if (sch->type == priv->type) {
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
	nft_reg_store8(dest, true);
	return;
}
390 if (priv->offset + priv->len > ntohs(sch->length) ||
offset + ntohs(sch->length) > pkt->skb->len)
	break;
394 dest[priv->len / NFT_REG32_SIZE] = 0;
395 if (skb_copy_bits(pkt->skb, offset + priv->offset,
dest, priv->len) < 0)
	break;
return;
400 offset += SCTP_PAD4(ntohs(sch->length));
401 } while (offset < pkt->skb->len);
if (priv->flags & NFT_EXTHDR_F_PRESENT)
	nft_reg_store8(dest, false);
else
	regs->verdict.code = NFT_BREAK;
409 static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
410 [NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
411 [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
412 [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
413 [NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
414 [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
415 [NFTA_EXTHDR_OP] = { .type = NLA_U32 },
416 [NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
419 static int nft_exthdr_init(const struct nft_ctx *ctx,
420 const struct nft_expr *expr,
421 const struct nlattr * const tb[])
423 struct nft_exthdr *priv = nft_expr_priv(expr);
424 u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
427 if (!tb[NFTA_EXTHDR_DREG] ||
428 !tb[NFTA_EXTHDR_TYPE] ||
429 !tb[NFTA_EXTHDR_OFFSET] ||
!tb[NFTA_EXTHDR_LEN])
	return -EINVAL;
433 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
437 err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
441 if (tb[NFTA_EXTHDR_FLAGS]) {
442 err = nft_parse_u32_check(tb[NFTA_EXTHDR_FLAGS], U8_MAX, &flags);
if (flags & ~NFT_EXTHDR_F_PRESENT)
	return -EINVAL;
450 if (tb[NFTA_EXTHDR_OP]) {
451 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
456 priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
priv->flags = flags;
priv->op = op;
462 return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG],
&priv->dreg, NULL, NFT_DATA_VALUE,
			       priv->len);
467 static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
468 const struct nft_expr *expr,
469 const struct nlattr * const tb[])
471 struct nft_exthdr *priv = nft_expr_priv(expr);
472 u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
475 if (!tb[NFTA_EXTHDR_SREG] ||
476 !tb[NFTA_EXTHDR_TYPE] ||
477 !tb[NFTA_EXTHDR_OFFSET] ||
!tb[NFTA_EXTHDR_LEN])
	return -EINVAL;
if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
	return -EINVAL;
484 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
488 err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
502 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
506 priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
priv->op = op;
return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg,
			       priv->len);
516 static int nft_exthdr_tcp_strip_init(const struct nft_ctx *ctx,
517 const struct nft_expr *expr,
518 const struct nlattr * const tb[])
520 struct nft_exthdr *priv = nft_expr_priv(expr);
522 if (tb[NFTA_EXTHDR_SREG] ||
523 tb[NFTA_EXTHDR_DREG] ||
524 tb[NFTA_EXTHDR_FLAGS] ||
tb[NFTA_EXTHDR_OFFSET] ||
    tb[NFTA_EXTHDR_LEN])
	return -EINVAL;
if (!tb[NFTA_EXTHDR_TYPE])
	return -EINVAL;
532 priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->op = NFT_EXTHDR_OP_TCPOPT;

return 0;
538 static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
539 const struct nft_expr *expr,
540 const struct nlattr * const tb[])
542 struct nft_exthdr *priv = nft_expr_priv(expr);
int err = nft_exthdr_init(ctx, expr, tb);

if (err < 0)
	return err;
switch (priv->type) {
case IPOPT_SSRR:
case IPOPT_LSRR:
case IPOPT_RR:
case IPOPT_RA:
	break;
default:
	return -EOPNOTSUPP;
}

return 0;
560 static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
562 if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
563 goto nla_put_failure;
564 if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
565 goto nla_put_failure;
566 if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
567 goto nla_put_failure;
568 if (nla_put_be32(skb, NFTA_EXTHDR_FLAGS, htonl(priv->flags)))
569 goto nla_put_failure;
570 if (nla_put_be32(skb, NFTA_EXTHDR_OP, htonl(priv->op)))
goto nla_put_failure;

return 0;

nla_put_failure:
	return -1;
578 static int nft_exthdr_dump(struct sk_buff *skb,
579 const struct nft_expr *expr, bool reset)
581 const struct nft_exthdr *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
	return -1;
586 return nft_exthdr_dump_common(skb, priv);
589 static int nft_exthdr_dump_set(struct sk_buff *skb,
590 const struct nft_expr *expr, bool reset)
592 const struct nft_exthdr *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg))
	return -1;
597 return nft_exthdr_dump_common(skb, priv);
600 static int nft_exthdr_dump_strip(struct sk_buff *skb,
601 const struct nft_expr *expr, bool reset)
603 const struct nft_exthdr *priv = nft_expr_priv(expr);
605 return nft_exthdr_dump_common(skb, priv);
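/* Register tracking: if the chosen destination register already holds the
 * result of an identical exthdr load, the expression can be elided.
 */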
608 static bool nft_exthdr_reduce(struct nft_regs_track *track,
609 const struct nft_expr *expr)
611 const struct nft_exthdr *priv = nft_expr_priv(expr);
612 const struct nft_exthdr *exthdr;
if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
	nft_reg_track_update(track, expr, priv->dreg, priv->len);
	return false;
}
619 exthdr = nft_expr_priv(track->regs[priv->dreg].selector);
620 if (priv->type != exthdr->type ||
621 priv->op != exthdr->op ||
622 priv->flags != exthdr->flags ||
623 priv->offset != exthdr->offset ||
624 priv->len != exthdr->len) {
nft_reg_track_update(track, expr, priv->dreg, priv->len);
	return false;
}
if (!track->regs[priv->dreg].bitwise)
	return true;
632 return nft_expr_reduce_bitwise(track, expr);
635 static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
636 .type = &nft_exthdr_type,
637 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
638 .eval = nft_exthdr_ipv6_eval,
639 .init = nft_exthdr_init,
640 .dump = nft_exthdr_dump,
641 .reduce = nft_exthdr_reduce,
644 static const struct nft_expr_ops nft_exthdr_ipv4_ops = {
645 .type = &nft_exthdr_type,
646 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
647 .eval = nft_exthdr_ipv4_eval,
648 .init = nft_exthdr_ipv4_init,
649 .dump = nft_exthdr_dump,
650 .reduce = nft_exthdr_reduce,
653 static const struct nft_expr_ops nft_exthdr_tcp_ops = {
654 .type = &nft_exthdr_type,
655 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
656 .eval = nft_exthdr_tcp_eval,
657 .init = nft_exthdr_init,
658 .dump = nft_exthdr_dump,
659 .reduce = nft_exthdr_reduce,
662 static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
663 .type = &nft_exthdr_type,
664 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
665 .eval = nft_exthdr_tcp_set_eval,
666 .init = nft_exthdr_tcp_set_init,
667 .dump = nft_exthdr_dump_set,
668 .reduce = NFT_REDUCE_READONLY,
671 static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = {
672 .type = &nft_exthdr_type,
673 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
674 .eval = nft_exthdr_tcp_strip_eval,
675 .init = nft_exthdr_tcp_strip_init,
676 .dump = nft_exthdr_dump_strip,
677 .reduce = NFT_REDUCE_READONLY,
680 static const struct nft_expr_ops nft_exthdr_sctp_ops = {
681 .type = &nft_exthdr_type,
682 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
683 .eval = nft_exthdr_sctp_eval,
684 .init = nft_exthdr_init,
685 .dump = nft_exthdr_dump,
686 .reduce = nft_exthdr_reduce,
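/* Select the ops variant from NFTA_EXTHDR_OP and from which register
 * attribute is present: a source register selects the TCP option set/strip
 * variants, a destination register the matching variants.
 */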
689 static const struct nft_expr_ops *
690 nft_exthdr_select_ops(const struct nft_ctx *ctx,
691 const struct nlattr * const tb[])
695 if (!tb[NFTA_EXTHDR_OP])
696 return &nft_exthdr_ipv6_ops;
698 if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
699 return ERR_PTR(-EOPNOTSUPP);
op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));

switch (op) {
703 case NFT_EXTHDR_OP_TCPOPT:
704 if (tb[NFTA_EXTHDR_SREG])
705 return &nft_exthdr_tcp_set_ops;
706 if (tb[NFTA_EXTHDR_DREG])
707 return &nft_exthdr_tcp_ops;
708 return &nft_exthdr_tcp_strip_ops;
709 case NFT_EXTHDR_OP_IPV6:
710 if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_ipv6_ops;
break;
713 case NFT_EXTHDR_OP_IPV4:
714 if (ctx->family != NFPROTO_IPV6) {
715 if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_ipv4_ops;
}
break;
719 case NFT_EXTHDR_OP_SCTP:
720 if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_sctp_ops;
break;
}
725 return ERR_PTR(-EOPNOTSUPP);
struct nft_expr_type nft_exthdr_type __read_mostly = {
	.name = "exthdr",
730 .select_ops = nft_exthdr_select_ops,
731 .policy = nft_exthdr_policy,
732 .maxattr = NFTA_EXTHDR_MAX,
.owner = THIS_MODULE,
};