5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/netlink.h>
16 #include <linux/netfilter.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_conntrack.h>
20 #include <net/netfilter/nf_conntrack_acct.h>
21 #include <net/netfilter/nf_conntrack_tuple.h>
22 #include <net/netfilter/nf_conntrack_helper.h>
23 #include <net/netfilter/nf_conntrack_ecache.h>
24 #include <net/netfilter/nf_conntrack_labels.h>
/*
 * Per-expression private data members (read through "priv->" by the
 * eval/init/dump functions below).  Each member is an 8-bit wide
 * bit-field: the ct key, the conntrack direction, and the destination /
 * source register.
 * NOTE(review): the enclosing struct/union lines are not visible in this
 * listing -- confirm the layout against the pristine source.
 */
27 enum nft_ct_keys key:8;
28 enum ip_conntrack_dir dir:8;
30 enum nft_registers dreg:8;
31 enum nft_registers sreg:8;
/*
 * Zone support only: one template conntrack pointer per CPU (read with
 * this_cpu_read() in nft_ct_set_zone_eval()) plus a plain counter that
 * nft_ct_set_init() increments and __nft_ct_set_destroy() decrements; the
 * templates are released via nft_ct_tmpl_put_pcpu() when it drops to zero.
 */
35 #ifdef CONFIG_NF_CONNTRACK_ZONES
36 static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
37 static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
/*
 * Read one conntrack accounting counter.
 *
 * For a real direction (d < IP_CT_DIR_MAX) the bytes counter of c[d] is
 * returned when k == NFT_CT_BYTES, otherwise the packets counter.  For any
 * other d the function calls itself once for IP_CT_DIR_ORIGINAL and once
 * for IP_CT_DIR_REPLY and returns the sum of both.
 *
 * NOTE(review): k is used below but its parameter declaration is not
 * visible in this listing.
 */
40 static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
42 enum ip_conntrack_dir d)
44 if (d < IP_CT_DIR_MAX)
45 return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
46 atomic64_read(&c[d].packets);
48 return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
49 nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
/*
 * Eval callback of the ct "get" expression (see nft_ct_get_ops).
 *
 * Looks up the conntrack entry attached to pkt->skb and stores the
 * requested piece of conntrack state at the destination register
 * location (dest, derived from regs->data[priv->dreg]).  The visible
 * case labels cover direction, expiration, labels, byte/packet counters,
 * average packet size, L3/L4 protocol, zone id, tuple addresses and
 * tuple ports.  The last visible statement sets the verdict to NFT_BREAK.
 *
 * NOTE(review): this listing has lost lines (the switch header, several
 * case labels, NULL checks, break/return/goto statements and the "err"
 * label are not visible), so the exact control flow must be confirmed
 * against the pristine source.
 */
52 static void nft_ct_get_eval(const struct nft_expr *expr,
53 struct nft_regs *regs,
54 const struct nft_pktinfo *pkt)
56 const struct nft_ct *priv = nft_expr_priv(expr);
/* NOTE(review): "®s" looks like a mis-encoded "&regs" -- confirm. */
57 u32 *dest = ®s->data[priv->dreg];
58 enum ip_conntrack_info ctinfo;
59 const struct nf_conn *ct;
60 const struct nf_conn_help *help;
61 const struct nf_conntrack_tuple *tuple;
62 const struct nf_conntrack_helper *helper;
65 ct = nf_ct_get(pkt->skb, &ctinfo);
/* State is one of: invalid bit, untracked bit, or the bit derived from ctinfo. */
70 state = NF_CT_STATE_INVALID_BIT;
71 else if (nf_ct_is_untracked(ct))
72 state = NF_CT_STATE_UNTRACKED_BIT;
74 state = NF_CT_STATE_BIT(ctinfo);
85 case NFT_CT_DIRECTION:
86 *dest = CTINFO2DIR(ctinfo);
91 #ifdef CONFIG_NF_CONNTRACK_MARK
96 #ifdef CONFIG_NF_CONNTRACK_SECMARK
101 case NFT_CT_EXPIRATION:
/* Remaining lifetime is converted from jiffies to milliseconds. */
102 *dest = jiffies_to_msecs(nf_ct_expires(ct));
/* Helper lookup goes through the master connection's help extension. */
105 if (ct->master == NULL)
107 help = nfct_help(ct->master);
110 helper = rcu_dereference(help->helper);
/* Copies at most NF_CT_HELPER_NAME_LEN bytes of the helper name into the register area. */
113 strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
115 #ifdef CONFIG_NF_CONNTRACK_LABELS
116 case NFT_CT_LABELS: {
117 struct nf_conn_labels *labels = nf_ct_labels_find(ct);
/* Either the label bits are copied or the destination is zero-filled. */
120 memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE);
122 memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
126 case NFT_CT_BYTES: /* fallthrough */
128 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
/* priv->dir == IP_CT_DIR_MAX makes the counter helper sum both directions. */
132 count = nft_ct_get_eval_counter(acct->counter,
133 priv->key, priv->dir);
134 memcpy(dest, &count, sizeof(count));
137 case NFT_CT_AVGPKT: {
138 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
139 u64 avgcnt = 0, bcnt = 0, pcnt = 0;
142 pcnt = nft_ct_get_eval_counter(acct->counter,
143 NFT_CT_PKTS, priv->dir);
144 bcnt = nft_ct_get_eval_counter(acct->counter,
145 NFT_CT_BYTES, priv->dir);
/* NOTE(review): divides by pcnt; a zero check guarding this is not visible here -- confirm. */
147 avgcnt = div64_u64(bcnt, pcnt);
150 memcpy(dest, &avgcnt, sizeof(avgcnt));
153 case NFT_CT_L3PROTOCOL:
154 *dest = nf_ct_l3num(ct);
156 case NFT_CT_PROTOCOL:
157 *dest = nf_ct_protonum(ct);
159 #ifdef CONFIG_NF_CONNTRACK_ZONES
161 const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
163 if (priv->dir < IP_CT_DIR_MAX)
164 *dest = nf_ct_zone_id(zone, priv->dir);
/* Keys below read from the tuple of the configured direction. */
175 tuple = &ct->tuplehash[priv->dir].tuple;
/* Address copies are 4 bytes for NFPROTO_IPV4 and 16 bytes otherwise. */
178 memcpy(dest, tuple->src.u3.all,
179 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
182 memcpy(dest, tuple->dst.u3.all,
183 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
185 case NFT_CT_PROTO_SRC:
186 *dest = (__force __u16)tuple->src.u.all;
188 case NFT_CT_PROTO_DST:
189 *dest = (__force __u16)tuple->dst.u.all;
196 regs->verdict.code = NFT_BREAK;
/*
 * Eval callback used for "ct zone set" (see nft_ct_set_zone_ops); only
 * built with CONFIG_NF_CONNTRACK_ZONES.
 *
 * Reads a 16-bit value from the source register, fills in a zone
 * description (zone.dir defaults to NF_CT_DEFAULT_ZONE_DIR and is
 * narrowed to the original or reply direction by the visible case
 * labels), and attaches a template conntrack carrying that zone to the
 * skb with state IP_CT_NEW.
 *
 * NOTE(review): lines are missing from this listing (declaration of ct,
 * the early return for already-tracked packets, the switch header, the
 * zone id assignment and the allocation-failure check) -- confirm against
 * the pristine source.
 */
199 #ifdef CONFIG_NF_CONNTRACK_ZONES
200 static void nft_ct_set_zone_eval(const struct nft_expr *expr,
201 struct nft_regs *regs,
202 const struct nft_pktinfo *pkt)
204 struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
205 const struct nft_ct *priv = nft_expr_priv(expr);
206 struct sk_buff *skb = pkt->skb;
207 enum ip_conntrack_info ctinfo;
208 u16 value = regs->data[priv->sreg];
211 ct = nf_ct_get(skb, &ctinfo);
212 if (ct) /* already tracked */
218 case IP_CT_DIR_ORIGINAL:
219 zone.dir = NF_CT_ZONE_DIR_ORIG;
221 case IP_CT_DIR_REPLY:
222 zone.dir = NF_CT_ZONE_DIR_REPL;
/* Try this CPU's preallocated template first. */
228 ct = this_cpu_read(nft_ct_pcpu_template);
/* A use count of exactly 1 means nobody else still holds the template. */
230 if (likely(atomic_read(&ct->ct_general.use) == 1)) {
231 nf_ct_zone_add(ct, &zone);
233 /* previous skb got queued to userspace */
/* Atomic allocation of a fresh template for this packet's netns. */
234 ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
236 regs->verdict.code = NF_DROP;
/* Take a reference for the skb before attaching the template. */
241 atomic_inc(&ct->ct_general.use);
242 nf_ct_set(skb, ct, IP_CT_NEW);
/*
 * Eval callback of the generic ct "set" expression (see nft_ct_set_ops).
 *
 * Fetches the conntrack entry of pkt->skb and updates it from the source
 * register: with CONFIG_NF_CONNTRACK_MARK the mark is compared against the
 * register value and an IPCT_MARK event is cached when it differs; with
 * CONFIG_NF_CONNTRACK_LABELS the labels are replaced from the register
 * contents (NF_CT_LABELS_MAX_SIZE / sizeof(u32) words).
 *
 * NOTE(review): the switch/case lines, the NULL check on ct and the mark
 * assignment itself are not visible in this listing; "®s" looks like a
 * mis-encoded "&regs".  Confirm against the pristine source.
 */
246 static void nft_ct_set_eval(const struct nft_expr *expr,
247 struct nft_regs *regs,
248 const struct nft_pktinfo *pkt)
250 const struct nft_ct *priv = nft_expr_priv(expr);
251 struct sk_buff *skb = pkt->skb;
252 #ifdef CONFIG_NF_CONNTRACK_MARK
253 u32 value = regs->data[priv->sreg];
255 enum ip_conntrack_info ctinfo;
258 ct = nf_ct_get(skb, &ctinfo);
263 #ifdef CONFIG_NF_CONNTRACK_MARK
/* Only touch the entry (and raise an event) when the mark actually changes. */
265 if (ct->mark != value) {
267 nf_conntrack_event_cache(IPCT_MARK, ct);
271 #ifdef CONFIG_NF_CONNTRACK_LABELS
/* The same register area is passed for both data arguments. */
273 nf_connlabels_replace(ct,
274 ®s->data[priv->sreg],
275 ®s->data[priv->sreg],
276 NF_CT_LABELS_MAX_SIZE / sizeof(u32));
/*
 * Netlink attribute policy for the ct expression (installed through
 * nft_ct_type.policy): register and key attributes are 32-bit values,
 * the direction attribute is a single byte.
 */
284 static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
285 [NFTA_CT_DREG] = { .type = NLA_U32 },
286 [NFTA_CT_KEY] = { .type = NLA_U32 },
287 [NFTA_CT_DIRECTION] = { .type = NLA_U8 },
288 [NFTA_CT_SREG] = { .type = NLA_U32 },
/*
 * Family-aware wrapper around nf_ct_netns_get().
 *
 * For NFPROTO_INET it requests conntrack for both NFPROTO_IPV4 and
 * NFPROTO_IPV6; for any other family it forwards the family unchanged.
 * The trailing nf_ct_netns_put(net, NFPROTO_IPV4) is presumably the
 * rollback when the IPv6 request fails -- the error checks, labels and
 * return statements are not visible in this listing, confirm.
 */
291 static int nft_ct_netns_get(struct net *net, uint8_t family)
295 if (family == NFPROTO_INET) {
296 err = nf_ct_netns_get(net, NFPROTO_IPV4);
299 err = nf_ct_netns_get(net, NFPROTO_IPV6);
303 err = nf_ct_netns_get(net, family);
310 nf_ct_netns_put(net, NFPROTO_IPV4);
/*
 * Counterpart of nft_ct_netns_get(): for NFPROTO_INET both the IPv4 and
 * the IPv6 conntrack references are dropped, otherwise the reference of
 * the given family.
 * NOTE(review): the brace/else lines separating the two paths are not
 * visible in this listing.
 */
315 static void nft_ct_netns_put(struct net *net, uint8_t family)
317 if (family == NFPROTO_INET) {
318 nf_ct_netns_put(net, NFPROTO_IPV4);
319 nf_ct_netns_put(net, NFPROTO_IPV6);
321 nf_ct_netns_put(net, family);
/*
 * Walk every possible CPU, pick up its template conntrack pointer and
 * reset the per-CPU slot to NULL.
 * NOTE(review): the statement that actually releases "ct" (and any NULL
 * check around it) is not visible in this listing -- confirm.
 */
324 #ifdef CONFIG_NF_CONNTRACK_ZONES
325 static void nft_ct_tmpl_put_pcpu(void)
330 for_each_possible_cpu(cpu) {
331 ct = per_cpu(nft_ct_pcpu_template, cpu);
335 per_cpu(nft_ct_pcpu_template, cpu) = NULL;
/*
 * Populate the per-CPU template slots.
 *
 * The refcount test at the top presumably short-circuits when templates
 * already exist (the return is not visible here -- confirm).  Otherwise a
 * template is allocated for each possible CPU in init_net with zone id 0
 * and GFP_KERNEL, its use count is set to 1 and it is stored in the
 * per-CPU slot.  nft_ct_tmpl_put_pcpu() is called from inside the loop,
 * presumably to undo partial work when an allocation fails.
 *
 * The caller (nft_ct_set_init) treats a false return as failure.
 */
339 static bool nft_ct_tmpl_alloc_pcpu(void)
341 struct nf_conntrack_zone zone = { .id = 0 };
345 if (nft_ct_pcpu_template_refcnt)
348 for_each_possible_cpu(cpu) {
349 tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
351 nft_ct_tmpl_put_pcpu();
355 atomic_set(&tmp->ct_general.use, 1);
356 per_cpu(nft_ct_pcpu_template, cpu) = tmp;
/*
 * Init callback of the ct "get" expression (see nft_ct_get_ops).
 *
 * Parses NFTA_CT_KEY into priv->key, starts with priv->dir set to
 * IP_CT_DIR_MAX (i.e. no specific direction), and works out "len", the
 * number of bytes the key will store, per key.  The visible checks test
 * whether NFTA_CT_DIRECTION is present (!= NULL) for some keys and absent
 * (== NULL) for the tuple address/port keys; the address length depends
 * on ctx->afi->family and unsupported families yield -EAFNOSUPPORT.  A
 * supplied direction is read with nla_get_u8() and matched against the
 * original/reply case labels.  Finally the destination register is parsed
 * and validated for a store of len bytes, a conntrack netns reference is
 * taken through nft_ct_netns_get(), and accounting is switched on for the
 * bytes/packets/avgpkt keys.
 *
 * NOTE(review): many lines (switch headers, several case labels, the
 * return statements after each check, error handling after the register
 * validation and the netns get) are not visible in this listing, and the
 * "For compatibility" comment further down is not closed here.  Confirm
 * all of this against the pristine source.
 */
363 static int nft_ct_get_init(const struct nft_ctx *ctx,
364 const struct nft_expr *expr,
365 const struct nlattr * const tb[])
367 struct nft_ct *priv = nft_expr_priv(expr);
371 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
372 priv->dir = IP_CT_DIR_MAX;
374 case NFT_CT_DIRECTION:
375 if (tb[NFTA_CT_DIRECTION] != NULL)
381 #ifdef CONFIG_NF_CONNTRACK_MARK
384 #ifdef CONFIG_NF_CONNTRACK_SECMARK
387 case NFT_CT_EXPIRATION:
388 if (tb[NFTA_CT_DIRECTION] != NULL)
392 #ifdef CONFIG_NF_CONNTRACK_LABELS
394 if (tb[NFTA_CT_DIRECTION] != NULL)
396 len = NF_CT_LABELS_MAX_SIZE;
400 if (tb[NFTA_CT_DIRECTION] != NULL)
402 len = NF_CT_HELPER_NAME_LEN;
405 case NFT_CT_L3PROTOCOL:
406 case NFT_CT_PROTOCOL:
407 /* For compatibility, do not report error if NFTA_CT_DIRECTION
408 * attribute is specified.
414 if (tb[NFTA_CT_DIRECTION] == NULL)
417 switch (ctx->afi->family) {
419 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
424 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
428 return -EAFNOSUPPORT;
431 case NFT_CT_PROTO_SRC:
432 case NFT_CT_PROTO_DST:
433 if (tb[NFTA_CT_DIRECTION] == NULL)
435 len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
442 #ifdef CONFIG_NF_CONNTRACK_ZONES
451 if (tb[NFTA_CT_DIRECTION] != NULL) {
452 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
454 case IP_CT_DIR_ORIGINAL:
455 case IP_CT_DIR_REPLY:
462 priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
463 err = nft_validate_register_store(ctx, priv->dreg, NULL,
464 NFT_DATA_VALUE, len);
468 err = nft_ct_netns_get(ctx->net, ctx->afi->family);
472 if (priv->key == NFT_CT_BYTES ||
473 priv->key == NFT_CT_PKTS ||
474 priv->key == NFT_CT_AVGPKT)
475 nf_ct_set_acct(ctx->net, true);
/*
 * Release the key-specific resources of a ct "set" expression; shared by
 * nft_ct_set_destroy() and nft_ct_set_init().
 *
 * With CONFIG_NF_CONNTRACK_LABELS the connlabels reference for ctx->net is
 * dropped; with CONFIG_NF_CONNTRACK_ZONES the per-CPU template refcount is
 * decremented and the templates are freed once it reaches zero.
 * NOTE(review): the switch on the key that selects between these actions
 * is not visible in this listing -- confirm.
 */
480 static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
483 #ifdef CONFIG_NF_CONNTRACK_LABELS
485 nf_connlabels_put(ctx->net);
488 #ifdef CONFIG_NF_CONNTRACK_ZONES
490 if (--nft_ct_pcpu_template_refcnt == 0)
491 nft_ct_tmpl_put_pcpu();
/*
 * Init callback of the ct "set" expressions (used by both nft_ct_set_ops
 * and nft_ct_set_zone_ops).
 *
 * Starts with priv->dir = IP_CT_DIR_MAX, parses NFTA_CT_KEY, and per key
 * determines "len", the number of bytes read from the source register:
 * the size of nf_conn.mark for the mark key, NF_CT_LABELS_MAX_SIZE for
 * labels (which also takes a connlabels reference sized from len).  For
 * zones the per-CPU templates are allocated and their refcount bumped.
 * A supplied NFTA_CT_DIRECTION is read and matched against the
 * original/reply case labels.  The source register is then parsed and
 * validated for a load of len bytes, and a conntrack netns reference is
 * taken via nft_ct_netns_get().  __nft_ct_set_destroy() is called at the
 * end, presumably on the error path only.
 *
 * NOTE(review): switch headers, case labels, return statements and the
 * error label are not visible in this listing -- confirm against the
 * pristine source.
 */
498 static int nft_ct_set_init(const struct nft_ctx *ctx,
499 const struct nft_expr *expr,
500 const struct nlattr * const tb[])
502 struct nft_ct *priv = nft_expr_priv(expr);
506 priv->dir = IP_CT_DIR_MAX;
507 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
509 #ifdef CONFIG_NF_CONNTRACK_MARK
511 if (tb[NFTA_CT_DIRECTION])
513 len = FIELD_SIZEOF(struct nf_conn, mark);
516 #ifdef CONFIG_NF_CONNTRACK_LABELS
518 if (tb[NFTA_CT_DIRECTION])
520 len = NF_CT_LABELS_MAX_SIZE;
/* The argument is the highest label bit index: total bits minus one. */
521 err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
526 #ifdef CONFIG_NF_CONNTRACK_ZONES
528 if (!nft_ct_tmpl_alloc_pcpu())
530 nft_ct_pcpu_template_refcnt++;
538 if (tb[NFTA_CT_DIRECTION]) {
539 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
541 case IP_CT_DIR_ORIGINAL:
542 case IP_CT_DIR_REPLY:
549 priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
550 err = nft_validate_register_load(priv->sreg, len);
554 err = nft_ct_netns_get(ctx->net, ctx->afi->family);
561 __nft_ct_set_destroy(ctx, priv);
565 static void nft_ct_get_destroy(const struct nft_ctx *ctx,
566 const struct nft_expr *expr)
568 nf_ct_netns_put(ctx->net, ctx->afi->family);
/*
 * Destroy callback of the ct "set" expressions: releases the key-specific
 * resources via __nft_ct_set_destroy() and then drops the conntrack netns
 * reference through the family-aware nft_ct_netns_put() wrapper.
 */
571 static void nft_ct_set_destroy(const struct nft_ctx *ctx,
572 const struct nft_expr *expr)
574 struct nft_ct *priv = nft_expr_priv(expr);
576 __nft_ct_set_destroy(ctx, priv);
577 nft_ct_netns_put(ctx->net, ctx->afi->family);
/*
 * Dump callback of the ct "get" expression: emits the destination
 * register and the key (as a big-endian 32-bit attribute).  For the
 * visible tuple port keys the direction attribute is always emitted; in
 * the other visible branch it is emitted only when priv->dir names a real
 * direction (priv->dir < IP_CT_DIR_MAX).  Any failing put jumps to
 * nla_put_failure.
 * NOTE(review): the switch header, further case labels, the return
 * statements and the nla_put_failure label are not visible in this
 * listing -- confirm.
 */
580 static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
582 const struct nft_ct *priv = nft_expr_priv(expr);
584 if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
585 goto nla_put_failure;
586 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
587 goto nla_put_failure;
592 case NFT_CT_PROTO_SRC:
593 case NFT_CT_PROTO_DST:
594 if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
595 goto nla_put_failure;
601 if (priv->dir < IP_CT_DIR_MAX &&
602 nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
603 goto nla_put_failure;
/*
 * Dump callback of the ct "set" expressions: emits the source register
 * and the key (big-endian 32-bit), plus the direction attribute when
 * priv->dir names a real direction (priv->dir < IP_CT_DIR_MAX).  Any
 * failing put jumps to nla_put_failure.
 * NOTE(review): the lines surrounding the direction check, the return
 * statements and the nla_put_failure label are not visible in this
 * listing -- confirm.
 */
615 static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
617 const struct nft_ct *priv = nft_expr_priv(expr);
619 if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
620 goto nla_put_failure;
621 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
622 goto nla_put_failure;
626 if (priv->dir < IP_CT_DIR_MAX &&
627 nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
628 goto nla_put_failure;
/*
 * Forward declaration of the expression type (defined further down), then
 * the operations table for the "get" flavour of the ct expression,
 * returned by nft_ct_select_ops() when NFTA_CT_DREG is present.
 */
640 static struct nft_expr_type nft_ct_type;
641 static const struct nft_expr_ops nft_ct_get_ops = {
642 .type = &nft_ct_type,
643 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
644 .eval = nft_ct_get_eval,
645 .init = nft_ct_get_init,
646 .destroy = nft_ct_get_destroy,
647 .dump = nft_ct_get_dump,
/*
 * Operations table for the generic "set" flavour of the ct expression,
 * returned by nft_ct_select_ops() when NFTA_CT_SREG is present and the
 * zone-specific table does not apply.
 */
650 static const struct nft_expr_ops nft_ct_set_ops = {
651 .type = &nft_ct_type,
652 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
653 .eval = nft_ct_set_eval,
654 .init = nft_ct_set_init,
655 .destroy = nft_ct_set_destroy,
656 .dump = nft_ct_set_dump,
/*
 * Operations table for "ct zone set" (CONFIG_NF_CONNTRACK_ZONES only).
 * Identical to nft_ct_set_ops except for the eval callback, which is the
 * zone-specific nft_ct_set_zone_eval().
 */
659 #ifdef CONFIG_NF_CONNTRACK_ZONES
660 static const struct nft_expr_ops nft_ct_set_zone_ops = {
661 .type = &nft_ct_type,
662 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
663 .eval = nft_ct_set_zone_eval,
664 .init = nft_ct_set_init,
665 .destroy = nft_ct_set_destroy,
666 .dump = nft_ct_set_dump,
/*
 * Choose the operations table from the supplied netlink attributes.
 *
 * Returns ERR_PTR(-EINVAL) when the key attribute is missing, when both a
 * destination and a source register are given, or when neither is given.
 * A destination register selects nft_ct_get_ops.  A source register
 * selects nft_ct_set_zone_ops when zones are configured and the key is
 * NFT_CT_ZONE, otherwise nft_ct_set_ops.
 */
670 static const struct nft_expr_ops *
671 nft_ct_select_ops(const struct nft_ctx *ctx,
672 const struct nlattr * const tb[])
674 if (tb[NFTA_CT_KEY] == NULL)
675 return ERR_PTR(-EINVAL);
677 if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG])
678 return ERR_PTR(-EINVAL);
680 if (tb[NFTA_CT_DREG])
681 return &nft_ct_get_ops;
683 if (tb[NFTA_CT_SREG]) {
684 #ifdef CONFIG_NF_CONNTRACK_ZONES
/* The key is compared in network byte order, so no ntohl() is needed. */
685 if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
686 return &nft_ct_set_zone_ops;
688 return &nft_ct_set_ops;
691 return ERR_PTR(-EINVAL);
/*
 * Expression type descriptor for ct: ops are chosen per rule through
 * nft_ct_select_ops(), attributes are validated against nft_ct_policy up
 * to NFTA_CT_MAX.
 * NOTE(review): the .name initializer line is not visible in this listing.
 */
694 static struct nft_expr_type nft_ct_type __read_mostly = {
696 .select_ops = &nft_ct_select_ops,
697 .policy = nft_ct_policy,
698 .maxattr = NFTA_CT_MAX,
699 .owner = THIS_MODULE,
/*
 * Eval callback of the notrack expression: looks at the conntrack entry
 * already attached to the skb, and otherwise attaches the "untracked"
 * conntrack object (taking a reference on it first) with state IP_CT_NEW.
 * NOTE(review): the declaration of ct and the condition/return that
 * implements the "Ignore" case are not visible in this listing -- confirm.
 */
702 static void nft_notrack_eval(const struct nft_expr *expr,
703 struct nft_regs *regs,
704 const struct nft_pktinfo *pkt)
706 struct sk_buff *skb = pkt->skb;
707 enum ip_conntrack_info ctinfo;
710 ct = nf_ct_get(pkt->skb, &ctinfo);
711 /* Previously seen (loopback or untracked)? Ignore. */
715 ct = nf_ct_untracked_get();
716 atomic_inc(&ct->ct_general.use);
717 nf_ct_set(skb, ct, IP_CT_NEW);
/*
 * Forward declaration of the notrack expression type, then its operations
 * table.  The expression carries no private data (size is computed from
 * 0) and only provides an eval callback.
 */
720 static struct nft_expr_type nft_notrack_type;
721 static const struct nft_expr_ops nft_notrack_ops = {
722 .type = &nft_notrack_type,
723 .size = NFT_EXPR_SIZE(0),
724 .eval = nft_notrack_eval,
/*
 * Expression type descriptor for notrack; it has a single fixed ops table.
 * NOTE(review): the .name initializer line is not visible in this listing.
 */
727 static struct nft_expr_type nft_notrack_type __read_mostly = {
729 .ops = &nft_notrack_ops,
730 .owner = THIS_MODULE,
/*
 * Module init: asserts at build time that conntrack labels fit into the
 * nf_tables register area, then registers the ct and notrack expression
 * types.  The trailing nft_unregister_expr(&nft_ct_type) is presumably the
 * rollback when registering notrack fails -- the error checks, label and
 * return statements are not visible in this listing, confirm.
 */
733 static int __init nft_ct_module_init(void)
737 BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
739 err = nft_register_expr(&nft_ct_type);
743 err = nft_register_expr(&nft_notrack_type);
749 nft_unregister_expr(&nft_ct_type);
/*
 * Module exit: unregisters both expression types, in the reverse of the
 * order in which nft_ct_module_init() registers them.
 */
753 static void __exit nft_ct_module_exit(void)
755 nft_unregister_expr(&nft_notrack_type);
756 nft_unregister_expr(&nft_ct_type);
/*
 * Module entry/exit hooks, license tag, and the expression-name aliases
 * "ct" and "notrack" for the two expression types this module provides.
 */
759 module_init(nft_ct_module_init);
760 module_exit(nft_ct_module_exit);
762 MODULE_LICENSE("GPL");
764 MODULE_ALIAS_NFT_EXPR("ct");
765 MODULE_ALIAS_NFT_EXPR("notrack");