Git Repo - linux.git/blob - net/mctp/route.c
iavf: Refactor iavf state machine tracking
[linux.git] / net / mctp / route.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Management Component Transport Protocol (MCTP) - routing
4  * implementation.
5  *
6  * This is currently based on a simple routing table, with no dst cache. The
7  * number of routes should stay fairly small, so the lookup cost is small.
8  *
9  * Copyright (c) 2021 Code Construct
10  * Copyright (c) 2021 Google
11  */
12
13 #include <linux/idr.h>
14 #include <linux/kconfig.h>
15 #include <linux/mctp.h>
16 #include <linux/netdevice.h>
17 #include <linux/rtnetlink.h>
18 #include <linux/skbuff.h>
19
20 #include <uapi/linux/if_arp.h>
21
22 #include <net/mctp.h>
23 #include <net/mctpdevice.h>
24 #include <net/netlink.h>
25 #include <net/sock.h>
26
27 #include <trace/events/mctp.h>
28
29 static const unsigned int mctp_message_maxlen = 64 * 1024;
30 static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;
31
32 /* route output callbacks */
/* Default output handler installed by mctp_route_alloc(): free the packet
 * and report success.
 */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
38
39 static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
40 {
41         struct mctp_skb_cb *cb = mctp_cb(skb);
42         struct mctp_hdr *mh;
43         struct sock *sk;
44         u8 type;
45
46         WARN_ON(!rcu_read_lock_held());
47
48         /* TODO: look up in skb->cb? */
49         mh = mctp_hdr(skb);
50
51         if (!skb_headlen(skb))
52                 return NULL;
53
54         type = (*(u8 *)skb->data) & 0x7f;
55
56         sk_for_each_rcu(sk, &net->mctp.binds) {
57                 struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
58
59                 if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
60                         continue;
61
62                 if (msk->bind_type != type)
63                         continue;
64
65                 if (msk->bind_addr != MCTP_ADDR_ANY &&
66                     msk->bind_addr != mh->dest)
67                         continue;
68
69                 return msk;
70         }
71
72         return NULL;
73 }
74
75 static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
76                            mctp_eid_t peer, u8 tag)
77 {
78         if (key->local_addr != local)
79                 return false;
80
81         if (key->peer_addr != peer)
82                 return false;
83
84         if (key->tag != tag)
85                 return false;
86
87         return true;
88 }
89
90 /* returns a key (with key->lock held, and refcounted), or NULL if no such
91  * key exists.
92  */
93 static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
94                                            mctp_eid_t peer,
95                                            unsigned long *irqflags)
96         __acquires(&key->lock)
97 {
98         struct mctp_sk_key *key, *ret;
99         unsigned long flags;
100         struct mctp_hdr *mh;
101         u8 tag;
102
103         mh = mctp_hdr(skb);
104         tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
105
106         ret = NULL;
107         spin_lock_irqsave(&net->mctp.keys_lock, flags);
108
109         hlist_for_each_entry(key, &net->mctp.keys, hlist) {
110                 if (!mctp_key_match(key, mh->dest, peer, tag))
111                         continue;
112
113                 spin_lock(&key->lock);
114                 if (key->valid) {
115                         refcount_inc(&key->refs);
116                         ret = key;
117                         break;
118                 }
119                 spin_unlock(&key->lock);
120         }
121
122         if (ret) {
123                 spin_unlock(&net->mctp.keys_lock);
124                 *irqflags = flags;
125         } else {
126                 spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
127         }
128
129         return ret;
130 }
131
132 static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
133                                           mctp_eid_t local, mctp_eid_t peer,
134                                           u8 tag, gfp_t gfp)
135 {
136         struct mctp_sk_key *key;
137
138         key = kzalloc(sizeof(*key), gfp);
139         if (!key)
140                 return NULL;
141
142         key->peer_addr = peer;
143         key->local_addr = local;
144         key->tag = tag;
145         key->sk = &msk->sk;
146         key->valid = true;
147         spin_lock_init(&key->lock);
148         refcount_set(&key->refs, 1);
149
150         return key;
151 }
152
153 void mctp_key_unref(struct mctp_sk_key *key)
154 {
155         if (refcount_dec_and_test(&key->refs))
156                 kfree(key);
157 }
158
159 static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
160 {
161         struct net *net = sock_net(&msk->sk);
162         struct mctp_sk_key *tmp;
163         unsigned long flags;
164         int rc = 0;
165
166         spin_lock_irqsave(&net->mctp.keys_lock, flags);
167
168         hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
169                 if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
170                                    key->tag)) {
171                         spin_lock(&tmp->lock);
172                         if (tmp->valid)
173                                 rc = -EEXIST;
174                         spin_unlock(&tmp->lock);
175                         if (rc)
176                                 break;
177                 }
178         }
179
180         if (!rc) {
181                 refcount_inc(&key->refs);
182                 key->expiry = jiffies + mctp_key_lifetime;
183                 timer_reduce(&msk->key_expiry, key->expiry);
184
185                 hlist_add_head(&key->hlist, &net->mctp.keys);
186                 hlist_add_head(&key->sklist, &msk->keys);
187         }
188
189         spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
190
191         return rc;
192 }
193
194 /* We're done with the key; unset valid and remove from lists. There may still
195  * be outstanding refs on the key though...
196  */
197 static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
198                                    unsigned long flags)
199         __releases(&key->lock)
200 {
201         struct sk_buff *skb;
202
203         skb = key->reasm_head;
204         key->reasm_head = NULL;
205         key->reasm_dead = true;
206         key->valid = false;
207         spin_unlock_irqrestore(&key->lock, flags);
208
209         spin_lock_irqsave(&net->mctp.keys_lock, flags);
210         hlist_del(&key->hlist);
211         hlist_del(&key->sklist);
212         spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
213
214         /* one unref for the lists */
215         mctp_key_unref(key);
216
217         /* and one for the local reference */
218         mctp_key_unref(key);
219
220         if (skb)
221                 kfree_skb(skb);
222
223 }
224
225 static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
226 {
227         struct mctp_hdr *hdr = mctp_hdr(skb);
228         u8 exp_seq, this_seq;
229
230         this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
231                 & MCTP_HDR_SEQ_MASK;
232
233         if (!key->reasm_head) {
234                 key->reasm_head = skb;
235                 key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
236                 key->last_seq = this_seq;
237                 return 0;
238         }
239
240         exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;
241
242         if (this_seq != exp_seq)
243                 return -EINVAL;
244
245         if (key->reasm_head->len + skb->len > mctp_message_maxlen)
246                 return -EINVAL;
247
248         skb->next = NULL;
249         skb->sk = NULL;
250         *key->reasm_tailp = skb;
251         key->reasm_tailp = &skb->next;
252
253         key->last_seq = this_seq;
254
255         key->reasm_head->data_len += skb->len;
256         key->reasm_head->len += skb->len;
257         key->reasm_head->truesize += skb->truesize;
258
259         return 0;
260 }
261
262 static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
263 {
264         struct net *net = dev_net(skb->dev);
265         struct mctp_sk_key *key;
266         struct mctp_sock *msk;
267         struct mctp_hdr *mh;
268         unsigned long f;
269         u8 tag, flags;
270         int rc;
271
272         msk = NULL;
273         rc = -EINVAL;
274
275         /* we may be receiving a locally-routed packet; drop source sk
276          * accounting
277          */
278         skb_orphan(skb);
279
280         /* ensure we have enough data for a header and a type */
281         if (skb->len < sizeof(struct mctp_hdr) + 1)
282                 goto out;
283
284         /* grab header, advance data ptr */
285         mh = mctp_hdr(skb);
286         skb_pull(skb, sizeof(struct mctp_hdr));
287
288         if (mh->ver != 1)
289                 goto out;
290
291         flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
292         tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
293
294         rcu_read_lock();
295
296         /* lookup socket / reasm context, exactly matching (src,dest,tag).
297          * we hold a ref on the key, and key->lock held.
298          */
299         key = mctp_lookup_key(net, skb, mh->src, &f);
300
301         if (flags & MCTP_HDR_FLAG_SOM) {
302                 if (key) {
303                         msk = container_of(key->sk, struct mctp_sock, sk);
304                 } else {
305                         /* first response to a broadcast? do a more general
306                          * key lookup to find the socket, but don't use this
307                          * key for reassembly - we'll create a more specific
308                          * one for future packets if required (ie, !EOM).
309                          */
310                         key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
311                         if (key) {
312                                 msk = container_of(key->sk,
313                                                    struct mctp_sock, sk);
314                                 spin_unlock_irqrestore(&key->lock, f);
315                                 mctp_key_unref(key);
316                                 key = NULL;
317                         }
318                 }
319
320                 if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
321                         msk = mctp_lookup_bind(net, skb);
322
323                 if (!msk) {
324                         rc = -ENOENT;
325                         goto out_unlock;
326                 }
327
328                 /* single-packet message? deliver to socket, clean up any
329                  * pending key.
330                  */
331                 if (flags & MCTP_HDR_FLAG_EOM) {
332                         sock_queue_rcv_skb(&msk->sk, skb);
333                         if (key) {
334                                 /* we've hit a pending reassembly; not much we
335                                  * can do but drop it
336                                  */
337                                 trace_mctp_key_release(key,
338                                                        MCTP_TRACE_KEY_REPLIED);
339                                 __mctp_key_unlock_drop(key, net, f);
340                                 key = NULL;
341                         }
342                         rc = 0;
343                         goto out_unlock;
344                 }
345
346                 /* broadcast response or a bind() - create a key for further
347                  * packets for this message
348                  */
349                 if (!key) {
350                         key = mctp_key_alloc(msk, mh->dest, mh->src,
351                                              tag, GFP_ATOMIC);
352                         if (!key) {
353                                 rc = -ENOMEM;
354                                 goto out_unlock;
355                         }
356
357                         /* we can queue without the key lock here, as the
358                          * key isn't observable yet
359                          */
360                         mctp_frag_queue(key, skb);
361
362                         /* if the key_add fails, we've raced with another
363                          * SOM packet with the same src, dest and tag. There's
364                          * no way to distinguish future packets, so all we
365                          * can do is drop; we'll free the skb on exit from
366                          * this function.
367                          */
368                         rc = mctp_key_add(key, msk);
369                         if (rc)
370                                 kfree(key);
371
372                         trace_mctp_key_acquire(key);
373
374                         /* we don't need to release key->lock on exit */
375                         mctp_key_unref(key);
376                         key = NULL;
377
378                 } else {
379                         if (key->reasm_head || key->reasm_dead) {
380                                 /* duplicate start? drop everything */
381                                 trace_mctp_key_release(key,
382                                                        MCTP_TRACE_KEY_INVALIDATED);
383                                 __mctp_key_unlock_drop(key, net, f);
384                                 rc = -EEXIST;
385                                 key = NULL;
386                         } else {
387                                 rc = mctp_frag_queue(key, skb);
388                         }
389                 }
390
391         } else if (key) {
392                 /* this packet continues a previous message; reassemble
393                  * using the message-specific key
394                  */
395
396                 /* we need to be continuing an existing reassembly... */
397                 if (!key->reasm_head)
398                         rc = -EINVAL;
399                 else
400                         rc = mctp_frag_queue(key, skb);
401
402                 /* end of message? deliver to socket, and we're done with
403                  * the reassembly/response key
404                  */
405                 if (!rc && flags & MCTP_HDR_FLAG_EOM) {
406                         sock_queue_rcv_skb(key->sk, key->reasm_head);
407                         key->reasm_head = NULL;
408                         trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED);
409                         __mctp_key_unlock_drop(key, net, f);
410                         key = NULL;
411                 }
412
413         } else {
414                 /* not a start, no matching key */
415                 rc = -ENOENT;
416         }
417
418 out_unlock:
419         rcu_read_unlock();
420         if (key) {
421                 spin_unlock_irqrestore(&key->lock, f);
422                 mctp_key_unref(key);
423         }
424 out:
425         if (rc)
426                 kfree_skb(skb);
427         return rc;
428 }
429
430 static unsigned int mctp_route_mtu(struct mctp_route *rt)
431 {
432         return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
433 }
434
435 static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
436 {
437         struct mctp_hdr *hdr = mctp_hdr(skb);
438         char daddr_buf[MAX_ADDR_LEN];
439         char *daddr = NULL;
440         unsigned int mtu;
441         int rc;
442
443         skb->protocol = htons(ETH_P_MCTP);
444
445         mtu = READ_ONCE(skb->dev->mtu);
446         if (skb->len > mtu) {
447                 kfree_skb(skb);
448                 return -EMSGSIZE;
449         }
450
451         /* If lookup fails let the device handle daddr==NULL */
452         if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
453                 daddr = daddr_buf;
454
455         rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
456                              daddr, skb->dev->dev_addr, skb->len);
457         if (rc) {
458                 kfree_skb(skb);
459                 return -EHOSTUNREACH;
460         }
461
462         rc = dev_queue_xmit(skb);
463         if (rc)
464                 rc = net_xmit_errno(rc);
465
466         return rc;
467 }
468
469 /* route alloc/release */
470 static void mctp_route_release(struct mctp_route *rt)
471 {
472         if (refcount_dec_and_test(&rt->refs)) {
473                 mctp_dev_put(rt->dev);
474                 kfree_rcu(rt, rcu);
475         }
476 }
477
478 /* returns a route with the refcount at 1 */
479 static struct mctp_route *mctp_route_alloc(void)
480 {
481         struct mctp_route *rt;
482
483         rt = kzalloc(sizeof(*rt), GFP_KERNEL);
484         if (!rt)
485                 return NULL;
486
487         INIT_LIST_HEAD(&rt->list);
488         refcount_set(&rt->refs, 1);
489         rt->output = mctp_route_discard;
490
491         return rt;
492 }
493
494 unsigned int mctp_default_net(struct net *net)
495 {
496         return READ_ONCE(net->mctp.default_net);
497 }
498
499 int mctp_default_net_set(struct net *net, unsigned int index)
500 {
501         if (index == 0)
502                 return -EINVAL;
503         WRITE_ONCE(net->mctp.default_net, index);
504         return 0;
505 }
506
507 /* tag management */
508 static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
509                              struct mctp_sock *msk)
510 {
511         struct netns_mctp *mns = &net->mctp;
512
513         lockdep_assert_held(&mns->keys_lock);
514
515         key->expiry = jiffies + mctp_key_lifetime;
516         timer_reduce(&msk->key_expiry, key->expiry);
517
518         /* we hold the net->key_lock here, allowing updates to both
519          * then net and sk
520          */
521         hlist_add_head_rcu(&key->hlist, &mns->keys);
522         hlist_add_head_rcu(&key->sklist, &msk->keys);
523         refcount_inc(&key->refs);
524 }
525
526 /* Allocate a locally-owned tag value for (saddr, daddr), and reserve
527  * it for the socket msk
528  */
529 static int mctp_alloc_local_tag(struct mctp_sock *msk,
530                                 mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
531 {
532         struct net *net = sock_net(&msk->sk);
533         struct netns_mctp *mns = &net->mctp;
534         struct mctp_sk_key *key, *tmp;
535         unsigned long flags;
536         int rc = -EAGAIN;
537         u8 tagbits;
538
539         /* for NULL destination EIDs, we may get a response from any peer */
540         if (daddr == MCTP_ADDR_NULL)
541                 daddr = MCTP_ADDR_ANY;
542
543         /* be optimistic, alloc now */
544         key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
545         if (!key)
546                 return -ENOMEM;
547
548         /* 8 possible tag values */
549         tagbits = 0xff;
550
551         spin_lock_irqsave(&mns->keys_lock, flags);
552
553         /* Walk through the existing keys, looking for potential conflicting
554          * tags. If we find a conflict, clear that bit from tagbits
555          */
556         hlist_for_each_entry(tmp, &mns->keys, hlist) {
557                 /* We can check the lookup fields (*_addr, tag) without the
558                  * lock held, they don't change over the lifetime of the key.
559                  */
560
561                 /* if we don't own the tag, it can't conflict */
562                 if (tmp->tag & MCTP_HDR_FLAG_TO)
563                         continue;
564
565                 if (!((tmp->peer_addr == daddr ||
566                        tmp->peer_addr == MCTP_ADDR_ANY) &&
567                        tmp->local_addr == saddr))
568                         continue;
569
570                 spin_lock(&tmp->lock);
571                 /* key must still be valid. If we find a match, clear the
572                  * potential tag value
573                  */
574                 if (tmp->valid)
575                         tagbits &= ~(1 << tmp->tag);
576                 spin_unlock(&tmp->lock);
577
578                 if (!tagbits)
579                         break;
580         }
581
582         if (tagbits) {
583                 key->tag = __ffs(tagbits);
584                 mctp_reserve_tag(net, key, msk);
585                 trace_mctp_key_acquire(key);
586
587                 *tagp = key->tag;
588                 /* done with the key in this scope */
589                 mctp_key_unref(key);
590                 key = NULL;
591                 rc = 0;
592         }
593
594         spin_unlock_irqrestore(&mns->keys_lock, flags);
595
596         if (!tagbits)
597                 kfree(key);
598
599         return rc;
600 }
601
602 /* routing lookups */
603 static bool mctp_rt_match_eid(struct mctp_route *rt,
604                               unsigned int net, mctp_eid_t eid)
605 {
606         return READ_ONCE(rt->dev->net) == net &&
607                 rt->min <= eid && rt->max >= eid;
608 }
609
610 /* compares match, used for duplicate prevention */
611 static bool mctp_rt_compare_exact(struct mctp_route *rt1,
612                                   struct mctp_route *rt2)
613 {
614         ASSERT_RTNL();
615         return rt1->dev->net == rt2->dev->net &&
616                 rt1->min == rt2->min &&
617                 rt1->max == rt2->max;
618 }
619
620 struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
621                                      mctp_eid_t daddr)
622 {
623         struct mctp_route *tmp, *rt = NULL;
624
625         list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
626                 /* TODO: add metrics */
627                 if (mctp_rt_match_eid(tmp, dnet, daddr)) {
628                         if (refcount_inc_not_zero(&tmp->refs)) {
629                                 rt = tmp;
630                                 break;
631                         }
632                 }
633         }
634
635         return rt;
636 }
637
638 static struct mctp_route *mctp_route_lookup_null(struct net *net,
639                                                  struct net_device *dev)
640 {
641         struct mctp_route *rt;
642
643         list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
644                 if (rt->dev->dev == dev && rt->type == RTN_LOCAL &&
645                     refcount_inc_not_zero(&rt->refs))
646                         return rt;
647         }
648
649         return NULL;
650 }
651
652 /* sends a skb to rt and releases the route. */
653 int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
654 {
655         int rc;
656
657         rc = rt->output(rt, skb);
658         mctp_route_release(rt);
659         return rc;
660 }
661
662 static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
663                                   unsigned int mtu, u8 tag)
664 {
665         const unsigned int hlen = sizeof(struct mctp_hdr);
666         struct mctp_hdr *hdr, *hdr2;
667         unsigned int pos, size;
668         struct sk_buff *skb2;
669         int rc;
670         u8 seq;
671
672         hdr = mctp_hdr(skb);
673         seq = 0;
674         rc = 0;
675
676         if (mtu < hlen + 1) {
677                 kfree_skb(skb);
678                 return -EMSGSIZE;
679         }
680
681         /* we've got the header */
682         skb_pull(skb, hlen);
683
684         for (pos = 0; pos < skb->len;) {
685                 /* size of message payload */
686                 size = min(mtu - hlen, skb->len - pos);
687
688                 skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
689                 if (!skb2) {
690                         rc = -ENOMEM;
691                         break;
692                 }
693
694                 /* generic skb copy */
695                 skb2->protocol = skb->protocol;
696                 skb2->priority = skb->priority;
697                 skb2->dev = skb->dev;
698                 memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));
699
700                 if (skb->sk)
701                         skb_set_owner_w(skb2, skb->sk);
702
703                 /* establish packet */
704                 skb_reserve(skb2, MCTP_HEADER_MAXLEN);
705                 skb_reset_network_header(skb2);
706                 skb_put(skb2, hlen + size);
707                 skb2->transport_header = skb2->network_header + hlen;
708
709                 /* copy header fields, calculate SOM/EOM flags & seq */
710                 hdr2 = mctp_hdr(skb2);
711                 hdr2->ver = hdr->ver;
712                 hdr2->dest = hdr->dest;
713                 hdr2->src = hdr->src;
714                 hdr2->flags_seq_tag = tag &
715                         (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
716
717                 if (pos == 0)
718                         hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
719
720                 if (pos + size == skb->len)
721                         hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
722
723                 hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;
724
725                 /* copy message payload */
726                 skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
727
728                 /* do route, but don't drop the rt reference */
729                 rc = rt->output(rt, skb2);
730                 if (rc)
731                         break;
732
733                 seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
734                 pos += size;
735         }
736
737         mctp_route_release(rt);
738         consume_skb(skb);
739         return rc;
740 }
741
742 int mctp_local_output(struct sock *sk, struct mctp_route *rt,
743                       struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
744 {
745         struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
746         struct mctp_skb_cb *cb = mctp_cb(skb);
747         struct mctp_hdr *hdr;
748         unsigned long flags;
749         unsigned int mtu;
750         mctp_eid_t saddr;
751         int rc;
752         u8 tag;
753
754         if (WARN_ON(!rt->dev))
755                 return -EINVAL;
756
757         spin_lock_irqsave(&rt->dev->addrs_lock, flags);
758         if (rt->dev->num_addrs == 0) {
759                 rc = -EHOSTUNREACH;
760         } else {
761                 /* use the outbound interface's first address as our source */
762                 saddr = rt->dev->addrs[0];
763                 rc = 0;
764         }
765         spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
766
767         if (rc)
768                 return rc;
769
770         if (req_tag & MCTP_HDR_FLAG_TO) {
771                 rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
772                 if (rc)
773                         return rc;
774                 tag |= MCTP_HDR_FLAG_TO;
775         } else {
776                 tag = req_tag;
777         }
778
779
780         skb->protocol = htons(ETH_P_MCTP);
781         skb->priority = 0;
782         skb_reset_transport_header(skb);
783         skb_push(skb, sizeof(struct mctp_hdr));
784         skb_reset_network_header(skb);
785         skb->dev = rt->dev->dev;
786
787         /* cb->net will have been set on initial ingress */
788         cb->src = saddr;
789
790         /* set up common header fields */
791         hdr = mctp_hdr(skb);
792         hdr->ver = 1;
793         hdr->dest = daddr;
794         hdr->src = saddr;
795
796         mtu = mctp_route_mtu(rt);
797
798         if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
799                 hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
800                         tag;
801                 return mctp_do_route(rt, skb);
802         } else {
803                 return mctp_do_fragment_route(rt, skb, mtu, tag);
804         }
805 }
806
807 /* route management */
808 static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
809                           unsigned int daddr_extent, unsigned int mtu,
810                           unsigned char type)
811 {
812         int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
813         struct net *net = dev_net(mdev->dev);
814         struct mctp_route *rt, *ert;
815
816         if (!mctp_address_ok(daddr_start))
817                 return -EINVAL;
818
819         if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
820                 return -EINVAL;
821
822         switch (type) {
823         case RTN_LOCAL:
824                 rtfn = mctp_route_input;
825                 break;
826         case RTN_UNICAST:
827                 rtfn = mctp_route_output;
828                 break;
829         default:
830                 return -EINVAL;
831         }
832
833         rt = mctp_route_alloc();
834         if (!rt)
835                 return -ENOMEM;
836
837         rt->min = daddr_start;
838         rt->max = daddr_start + daddr_extent;
839         rt->mtu = mtu;
840         rt->dev = mdev;
841         mctp_dev_hold(rt->dev);
842         rt->type = type;
843         rt->output = rtfn;
844
845         ASSERT_RTNL();
846         /* Prevent duplicate identical routes. */
847         list_for_each_entry(ert, &net->mctp.routes, list) {
848                 if (mctp_rt_compare_exact(rt, ert)) {
849                         mctp_route_release(rt);
850                         return -EEXIST;
851                 }
852         }
853
854         list_add_rcu(&rt->list, &net->mctp.routes);
855
856         return 0;
857 }
858
859 static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
860                              unsigned int daddr_extent)
861 {
862         struct net *net = dev_net(mdev->dev);
863         struct mctp_route *rt, *tmp;
864         mctp_eid_t daddr_end;
865         bool dropped;
866
867         if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
868                 return -EINVAL;
869
870         daddr_end = daddr_start + daddr_extent;
871         dropped = false;
872
873         ASSERT_RTNL();
874
875         list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
876                 if (rt->dev == mdev &&
877                     rt->min == daddr_start && rt->max == daddr_end) {
878                         list_del_rcu(&rt->list);
879                         /* TODO: immediate RTM_DELROUTE */
880                         mctp_route_release(rt);
881                         dropped = true;
882                 }
883         }
884
885         return dropped ? 0 : -ENOENT;
886 }
887
888 int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
889 {
890         return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
891 }
892
893 int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
894 {
895         return mctp_route_remove(mdev, addr, 0);
896 }
897
898 /* removes all entries for a given device */
899 void mctp_route_remove_dev(struct mctp_dev *mdev)
900 {
901         struct net *net = dev_net(mdev->dev);
902         struct mctp_route *rt, *tmp;
903
904         ASSERT_RTNL();
905         list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
906                 if (rt->dev == mdev) {
907                         list_del_rcu(&rt->list);
908                         /* TODO: immediate RTM_DELROUTE */
909                         mctp_route_release(rt);
910                 }
911         }
912 }
913
914 /* Incoming packet-handling */
915
916 static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
917                                 struct packet_type *pt,
918                                 struct net_device *orig_dev)
919 {
920         struct net *net = dev_net(dev);
921         struct mctp_dev *mdev;
922         struct mctp_skb_cb *cb;
923         struct mctp_route *rt;
924         struct mctp_hdr *mh;
925
926         rcu_read_lock();
927         mdev = __mctp_dev_get(dev);
928         rcu_read_unlock();
929         if (!mdev) {
930                 /* basic non-data sanity checks */
931                 goto err_drop;
932         }
933
934         if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
935                 goto err_drop;
936
937         skb_reset_transport_header(skb);
938         skb_reset_network_header(skb);
939
940         /* We have enough for a header; decode and route */
941         mh = mctp_hdr(skb);
942         if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
943                 goto err_drop;
944
945         cb = __mctp_cb(skb);
946         cb->net = READ_ONCE(mdev->net);
947
948         rt = mctp_route_lookup(net, cb->net, mh->dest);
949
950         /* NULL EID, but addressed to our physical address */
951         if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
952                 rt = mctp_route_lookup_null(net, dev);
953
954         if (!rt)
955                 goto err_drop;
956
957         mctp_do_route(rt, skb);
958
959         return NET_RX_SUCCESS;
960
961 err_drop:
962         kfree_skb(skb);
963         return NET_RX_DROP;
964 }
965
966 static struct packet_type mctp_packet_type = {
967         .type = cpu_to_be16(ETH_P_MCTP),
968         .func = mctp_pkttype_receive,
969 };
970
971 /* netlink interface */
972
973 static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
974         [RTA_DST]               = { .type = NLA_U8 },
975         [RTA_METRICS]           = { .type = NLA_NESTED },
976         [RTA_OIF]               = { .type = NLA_U32 },
977 };
978
979 /* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
980  * tb must hold RTA_MAX+1 elements.
981  */
982 static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
983                               struct netlink_ext_ack *extack,
984                               struct nlattr **tb, struct rtmsg **rtm,
985                               struct mctp_dev **mdev, mctp_eid_t *daddr_start)
986 {
987         struct net *net = sock_net(skb->sk);
988         struct net_device *dev;
989         unsigned int ifindex;
990         int rc;
991
992         rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
993                          rta_mctp_policy, extack);
994         if (rc < 0) {
995                 NL_SET_ERR_MSG(extack, "incorrect format");
996                 return rc;
997         }
998
999         if (!tb[RTA_DST]) {
1000                 NL_SET_ERR_MSG(extack, "dst EID missing");
1001                 return -EINVAL;
1002         }
1003         *daddr_start = nla_get_u8(tb[RTA_DST]);
1004
1005         if (!tb[RTA_OIF]) {
1006                 NL_SET_ERR_MSG(extack, "ifindex missing");
1007                 return -EINVAL;
1008         }
1009         ifindex = nla_get_u32(tb[RTA_OIF]);
1010
1011         *rtm = nlmsg_data(nlh);
1012         if ((*rtm)->rtm_family != AF_MCTP) {
1013                 NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
1014                 return -EINVAL;
1015         }
1016
1017         dev = __dev_get_by_index(net, ifindex);
1018         if (!dev) {
1019                 NL_SET_ERR_MSG(extack, "bad ifindex");
1020                 return -ENODEV;
1021         }
1022         *mdev = mctp_dev_get_rtnl(dev);
1023         if (!*mdev)
1024                 return -ENODEV;
1025
1026         if (dev->flags & IFF_LOOPBACK) {
1027                 NL_SET_ERR_MSG(extack, "no routes to loopback");
1028                 return -EINVAL;
1029         }
1030
1031         return 0;
1032 }
1033
1034 static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
1035         [RTAX_MTU]              = { .type = NLA_U32 },
1036 };
1037
1038 static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
1039                          struct netlink_ext_ack *extack)
1040 {
1041         struct nlattr *tb[RTA_MAX + 1];
1042         struct nlattr *tbx[RTAX_MAX + 1];
1043         mctp_eid_t daddr_start;
1044         struct mctp_dev *mdev;
1045         struct rtmsg *rtm;
1046         unsigned int mtu;
1047         int rc;
1048
1049         rc = mctp_route_nlparse(skb, nlh, extack, tb,
1050                                 &rtm, &mdev, &daddr_start);
1051         if (rc < 0)
1052                 return rc;
1053
1054         if (rtm->rtm_type != RTN_UNICAST) {
1055                 NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
1056                 return -EINVAL;
1057         }
1058
1059         mtu = 0;
1060         if (tb[RTA_METRICS]) {
1061                 rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
1062                                       rta_metrics_policy, NULL);
1063                 if (rc < 0)
1064                         return rc;
1065                 if (tbx[RTAX_MTU])
1066                         mtu = nla_get_u32(tbx[RTAX_MTU]);
1067         }
1068
1069         if (rtm->rtm_type != RTN_UNICAST)
1070                 return -EINVAL;
1071
1072         rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
1073                             rtm->rtm_type);
1074         return rc;
1075 }
1076
1077 static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
1078                          struct netlink_ext_ack *extack)
1079 {
1080         struct nlattr *tb[RTA_MAX + 1];
1081         mctp_eid_t daddr_start;
1082         struct mctp_dev *mdev;
1083         struct rtmsg *rtm;
1084         int rc;
1085
1086         rc = mctp_route_nlparse(skb, nlh, extack, tb,
1087                                 &rtm, &mdev, &daddr_start);
1088         if (rc < 0)
1089                 return rc;
1090
1091         /* we only have unicast routes */
1092         if (rtm->rtm_type != RTN_UNICAST)
1093                 return -EINVAL;
1094
1095         rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
1096         return rc;
1097 }
1098
1099 static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
1100                             u32 portid, u32 seq, int event, unsigned int flags)
1101 {
1102         struct nlmsghdr *nlh;
1103         struct rtmsg *hdr;
1104         void *metrics;
1105
1106         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
1107         if (!nlh)
1108                 return -EMSGSIZE;
1109
1110         hdr = nlmsg_data(nlh);
1111         hdr->rtm_family = AF_MCTP;
1112
1113         /* we use the _len fields as a number of EIDs, rather than
1114          * a number of bits in the address
1115          */
1116         hdr->rtm_dst_len = rt->max - rt->min;
1117         hdr->rtm_src_len = 0;
1118         hdr->rtm_tos = 0;
1119         hdr->rtm_table = RT_TABLE_DEFAULT;
1120         hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
1121         hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
1122         hdr->rtm_type = rt->type;
1123
1124         if (nla_put_u8(skb, RTA_DST, rt->min))
1125                 goto cancel;
1126
1127         metrics = nla_nest_start_noflag(skb, RTA_METRICS);
1128         if (!metrics)
1129                 goto cancel;
1130
1131         if (rt->mtu) {
1132                 if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
1133                         goto cancel;
1134         }
1135
1136         nla_nest_end(skb, metrics);
1137
1138         if (rt->dev) {
1139                 if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
1140                         goto cancel;
1141         }
1142
1143         /* TODO: conditional neighbour physaddr? */
1144
1145         nlmsg_end(skb, nlh);
1146
1147         return 0;
1148
1149 cancel:
1150         nlmsg_cancel(skb, nlh);
1151         return -EMSGSIZE;
1152 }
1153
1154 static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
1155 {
1156         struct net *net = sock_net(skb->sk);
1157         struct mctp_route *rt;
1158         int s_idx, idx;
1159
1160         /* TODO: allow filtering on route data, possibly under
1161          * cb->strict_check
1162          */
1163
1164         /* TODO: change to struct overlay */
1165         s_idx = cb->args[0];
1166         idx = 0;
1167
1168         rcu_read_lock();
1169         list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
1170                 if (idx++ < s_idx)
1171                         continue;
1172                 if (mctp_fill_rtinfo(skb, rt,
1173                                      NETLINK_CB(cb->skb).portid,
1174                                      cb->nlh->nlmsg_seq,
1175                                      RTM_NEWROUTE, NLM_F_MULTI) < 0)
1176                         break;
1177         }
1178
1179         rcu_read_unlock();
1180         cb->args[0] = idx;
1181
1182         return skb->len;
1183 }
1184
1185 /* net namespace implementation */
1186 static int __net_init mctp_routes_net_init(struct net *net)
1187 {
1188         struct netns_mctp *ns = &net->mctp;
1189
1190         INIT_LIST_HEAD(&ns->routes);
1191         INIT_HLIST_HEAD(&ns->binds);
1192         mutex_init(&ns->bind_lock);
1193         INIT_HLIST_HEAD(&ns->keys);
1194         spin_lock_init(&ns->keys_lock);
1195         WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
1196         return 0;
1197 }
1198
1199 static void __net_exit mctp_routes_net_exit(struct net *net)
1200 {
1201         struct mctp_route *rt;
1202
1203         rcu_read_lock();
1204         list_for_each_entry_rcu(rt, &net->mctp.routes, list)
1205                 mctp_route_release(rt);
1206         rcu_read_unlock();
1207 }
1208
1209 static struct pernet_operations mctp_net_ops = {
1210         .init = mctp_routes_net_init,
1211         .exit = mctp_routes_net_exit,
1212 };
1213
1214 int __init mctp_routes_init(void)
1215 {
1216         dev_add_pack(&mctp_packet_type);
1217
1218         rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
1219                              NULL, mctp_dump_rtinfo, 0);
1220         rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
1221                              mctp_newroute, NULL, 0);
1222         rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
1223                              mctp_delroute, NULL, 0);
1224
1225         return register_pernet_subsys(&mctp_net_ops);
1226 }
1227
1228 void __exit mctp_routes_exit(void)
1229 {
1230         unregister_pernet_subsys(&mctp_net_ops);
1231         rtnl_unregister(PF_MCTP, RTM_DELROUTE);
1232         rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
1233         rtnl_unregister(PF_MCTP, RTM_GETROUTE);
1234         dev_remove_pack(&mctp_packet_type);
1235 }
1236
1237 #if IS_ENABLED(CONFIG_MCTP_TEST)
1238 #include "test/route-test.c"
1239 #endif
This page took 0.097826 seconds and 4 git commands to generate.