]> Git Repo - linux.git/blob - net/mctp/route.c
bpf: selftests: Add selftests for module kfunc support
[linux.git] / net / mctp / route.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Management Component Transport Protocol (MCTP) - routing
4  * implementation.
5  *
6  * This is currently based on a simple routing table, with no dst cache. The
7  * number of routes should stay fairly small, so the lookup cost is small.
8  *
9  * Copyright (c) 2021 Code Construct
10  * Copyright (c) 2021 Google
11  */
12
13 #include <linux/idr.h>
14 #include <linux/kconfig.h>
15 #include <linux/mctp.h>
16 #include <linux/netdevice.h>
17 #include <linux/rtnetlink.h>
18 #include <linux/skbuff.h>
19
20 #include <uapi/linux/if_arp.h>
21
22 #include <net/mctp.h>
23 #include <net/mctpdevice.h>
24 #include <net/netlink.h>
25 #include <net/sock.h>
26
27 #include <trace/events/mctp.h>
28
29 static const unsigned int mctp_message_maxlen = 64 * 1024;
30 static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;
31
32 /* route output callbacks */
/* Route output callback that simply drops the packet.
 *
 * Frees @skb and reports success; @route is not used. This is the callback
 * that mctp_route_alloc() installs on a freshly-allocated route.
 */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
38
39 static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
40 {
41         struct mctp_skb_cb *cb = mctp_cb(skb);
42         struct mctp_hdr *mh;
43         struct sock *sk;
44         u8 type;
45
46         WARN_ON(!rcu_read_lock_held());
47
48         /* TODO: look up in skb->cb? */
49         mh = mctp_hdr(skb);
50
51         if (!skb_headlen(skb))
52                 return NULL;
53
54         type = (*(u8 *)skb->data) & 0x7f;
55
56         sk_for_each_rcu(sk, &net->mctp.binds) {
57                 struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
58
59                 if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
60                         continue;
61
62                 if (msk->bind_type != type)
63                         continue;
64
65                 if (msk->bind_addr != MCTP_ADDR_ANY &&
66                     msk->bind_addr != mh->dest)
67                         continue;
68
69                 return msk;
70         }
71
72         return NULL;
73 }
74
75 static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
76                            mctp_eid_t peer, u8 tag)
77 {
78         if (key->local_addr != local)
79                 return false;
80
81         if (key->peer_addr != peer)
82                 return false;
83
84         if (key->tag != tag)
85                 return false;
86
87         return true;
88 }
89
90 /* returns a key (with key->lock held, and refcounted), or NULL if no such
91  * key exists.
92  */
93 static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
94                                            mctp_eid_t peer,
95                                            unsigned long *irqflags)
96         __acquires(&key->lock)
97 {
98         struct mctp_sk_key *key, *ret;
99         unsigned long flags;
100         struct mctp_hdr *mh;
101         u8 tag;
102
103         mh = mctp_hdr(skb);
104         tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
105
106         ret = NULL;
107         spin_lock_irqsave(&net->mctp.keys_lock, flags);
108
109         hlist_for_each_entry(key, &net->mctp.keys, hlist) {
110                 if (!mctp_key_match(key, mh->dest, peer, tag))
111                         continue;
112
113                 spin_lock(&key->lock);
114                 if (key->valid) {
115                         refcount_inc(&key->refs);
116                         ret = key;
117                         break;
118                 }
119                 spin_unlock(&key->lock);
120         }
121
122         if (ret) {
123                 spin_unlock(&net->mctp.keys_lock);
124                 *irqflags = flags;
125         } else {
126                 spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
127         }
128
129         return ret;
130 }
131
132 static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
133                                           mctp_eid_t local, mctp_eid_t peer,
134                                           u8 tag, gfp_t gfp)
135 {
136         struct mctp_sk_key *key;
137
138         key = kzalloc(sizeof(*key), gfp);
139         if (!key)
140                 return NULL;
141
142         key->peer_addr = peer;
143         key->local_addr = local;
144         key->tag = tag;
145         key->sk = &msk->sk;
146         key->valid = true;
147         spin_lock_init(&key->lock);
148         refcount_set(&key->refs, 1);
149
150         return key;
151 }
152
153 void mctp_key_unref(struct mctp_sk_key *key)
154 {
155         if (refcount_dec_and_test(&key->refs))
156                 kfree(key);
157 }
158
159 static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
160 {
161         struct net *net = sock_net(&msk->sk);
162         struct mctp_sk_key *tmp;
163         unsigned long flags;
164         int rc = 0;
165
166         spin_lock_irqsave(&net->mctp.keys_lock, flags);
167
168         hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
169                 if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
170                                    key->tag)) {
171                         spin_lock(&tmp->lock);
172                         if (tmp->valid)
173                                 rc = -EEXIST;
174                         spin_unlock(&tmp->lock);
175                         if (rc)
176                                 break;
177                 }
178         }
179
180         if (!rc) {
181                 refcount_inc(&key->refs);
182                 key->expiry = jiffies + mctp_key_lifetime;
183                 timer_reduce(&msk->key_expiry, key->expiry);
184
185                 hlist_add_head(&key->hlist, &net->mctp.keys);
186                 hlist_add_head(&key->sklist, &msk->keys);
187         }
188
189         spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
190
191         return rc;
192 }
193
194 /* We're done with the key; unset valid and remove from lists. There may still
195  * be outstanding refs on the key though...
196  */
197 static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
198                                    unsigned long flags)
199         __releases(&key->lock)
200 {
201         struct sk_buff *skb;
202
203         skb = key->reasm_head;
204         key->reasm_head = NULL;
205         key->reasm_dead = true;
206         key->valid = false;
207         spin_unlock_irqrestore(&key->lock, flags);
208
209         spin_lock_irqsave(&net->mctp.keys_lock, flags);
210         hlist_del(&key->hlist);
211         hlist_del(&key->sklist);
212         spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
213
214         /* one unref for the lists */
215         mctp_key_unref(key);
216
217         /* and one for the local reference */
218         mctp_key_unref(key);
219
220         if (skb)
221                 kfree_skb(skb);
222
223 }
224
225 static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
226 {
227         struct mctp_hdr *hdr = mctp_hdr(skb);
228         u8 exp_seq, this_seq;
229
230         this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
231                 & MCTP_HDR_SEQ_MASK;
232
233         if (!key->reasm_head) {
234                 key->reasm_head = skb;
235                 key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
236                 key->last_seq = this_seq;
237                 return 0;
238         }
239
240         exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;
241
242         if (this_seq != exp_seq)
243                 return -EINVAL;
244
245         if (key->reasm_head->len + skb->len > mctp_message_maxlen)
246                 return -EINVAL;
247
248         skb->next = NULL;
249         skb->sk = NULL;
250         *key->reasm_tailp = skb;
251         key->reasm_tailp = &skb->next;
252
253         key->last_seq = this_seq;
254
255         key->reasm_head->data_len += skb->len;
256         key->reasm_head->len += skb->len;
257         key->reasm_head->truesize += skb->truesize;
258
259         return 0;
260 }
261
262 static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
263 {
264         struct net *net = dev_net(skb->dev);
265         struct mctp_sk_key *key;
266         struct mctp_sock *msk;
267         struct mctp_hdr *mh;
268         unsigned long f;
269         u8 tag, flags;
270         int rc;
271
272         msk = NULL;
273         rc = -EINVAL;
274
275         /* we may be receiving a locally-routed packet; drop source sk
276          * accounting
277          */
278         skb_orphan(skb);
279
280         /* ensure we have enough data for a header and a type */
281         if (skb->len < sizeof(struct mctp_hdr) + 1)
282                 goto out;
283
284         /* grab header, advance data ptr */
285         mh = mctp_hdr(skb);
286         skb_pull(skb, sizeof(struct mctp_hdr));
287
288         if (mh->ver != 1)
289                 goto out;
290
291         flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
292         tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
293
294         rcu_read_lock();
295
296         /* lookup socket / reasm context, exactly matching (src,dest,tag).
297          * we hold a ref on the key, and key->lock held.
298          */
299         key = mctp_lookup_key(net, skb, mh->src, &f);
300
301         if (flags & MCTP_HDR_FLAG_SOM) {
302                 if (key) {
303                         msk = container_of(key->sk, struct mctp_sock, sk);
304                 } else {
305                         /* first response to a broadcast? do a more general
306                          * key lookup to find the socket, but don't use this
307                          * key for reassembly - we'll create a more specific
308                          * one for future packets if required (ie, !EOM).
309                          */
310                         key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
311                         if (key) {
312                                 msk = container_of(key->sk,
313                                                    struct mctp_sock, sk);
314                                 spin_unlock_irqrestore(&key->lock, f);
315                                 mctp_key_unref(key);
316                                 key = NULL;
317                         }
318                 }
319
320                 if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
321                         msk = mctp_lookup_bind(net, skb);
322
323                 if (!msk) {
324                         rc = -ENOENT;
325                         goto out_unlock;
326                 }
327
328                 /* single-packet message? deliver to socket, clean up any
329                  * pending key.
330                  */
331                 if (flags & MCTP_HDR_FLAG_EOM) {
332                         sock_queue_rcv_skb(&msk->sk, skb);
333                         if (key) {
334                                 /* we've hit a pending reassembly; not much we
335                                  * can do but drop it
336                                  */
337                                 trace_mctp_key_release(key,
338                                                        MCTP_TRACE_KEY_REPLIED);
339                                 __mctp_key_unlock_drop(key, net, f);
340                                 key = NULL;
341                         }
342                         rc = 0;
343                         goto out_unlock;
344                 }
345
346                 /* broadcast response or a bind() - create a key for further
347                  * packets for this message
348                  */
349                 if (!key) {
350                         key = mctp_key_alloc(msk, mh->dest, mh->src,
351                                              tag, GFP_ATOMIC);
352                         if (!key) {
353                                 rc = -ENOMEM;
354                                 goto out_unlock;
355                         }
356
357                         /* we can queue without the key lock here, as the
358                          * key isn't observable yet
359                          */
360                         mctp_frag_queue(key, skb);
361
362                         /* if the key_add fails, we've raced with another
363                          * SOM packet with the same src, dest and tag. There's
364                          * no way to distinguish future packets, so all we
365                          * can do is drop; we'll free the skb on exit from
366                          * this function.
367                          */
368                         rc = mctp_key_add(key, msk);
369                         if (rc)
370                                 kfree(key);
371
372                         trace_mctp_key_acquire(key);
373
374                         /* we don't need to release key->lock on exit */
375                         key = NULL;
376
377                 } else {
378                         if (key->reasm_head || key->reasm_dead) {
379                                 /* duplicate start? drop everything */
380                                 trace_mctp_key_release(key,
381                                                        MCTP_TRACE_KEY_INVALIDATED);
382                                 __mctp_key_unlock_drop(key, net, f);
383                                 rc = -EEXIST;
384                                 key = NULL;
385                         } else {
386                                 rc = mctp_frag_queue(key, skb);
387                         }
388                 }
389
390         } else if (key) {
391                 /* this packet continues a previous message; reassemble
392                  * using the message-specific key
393                  */
394
395                 /* we need to be continuing an existing reassembly... */
396                 if (!key->reasm_head)
397                         rc = -EINVAL;
398                 else
399                         rc = mctp_frag_queue(key, skb);
400
401                 /* end of message? deliver to socket, and we're done with
402                  * the reassembly/response key
403                  */
404                 if (!rc && flags & MCTP_HDR_FLAG_EOM) {
405                         sock_queue_rcv_skb(key->sk, key->reasm_head);
406                         key->reasm_head = NULL;
407                         trace_mctp_key_release(key, MCTP_TRACE_KEY_REPLIED);
408                         __mctp_key_unlock_drop(key, net, f);
409                         key = NULL;
410                 }
411
412         } else {
413                 /* not a start, no matching key */
414                 rc = -ENOENT;
415         }
416
417 out_unlock:
418         rcu_read_unlock();
419         if (key) {
420                 spin_unlock_irqrestore(&key->lock, f);
421                 mctp_key_unref(key);
422         }
423 out:
424         if (rc)
425                 kfree_skb(skb);
426         return rc;
427 }
428
429 static unsigned int mctp_route_mtu(struct mctp_route *rt)
430 {
431         return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
432 }
433
434 static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
435 {
436         struct mctp_hdr *hdr = mctp_hdr(skb);
437         char daddr_buf[MAX_ADDR_LEN];
438         char *daddr = NULL;
439         unsigned int mtu;
440         int rc;
441
442         skb->protocol = htons(ETH_P_MCTP);
443
444         mtu = READ_ONCE(skb->dev->mtu);
445         if (skb->len > mtu) {
446                 kfree_skb(skb);
447                 return -EMSGSIZE;
448         }
449
450         /* If lookup fails let the device handle daddr==NULL */
451         if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
452                 daddr = daddr_buf;
453
454         rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
455                              daddr, skb->dev->dev_addr, skb->len);
456         if (rc) {
457                 kfree_skb(skb);
458                 return -EHOSTUNREACH;
459         }
460
461         rc = dev_queue_xmit(skb);
462         if (rc)
463                 rc = net_xmit_errno(rc);
464
465         return rc;
466 }
467
468 /* route alloc/release */
469 static void mctp_route_release(struct mctp_route *rt)
470 {
471         if (refcount_dec_and_test(&rt->refs)) {
472                 mctp_dev_put(rt->dev);
473                 kfree_rcu(rt, rcu);
474         }
475 }
476
477 /* returns a route with the refcount at 1 */
478 static struct mctp_route *mctp_route_alloc(void)
479 {
480         struct mctp_route *rt;
481
482         rt = kzalloc(sizeof(*rt), GFP_KERNEL);
483         if (!rt)
484                 return NULL;
485
486         INIT_LIST_HEAD(&rt->list);
487         refcount_set(&rt->refs, 1);
488         rt->output = mctp_route_discard;
489
490         return rt;
491 }
492
493 unsigned int mctp_default_net(struct net *net)
494 {
495         return READ_ONCE(net->mctp.default_net);
496 }
497
498 int mctp_default_net_set(struct net *net, unsigned int index)
499 {
500         if (index == 0)
501                 return -EINVAL;
502         WRITE_ONCE(net->mctp.default_net, index);
503         return 0;
504 }
505
506 /* tag management */
507 static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
508                              struct mctp_sock *msk)
509 {
510         struct netns_mctp *mns = &net->mctp;
511
512         lockdep_assert_held(&mns->keys_lock);
513
514         key->expiry = jiffies + mctp_key_lifetime;
515         timer_reduce(&msk->key_expiry, key->expiry);
516
517         /* we hold the net->key_lock here, allowing updates to both
518          * then net and sk
519          */
520         hlist_add_head_rcu(&key->hlist, &mns->keys);
521         hlist_add_head_rcu(&key->sklist, &msk->keys);
522         refcount_inc(&key->refs);
523 }
524
525 /* Allocate a locally-owned tag value for (saddr, daddr), and reserve
526  * it for the socket msk
527  */
528 static int mctp_alloc_local_tag(struct mctp_sock *msk,
529                                 mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
530 {
531         struct net *net = sock_net(&msk->sk);
532         struct netns_mctp *mns = &net->mctp;
533         struct mctp_sk_key *key, *tmp;
534         unsigned long flags;
535         int rc = -EAGAIN;
536         u8 tagbits;
537
538         /* for NULL destination EIDs, we may get a response from any peer */
539         if (daddr == MCTP_ADDR_NULL)
540                 daddr = MCTP_ADDR_ANY;
541
542         /* be optimistic, alloc now */
543         key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
544         if (!key)
545                 return -ENOMEM;
546
547         /* 8 possible tag values */
548         tagbits = 0xff;
549
550         spin_lock_irqsave(&mns->keys_lock, flags);
551
552         /* Walk through the existing keys, looking for potential conflicting
553          * tags. If we find a conflict, clear that bit from tagbits
554          */
555         hlist_for_each_entry(tmp, &mns->keys, hlist) {
556                 /* We can check the lookup fields (*_addr, tag) without the
557                  * lock held, they don't change over the lifetime of the key.
558                  */
559
560                 /* if we don't own the tag, it can't conflict */
561                 if (tmp->tag & MCTP_HDR_FLAG_TO)
562                         continue;
563
564                 if (!((tmp->peer_addr == daddr ||
565                        tmp->peer_addr == MCTP_ADDR_ANY) &&
566                        tmp->local_addr == saddr))
567                         continue;
568
569                 spin_lock(&tmp->lock);
570                 /* key must still be valid. If we find a match, clear the
571                  * potential tag value
572                  */
573                 if (tmp->valid)
574                         tagbits &= ~(1 << tmp->tag);
575                 spin_unlock(&tmp->lock);
576
577                 if (!tagbits)
578                         break;
579         }
580
581         if (tagbits) {
582                 key->tag = __ffs(tagbits);
583                 mctp_reserve_tag(net, key, msk);
584                 trace_mctp_key_acquire(key);
585
586                 *tagp = key->tag;
587                 rc = 0;
588         }
589
590         spin_unlock_irqrestore(&mns->keys_lock, flags);
591
592         if (!tagbits)
593                 kfree(key);
594
595         return rc;
596 }
597
598 /* routing lookups */
599 static bool mctp_rt_match_eid(struct mctp_route *rt,
600                               unsigned int net, mctp_eid_t eid)
601 {
602         return READ_ONCE(rt->dev->net) == net &&
603                 rt->min <= eid && rt->max >= eid;
604 }
605
606 /* compares match, used for duplicate prevention */
607 static bool mctp_rt_compare_exact(struct mctp_route *rt1,
608                                   struct mctp_route *rt2)
609 {
610         ASSERT_RTNL();
611         return rt1->dev->net == rt2->dev->net &&
612                 rt1->min == rt2->min &&
613                 rt1->max == rt2->max;
614 }
615
616 struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
617                                      mctp_eid_t daddr)
618 {
619         struct mctp_route *tmp, *rt = NULL;
620
621         list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
622                 /* TODO: add metrics */
623                 if (mctp_rt_match_eid(tmp, dnet, daddr)) {
624                         if (refcount_inc_not_zero(&tmp->refs)) {
625                                 rt = tmp;
626                                 break;
627                         }
628                 }
629         }
630
631         return rt;
632 }
633
634 static struct mctp_route *mctp_route_lookup_null(struct net *net,
635                                                  struct net_device *dev)
636 {
637         struct mctp_route *rt;
638
639         list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
640                 if (rt->dev->dev == dev && rt->type == RTN_LOCAL &&
641                     refcount_inc_not_zero(&rt->refs))
642                         return rt;
643         }
644
645         return NULL;
646 }
647
648 /* sends a skb to rt and releases the route. */
649 int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
650 {
651         int rc;
652
653         rc = rt->output(rt, skb);
654         mctp_route_release(rt);
655         return rc;
656 }
657
658 static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
659                                   unsigned int mtu, u8 tag)
660 {
661         const unsigned int hlen = sizeof(struct mctp_hdr);
662         struct mctp_hdr *hdr, *hdr2;
663         unsigned int pos, size;
664         struct sk_buff *skb2;
665         int rc;
666         u8 seq;
667
668         hdr = mctp_hdr(skb);
669         seq = 0;
670         rc = 0;
671
672         if (mtu < hlen + 1) {
673                 kfree_skb(skb);
674                 return -EMSGSIZE;
675         }
676
677         /* we've got the header */
678         skb_pull(skb, hlen);
679
680         for (pos = 0; pos < skb->len;) {
681                 /* size of message payload */
682                 size = min(mtu - hlen, skb->len - pos);
683
684                 skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
685                 if (!skb2) {
686                         rc = -ENOMEM;
687                         break;
688                 }
689
690                 /* generic skb copy */
691                 skb2->protocol = skb->protocol;
692                 skb2->priority = skb->priority;
693                 skb2->dev = skb->dev;
694                 memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));
695
696                 if (skb->sk)
697                         skb_set_owner_w(skb2, skb->sk);
698
699                 /* establish packet */
700                 skb_reserve(skb2, MCTP_HEADER_MAXLEN);
701                 skb_reset_network_header(skb2);
702                 skb_put(skb2, hlen + size);
703                 skb2->transport_header = skb2->network_header + hlen;
704
705                 /* copy header fields, calculate SOM/EOM flags & seq */
706                 hdr2 = mctp_hdr(skb2);
707                 hdr2->ver = hdr->ver;
708                 hdr2->dest = hdr->dest;
709                 hdr2->src = hdr->src;
710                 hdr2->flags_seq_tag = tag &
711                         (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
712
713                 if (pos == 0)
714                         hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
715
716                 if (pos + size == skb->len)
717                         hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
718
719                 hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;
720
721                 /* copy message payload */
722                 skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
723
724                 /* do route, but don't drop the rt reference */
725                 rc = rt->output(rt, skb2);
726                 if (rc)
727                         break;
728
729                 seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
730                 pos += size;
731         }
732
733         mctp_route_release(rt);
734         consume_skb(skb);
735         return rc;
736 }
737
738 int mctp_local_output(struct sock *sk, struct mctp_route *rt,
739                       struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
740 {
741         struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
742         struct mctp_skb_cb *cb = mctp_cb(skb);
743         struct mctp_hdr *hdr;
744         unsigned long flags;
745         unsigned int mtu;
746         mctp_eid_t saddr;
747         int rc;
748         u8 tag;
749
750         if (WARN_ON(!rt->dev))
751                 return -EINVAL;
752
753         spin_lock_irqsave(&rt->dev->addrs_lock, flags);
754         if (rt->dev->num_addrs == 0) {
755                 rc = -EHOSTUNREACH;
756         } else {
757                 /* use the outbound interface's first address as our source */
758                 saddr = rt->dev->addrs[0];
759                 rc = 0;
760         }
761         spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
762
763         if (rc)
764                 return rc;
765
766         if (req_tag & MCTP_HDR_FLAG_TO) {
767                 rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
768                 if (rc)
769                         return rc;
770                 tag |= MCTP_HDR_FLAG_TO;
771         } else {
772                 tag = req_tag;
773         }
774
775
776         skb->protocol = htons(ETH_P_MCTP);
777         skb->priority = 0;
778         skb_reset_transport_header(skb);
779         skb_push(skb, sizeof(struct mctp_hdr));
780         skb_reset_network_header(skb);
781         skb->dev = rt->dev->dev;
782
783         /* cb->net will have been set on initial ingress */
784         cb->src = saddr;
785
786         /* set up common header fields */
787         hdr = mctp_hdr(skb);
788         hdr->ver = 1;
789         hdr->dest = daddr;
790         hdr->src = saddr;
791
792         mtu = mctp_route_mtu(rt);
793
794         if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
795                 hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
796                         tag;
797                 return mctp_do_route(rt, skb);
798         } else {
799                 return mctp_do_fragment_route(rt, skb, mtu, tag);
800         }
801 }
802
803 /* route management */
804 static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
805                           unsigned int daddr_extent, unsigned int mtu,
806                           unsigned char type)
807 {
808         int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
809         struct net *net = dev_net(mdev->dev);
810         struct mctp_route *rt, *ert;
811
812         if (!mctp_address_ok(daddr_start))
813                 return -EINVAL;
814
815         if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
816                 return -EINVAL;
817
818         switch (type) {
819         case RTN_LOCAL:
820                 rtfn = mctp_route_input;
821                 break;
822         case RTN_UNICAST:
823                 rtfn = mctp_route_output;
824                 break;
825         default:
826                 return -EINVAL;
827         }
828
829         rt = mctp_route_alloc();
830         if (!rt)
831                 return -ENOMEM;
832
833         rt->min = daddr_start;
834         rt->max = daddr_start + daddr_extent;
835         rt->mtu = mtu;
836         rt->dev = mdev;
837         mctp_dev_hold(rt->dev);
838         rt->type = type;
839         rt->output = rtfn;
840
841         ASSERT_RTNL();
842         /* Prevent duplicate identical routes. */
843         list_for_each_entry(ert, &net->mctp.routes, list) {
844                 if (mctp_rt_compare_exact(rt, ert)) {
845                         mctp_route_release(rt);
846                         return -EEXIST;
847                 }
848         }
849
850         list_add_rcu(&rt->list, &net->mctp.routes);
851
852         return 0;
853 }
854
855 static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
856                              unsigned int daddr_extent)
857 {
858         struct net *net = dev_net(mdev->dev);
859         struct mctp_route *rt, *tmp;
860         mctp_eid_t daddr_end;
861         bool dropped;
862
863         if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
864                 return -EINVAL;
865
866         daddr_end = daddr_start + daddr_extent;
867         dropped = false;
868
869         ASSERT_RTNL();
870
871         list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
872                 if (rt->dev == mdev &&
873                     rt->min == daddr_start && rt->max == daddr_end) {
874                         list_del_rcu(&rt->list);
875                         /* TODO: immediate RTM_DELROUTE */
876                         mctp_route_release(rt);
877                         dropped = true;
878                 }
879         }
880
881         return dropped ? 0 : -ENOENT;
882 }
883
884 int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
885 {
886         return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
887 }
888
889 int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
890 {
891         return mctp_route_remove(mdev, addr, 0);
892 }
893
894 /* removes all entries for a given device */
895 void mctp_route_remove_dev(struct mctp_dev *mdev)
896 {
897         struct net *net = dev_net(mdev->dev);
898         struct mctp_route *rt, *tmp;
899
900         ASSERT_RTNL();
901         list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
902                 if (rt->dev == mdev) {
903                         list_del_rcu(&rt->list);
904                         /* TODO: immediate RTM_DELROUTE */
905                         mctp_route_release(rt);
906                 }
907         }
908 }
909
910 /* Incoming packet-handling */
911
912 static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
913                                 struct packet_type *pt,
914                                 struct net_device *orig_dev)
915 {
916         struct net *net = dev_net(dev);
917         struct mctp_dev *mdev;
918         struct mctp_skb_cb *cb;
919         struct mctp_route *rt;
920         struct mctp_hdr *mh;
921
922         rcu_read_lock();
923         mdev = __mctp_dev_get(dev);
924         rcu_read_unlock();
925         if (!mdev) {
926                 /* basic non-data sanity checks */
927                 goto err_drop;
928         }
929
930         if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
931                 goto err_drop;
932
933         skb_reset_transport_header(skb);
934         skb_reset_network_header(skb);
935
936         /* We have enough for a header; decode and route */
937         mh = mctp_hdr(skb);
938         if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
939                 goto err_drop;
940
941         cb = __mctp_cb(skb);
942         cb->net = READ_ONCE(mdev->net);
943
944         rt = mctp_route_lookup(net, cb->net, mh->dest);
945
946         /* NULL EID, but addressed to our physical address */
947         if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
948                 rt = mctp_route_lookup_null(net, dev);
949
950         if (!rt)
951                 goto err_drop;
952
953         mctp_do_route(rt, skb);
954
955         return NET_RX_SUCCESS;
956
957 err_drop:
958         kfree_skb(skb);
959         return NET_RX_DROP;
960 }
961
962 static struct packet_type mctp_packet_type = {
963         .type = cpu_to_be16(ETH_P_MCTP),
964         .func = mctp_pkttype_receive,
965 };
966
967 /* netlink interface */
968
969 static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
970         [RTA_DST]               = { .type = NLA_U8 },
971         [RTA_METRICS]           = { .type = NLA_NESTED },
972         [RTA_OIF]               = { .type = NLA_U32 },
973 };
974
975 /* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
976  * tb must hold RTA_MAX+1 elements.
977  */
978 static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
979                               struct netlink_ext_ack *extack,
980                               struct nlattr **tb, struct rtmsg **rtm,
981                               struct mctp_dev **mdev, mctp_eid_t *daddr_start)
982 {
983         struct net *net = sock_net(skb->sk);
984         struct net_device *dev;
985         unsigned int ifindex;
986         int rc;
987
988         rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
989                          rta_mctp_policy, extack);
990         if (rc < 0) {
991                 NL_SET_ERR_MSG(extack, "incorrect format");
992                 return rc;
993         }
994
995         if (!tb[RTA_DST]) {
996                 NL_SET_ERR_MSG(extack, "dst EID missing");
997                 return -EINVAL;
998         }
999         *daddr_start = nla_get_u8(tb[RTA_DST]);
1000
1001         if (!tb[RTA_OIF]) {
1002                 NL_SET_ERR_MSG(extack, "ifindex missing");
1003                 return -EINVAL;
1004         }
1005         ifindex = nla_get_u32(tb[RTA_OIF]);
1006
1007         *rtm = nlmsg_data(nlh);
1008         if ((*rtm)->rtm_family != AF_MCTP) {
1009                 NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
1010                 return -EINVAL;
1011         }
1012
1013         dev = __dev_get_by_index(net, ifindex);
1014         if (!dev) {
1015                 NL_SET_ERR_MSG(extack, "bad ifindex");
1016                 return -ENODEV;
1017         }
1018         *mdev = mctp_dev_get_rtnl(dev);
1019         if (!*mdev)
1020                 return -ENODEV;
1021
1022         if (dev->flags & IFF_LOOPBACK) {
1023                 NL_SET_ERR_MSG(extack, "no routes to loopback");
1024                 return -EINVAL;
1025         }
1026
1027         return 0;
1028 }
1029
1030 static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
1031         [RTAX_MTU]              = { .type = NLA_U32 },
1032 };
1033
1034 static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
1035                          struct netlink_ext_ack *extack)
1036 {
1037         struct nlattr *tb[RTA_MAX + 1];
1038         struct nlattr *tbx[RTAX_MAX + 1];
1039         mctp_eid_t daddr_start;
1040         struct mctp_dev *mdev;
1041         struct rtmsg *rtm;
1042         unsigned int mtu;
1043         int rc;
1044
1045         rc = mctp_route_nlparse(skb, nlh, extack, tb,
1046                                 &rtm, &mdev, &daddr_start);
1047         if (rc < 0)
1048                 return rc;
1049
1050         if (rtm->rtm_type != RTN_UNICAST) {
1051                 NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
1052                 return -EINVAL;
1053         }
1054
1055         mtu = 0;
1056         if (tb[RTA_METRICS]) {
1057                 rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
1058                                       rta_metrics_policy, NULL);
1059                 if (rc < 0)
1060                         return rc;
1061                 if (tbx[RTAX_MTU])
1062                         mtu = nla_get_u32(tbx[RTAX_MTU]);
1063         }
1064
1065         if (rtm->rtm_type != RTN_UNICAST)
1066                 return -EINVAL;
1067
1068         rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
1069                             rtm->rtm_type);
1070         return rc;
1071 }
1072
1073 static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
1074                          struct netlink_ext_ack *extack)
1075 {
1076         struct nlattr *tb[RTA_MAX + 1];
1077         mctp_eid_t daddr_start;
1078         struct mctp_dev *mdev;
1079         struct rtmsg *rtm;
1080         int rc;
1081
1082         rc = mctp_route_nlparse(skb, nlh, extack, tb,
1083                                 &rtm, &mdev, &daddr_start);
1084         if (rc < 0)
1085                 return rc;
1086
1087         /* we only have unicast routes */
1088         if (rtm->rtm_type != RTN_UNICAST)
1089                 return -EINVAL;
1090
1091         rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
1092         return rc;
1093 }
1094
1095 static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
1096                             u32 portid, u32 seq, int event, unsigned int flags)
1097 {
1098         struct nlmsghdr *nlh;
1099         struct rtmsg *hdr;
1100         void *metrics;
1101
1102         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
1103         if (!nlh)
1104                 return -EMSGSIZE;
1105
1106         hdr = nlmsg_data(nlh);
1107         hdr->rtm_family = AF_MCTP;
1108
1109         /* we use the _len fields as a number of EIDs, rather than
1110          * a number of bits in the address
1111          */
1112         hdr->rtm_dst_len = rt->max - rt->min;
1113         hdr->rtm_src_len = 0;
1114         hdr->rtm_tos = 0;
1115         hdr->rtm_table = RT_TABLE_DEFAULT;
1116         hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
1117         hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
1118         hdr->rtm_type = rt->type;
1119
1120         if (nla_put_u8(skb, RTA_DST, rt->min))
1121                 goto cancel;
1122
1123         metrics = nla_nest_start_noflag(skb, RTA_METRICS);
1124         if (!metrics)
1125                 goto cancel;
1126
1127         if (rt->mtu) {
1128                 if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
1129                         goto cancel;
1130         }
1131
1132         nla_nest_end(skb, metrics);
1133
1134         if (rt->dev) {
1135                 if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
1136                         goto cancel;
1137         }
1138
1139         /* TODO: conditional neighbour physaddr? */
1140
1141         nlmsg_end(skb, nlh);
1142
1143         return 0;
1144
1145 cancel:
1146         nlmsg_cancel(skb, nlh);
1147         return -EMSGSIZE;
1148 }
1149
1150 static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
1151 {
1152         struct net *net = sock_net(skb->sk);
1153         struct mctp_route *rt;
1154         int s_idx, idx;
1155
1156         /* TODO: allow filtering on route data, possibly under
1157          * cb->strict_check
1158          */
1159
1160         /* TODO: change to struct overlay */
1161         s_idx = cb->args[0];
1162         idx = 0;
1163
1164         rcu_read_lock();
1165         list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
1166                 if (idx++ < s_idx)
1167                         continue;
1168                 if (mctp_fill_rtinfo(skb, rt,
1169                                      NETLINK_CB(cb->skb).portid,
1170                                      cb->nlh->nlmsg_seq,
1171                                      RTM_NEWROUTE, NLM_F_MULTI) < 0)
1172                         break;
1173         }
1174
1175         rcu_read_unlock();
1176         cb->args[0] = idx;
1177
1178         return skb->len;
1179 }
1180
1181 /* net namespace implementation */
1182 static int __net_init mctp_routes_net_init(struct net *net)
1183 {
1184         struct netns_mctp *ns = &net->mctp;
1185
1186         INIT_LIST_HEAD(&ns->routes);
1187         INIT_HLIST_HEAD(&ns->binds);
1188         mutex_init(&ns->bind_lock);
1189         INIT_HLIST_HEAD(&ns->keys);
1190         spin_lock_init(&ns->keys_lock);
1191         WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
1192         return 0;
1193 }
1194
1195 static void __net_exit mctp_routes_net_exit(struct net *net)
1196 {
1197         struct mctp_route *rt;
1198
1199         rcu_read_lock();
1200         list_for_each_entry_rcu(rt, &net->mctp.routes, list)
1201                 mctp_route_release(rt);
1202         rcu_read_unlock();
1203 }
1204
1205 static struct pernet_operations mctp_net_ops = {
1206         .init = mctp_routes_net_init,
1207         .exit = mctp_routes_net_exit,
1208 };
1209
1210 int __init mctp_routes_init(void)
1211 {
1212         dev_add_pack(&mctp_packet_type);
1213
1214         rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
1215                              NULL, mctp_dump_rtinfo, 0);
1216         rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
1217                              mctp_newroute, NULL, 0);
1218         rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
1219                              mctp_delroute, NULL, 0);
1220
1221         return register_pernet_subsys(&mctp_net_ops);
1222 }
1223
1224 void __exit mctp_routes_exit(void)
1225 {
1226         unregister_pernet_subsys(&mctp_net_ops);
1227         rtnl_unregister(PF_MCTP, RTM_DELROUTE);
1228         rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
1229         rtnl_unregister(PF_MCTP, RTM_GETROUTE);
1230         dev_remove_pack(&mctp_packet_type);
1231 }
1232
1233 #if IS_ENABLED(CONFIG_MCTP_TEST)
1234 #include "test/route-test.c"
1235 #endif
This page took 0.100126 seconds and 4 git commands to generate.