// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2019, Intel Corporation.
 */
#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <net/mptcp.h>
#include "protocol.h"

#include "mib.h"

/* path manager command handlers */

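/* Queue an ADD_ADDR announcement (or the echo of a received one) for
 * transmission on this connection. Must be called with the PM lock held;
 * returns -EINVAL if a signal of the same kind is already pending.
 */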
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
                           const struct mptcp_addr_info *addr,
                           bool echo)
{
        u8 add_addr = READ_ONCE(msk->pm.addr_signal);

        pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo);

        lockdep_assert_held(&msk->pm.lock);

        if (add_addr &
            (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
                MPTCP_INC_STATS(sock_net((struct sock *)msk),
                                echo ? MPTCP_MIB_ECHOADDTXDROP : MPTCP_MIB_ADDADDRTXDROP);
                return -EINVAL;
        }

        if (echo) {
                msk->pm.remote = *addr;
                add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
        } else {
                msk->pm.local = *addr;
                add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
        }
        WRITE_ONCE(msk->pm.addr_signal, add_addr);
        return 0;
}

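/* Queue an RM_ADDR for the given list of address ids. Fails with -EINVAL
 * (accounting the drop) if another address signal is still pending;
 * otherwise records the list and asks for an ack carrying the RM_ADDR.
 */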
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
        u8 rm_addr = READ_ONCE(msk->pm.addr_signal);

        pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);

        if (rm_addr) {
                MPTCP_ADD_STATS(sock_net((struct sock *)msk),
                                MPTCP_MIB_RMADDRTXDROP, rm_list->nr);
                return -EINVAL;
        }

        msk->pm.rm_list_tx = *rm_list;
        rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
        WRITE_ONCE(msk->pm.addr_signal, rm_addr);
        mptcp_pm_nl_addr_send_ack(msk);
        return 0;
}

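/* Close the subflows using the local address ids in @rm_list; the actual
 * teardown is done by mptcp_pm_nl_rm_subflow_received() under the PM lock.
 */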
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
        pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);

        spin_lock_bh(&msk->pm.lock);
        mptcp_pm_nl_rm_subflow_received(msk, rm_list);
        spin_unlock_bh(&msk->pm.lock);
        return 0;
}

/* path manager event handlers */

void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
        struct mptcp_pm_data *pm = &msk->pm;

        pr_debug("msk=%p, token=%u side=%d", msk, READ_ONCE(msk->token), server_side);

        WRITE_ONCE(pm->server_side, server_side);
        mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}

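/* Decide whether a new subflow can be accepted on this connection. The
 * userspace PM only tracks the subflow count, while the in-kernel PM
 * enforces the configured limit and clears accept_subflow once it is hit.
 */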
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
{
        struct mptcp_pm_data *pm = &msk->pm;
        unsigned int subflows_max;
        int ret = 0;

        if (mptcp_pm_is_userspace(msk)) {
                if (mptcp_userspace_pm_active(msk)) {
                        spin_lock_bh(&pm->lock);
                        pm->subflows++;
                        spin_unlock_bh(&pm->lock);
                        return true;
                }
                return false;
        }

        subflows_max = mptcp_pm_get_subflows_max(msk);

        pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
                 subflows_max, READ_ONCE(pm->accept_subflow));

        /* try to avoid acquiring the lock below */
        if (!READ_ONCE(pm->accept_subflow))
                return false;

        spin_lock_bh(&pm->lock);
        if (READ_ONCE(pm->accept_subflow)) {
                ret = pm->subflows < subflows_max;
                if (ret && ++pm->subflows == subflows_max)
                        WRITE_ONCE(pm->accept_subflow, false);
        }
        spin_unlock_bh(&pm->lock);

        return ret;
}

/* return true if the new status bit is currently cleared, that is, this event
 * can be served, possibly by an already scheduled work
 */
static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
                                   enum mptcp_pm_status new_status)
{
        pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status,
                 BIT(new_status));
        if (msk->pm.status & BIT(new_status))
                return false;

        msk->pm.status |= BIT(new_status);
        mptcp_schedule_work((struct sock *)msk);
        return true;
}

void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
{
        struct mptcp_pm_data *pm = &msk->pm;
        bool announce = false;

        pr_debug("msk=%p", msk);

        spin_lock_bh(&pm->lock);

        /* mptcp_pm_fully_established() can be invoked by multiple
         * racing paths - accept() and check_fully_established();
         * be sure to serve this event only once.
         */
        if (READ_ONCE(pm->work_pending) &&
            !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
                mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);

        if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
                announce = true;

        msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
        spin_unlock_bh(&pm->lock);

        if (announce)
                mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}

void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
        pr_debug("msk=%p", msk);
}

void mptcp_pm_subflow_established(struct mptcp_sock *msk)
{
        struct mptcp_pm_data *pm = &msk->pm;

        pr_debug("msk=%p", msk);

        if (!READ_ONCE(pm->work_pending))
                return;

        spin_lock_bh(&pm->lock);

        if (READ_ONCE(pm->work_pending))
                mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

        spin_unlock_bh(&pm->lock);
}

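/* Called when a subflow is closed: update the stored subflow count and,
 * for the in-kernel PM, let the worker try to establish the next
 * candidate subflows.
 */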
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
                                 const struct mptcp_subflow_context *subflow)
{
        struct mptcp_pm_data *pm = &msk->pm;
        bool update_subflows;

        update_subflows = subflow->request_join || subflow->mp_join;
        if (mptcp_pm_is_userspace(msk)) {
                if (update_subflows) {
                        spin_lock_bh(&pm->lock);
                        pm->subflows--;
                        spin_unlock_bh(&pm->lock);
                }
                return;
        }

        if (!READ_ONCE(pm->work_pending) && !update_subflows)
                return;

        spin_lock_bh(&pm->lock);
        if (update_subflows)
                __mptcp_pm_close_subflow(msk);

        /* Even if this subflow is not really established, tell the PM to try
         * to pick the next ones, if possible.
         */
        if (mptcp_pm_nl_check_work_pending(msk))
                mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

        spin_unlock_bh(&pm->lock);
}

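/* Handle an ADD_ADDR option received on @ssk: emit the netlink event, then
 * either echo the address straight away (userspace PM, or accept_addr
 * disabled) or pass it to the worker via pm->remote.
 */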
void mptcp_pm_add_addr_received(const struct sock *ssk,
                                const struct mptcp_addr_info *addr)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct mptcp_sock *msk = mptcp_sk(subflow->conn);
        struct mptcp_pm_data *pm = &msk->pm;

        pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
                 READ_ONCE(pm->accept_addr));

        mptcp_event_addr_announced(ssk, addr);

        spin_lock_bh(&pm->lock);

        if (mptcp_pm_is_userspace(msk)) {
                if (mptcp_userspace_pm_active(msk)) {
                        mptcp_pm_announce_addr(msk, addr, true);
                        mptcp_pm_add_addr_send_ack(msk);
                } else {
                        __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
                }
        } else if (!READ_ONCE(pm->accept_addr)) {
                mptcp_pm_announce_addr(msk, addr, true);
                mptcp_pm_add_addr_send_ack(msk);
        } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
                pm->remote = *addr;
        } else {
                __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
        }

        spin_unlock_bh(&pm->lock);
}

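/* An ADD_ADDR echo acknowledges one of our announcements: if the address is
 * still on the announce list, kick the worker to make further progress.
 */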
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
                              const struct mptcp_addr_info *addr)
{
        struct mptcp_pm_data *pm = &msk->pm;

        pr_debug("msk=%p", msk);

        spin_lock_bh(&pm->lock);

        if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending))
                mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

        spin_unlock_bh(&pm->lock);
}

void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
{
        if (!mptcp_pm_should_add_signal(msk))
                return;

        mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
}

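/* Handle a received RM_ADDR: emit one removal event per id and let the
 * worker tear down the corresponding subflows, or account the drop if a
 * previous removal is still being processed.
 */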
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
                               const struct mptcp_rm_list *rm_list)
{
        struct mptcp_pm_data *pm = &msk->pm;
        u8 i;

        pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr);

        for (i = 0; i < rm_list->nr; i++)
                mptcp_event_addr_removed(msk, rm_list->ids[i]);

        spin_lock_bh(&pm->lock);
        if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED))
                pm->rm_list_rx = *rm_list;
        else
                __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP);
        spin_unlock_bh(&pm->lock);
}

void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct sock *sk = subflow->conn;
        struct mptcp_sock *msk;

        pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
        msk = mptcp_sk(sk);
        if (subflow->backup != bkup)
                subflow->backup = bkup;

        mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}

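/* Handle an MP_FAIL: if infinite fallback is still possible and no MP_FAIL
 * of our own is pending, reply with MP_FAIL plus an infinite mapping;
 * otherwise treat it as the response to our MP_FAIL and clear the timeout.
 */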
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct mptcp_sock *msk = mptcp_sk(subflow->conn);

        pr_debug("fail_seq=%llu", fail_seq);

        if (!READ_ONCE(msk->allow_infinite_fallback))
                return;

        if (!subflow->fail_tout) {
                pr_debug("send MP_FAIL response and infinite map");

                subflow->send_mp_fail = 1;
                subflow->send_infinite_map = 1;
                tcp_send_ack(sk);
        } else {
                pr_debug("MP_FAIL response received");
                WRITE_ONCE(subflow->fail_tout, 0);
        }
}

/* path manager helpers */

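/* Fill @addr with the pending ADD_ADDR (or echo) if the remaining option
 * space allows it, clearing the corresponding signal bit. For a pure-ack
 * skb every other suboption may be dropped to make room.
 */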
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
                              unsigned int opt_size, unsigned int remaining,
                              struct mptcp_addr_info *addr, bool *echo,
                              bool *drop_other_suboptions)
{
        int ret = false;
        u8 add_addr;
        u8 family;
        bool port;

        spin_lock_bh(&msk->pm.lock);

        /* double check after the lock is acquired */
        if (!mptcp_pm_should_add_signal(msk))
                goto out_unlock;

        /* always drop every other option for a pure-ack ADD_ADDR; this is a
         * plain dup-ack from the TCP perspective. The other MPTCP-relevant
         * info, if any, will be carried by the 'original' TCP ack
         */
        if (skb && skb_is_tcp_pure_ack(skb)) {
                remaining += opt_size;
                *drop_other_suboptions = true;
        }

        *echo = mptcp_pm_should_add_signal_echo(msk);
        port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);

        family = *echo ? msk->pm.remote.family : msk->pm.local.family;
        if (remaining < mptcp_add_addr_len(family, *echo, port))
                goto out_unlock;

        if (*echo) {
                *addr = msk->pm.remote;
                add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
        } else {
                *addr = msk->pm.local;
                add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
        }
        WRITE_ONCE(msk->pm.addr_signal, add_addr);
        ret = true;

out_unlock:
        spin_unlock_bh(&msk->pm.lock);
        return ret;
}

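/* Copy the pending RM_ADDR list into @rm_list if it fits in the remaining
 * option space, clearing the RM_ADDR signal bit.
 */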
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
                             struct mptcp_rm_list *rm_list)
{
        int ret = false, len;
        u8 rm_addr;

        spin_lock_bh(&msk->pm.lock);

        /* double check after the lock is acquired */
        if (!mptcp_pm_should_rm_signal(msk))
                goto out_unlock;

        rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL);
        len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
        if (len < 0) {
                WRITE_ONCE(msk->pm.addr_signal, rm_addr);
                goto out_unlock;
        }
        if (remaining < len)
                goto out_unlock;

        *rm_list = msk->pm.rm_list_tx;
        WRITE_ONCE(msk->pm.addr_signal, rm_addr);
        ret = true;

out_unlock:
        spin_unlock_bh(&msk->pm.lock);
        return ret;
}

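/* Map the local address of @skc to an address id: id 0 is reserved for the
 * address of the initial subflow, any other id comes from the active path
 * manager.
 */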
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
        struct mptcp_addr_info skc_local;
        struct mptcp_addr_info msk_local;

        if (WARN_ON_ONCE(!msk))
                return -1;

        /* The 0 ID mapping is defined by the first subflow, copied into the msk
         * addr
         */
        mptcp_local_address((struct sock_common *)msk, &msk_local);
        mptcp_local_address((struct sock_common *)skc, &skc_local);
        if (mptcp_addresses_equal(&msk_local, &skc_local, false))
                return 0;

        if (mptcp_pm_is_userspace(msk))
                return mptcp_userspace_pm_get_local_id(msk, &skc_local);
        return mptcp_pm_nl_get_local_id(msk, &skc_local);
}

int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
                                         u8 *flags, int *ifindex)
{
        *flags = 0;
        *ifindex = 0;

        if (!id)
                return 0;

        if (mptcp_pm_is_userspace(msk))
                return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
        return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
}

int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info)
{
        if (info->attrs[MPTCP_PM_ATTR_TOKEN])
                return mptcp_userspace_pm_get_addr(skb, info);
        return mptcp_pm_nl_get_addr(skb, info);
}

int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb)
{
        const struct genl_info *info = genl_info_dump(cb);

        if (info->attrs[MPTCP_PM_ATTR_TOKEN])
                return mptcp_userspace_pm_dump_addr(msg, cb);
        return mptcp_pm_nl_dump_addr(msg, cb);
}

int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
{
        if (info->attrs[MPTCP_PM_ATTR_TOKEN])
                return mptcp_userspace_pm_set_flags(skb, info);
        return mptcp_pm_nl_set_flags(skb, info);
}

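/* Track retransmission periods with no progress on @ssk and flag the
 * subflow as stale to the in-kernel PM when the receive timestamp stops
 * moving.
 */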
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);

        /* keep track of rtx periods with no progress */
        if (!subflow->stale_count) {
                subflow->stale_rcv_tstamp = rcv_tstamp;
                subflow->stale_count++;
        } else if (subflow->stale_rcv_tstamp == rcv_tstamp) {
                if (subflow->stale_count < U8_MAX)
                        subflow->stale_count++;
                mptcp_pm_nl_subflow_chk_stale(msk, ssk);
        } else {
                subflow->stale_count = 0;
                mptcp_subflow_set_active(subflow);
        }
}

/* if sk is an ipv4 socket or is ipv6_only, allow only same-family local and
 * remote addresses; otherwise allow any matching local/remote pair
 */
bool mptcp_pm_addr_families_match(const struct sock *sk,
                                  const struct mptcp_addr_info *loc,
                                  const struct mptcp_addr_info *rem)
{
        bool mptcp_is_v4 = sk->sk_family == AF_INET;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
        bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6);
        bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6);

        if (mptcp_is_v4)
                return loc_is_v4 && rem_is_v4;

        if (ipv6_only_sock(sk))
                return !loc_is_v4 && !rem_is_v4;

        return loc_is_v4 == rem_is_v4;
#else
        return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET;
#endif
}

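/* Reset the PM state for a (re)starting connection, deriving work_pending,
 * accept_addr and accept_subflow from the netns limits when the in-kernel
 * PM is in use.
 */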
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
        u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
        struct mptcp_pm_data *pm = &msk->pm;

        pm->add_addr_signaled = 0;
        pm->add_addr_accepted = 0;
        pm->local_addr_used = 0;
        pm->subflows = 0;
        pm->rm_list_tx.nr = 0;
        pm->rm_list_rx.nr = 0;
        WRITE_ONCE(pm->pm_type, pm_type);

        if (pm_type == MPTCP_PM_TYPE_KERNEL) {
                bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);

                /* pm->work_pending must only be set to 'true' when
                 * pm->pm_type is set to MPTCP_PM_TYPE_KERNEL
                 */
                WRITE_ONCE(pm->work_pending,
                           (!!mptcp_pm_get_local_addr_max(msk) &&
                            subflows_allowed) ||
                           !!mptcp_pm_get_add_addr_signal_max(msk));
                WRITE_ONCE(pm->accept_addr,
                           !!mptcp_pm_get_add_addr_accept_max(msk) &&
                           subflows_allowed);
                WRITE_ONCE(pm->accept_subflow, subflows_allowed);
        } else {
                WRITE_ONCE(pm->work_pending, 0);
                WRITE_ONCE(pm->accept_addr, 0);
                WRITE_ONCE(pm->accept_subflow, 0);
        }

        WRITE_ONCE(pm->addr_signal, 0);
        WRITE_ONCE(pm->remote_deny_join_id0, false);
        pm->status = 0;
        bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
}

void mptcp_pm_data_init(struct mptcp_sock *msk)
{
        spin_lock_init(&msk->pm.lock);
        INIT_LIST_HEAD(&msk->pm.anno_list);
        INIT_LIST_HEAD(&msk->pm.userspace_pm_local_addr_list);
        mptcp_pm_data_reset(msk);
}

void __init mptcp_pm_init(void)
{
        mptcp_pm_nl_init();
}