2 * net/tipc/group.c: TIPC group messaging code
4 * Copyright (c) 2017, Ericsson AB
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
44 #include "name_table.h"
/* Advertised-window sizes, expressed in units of FLOWCTL_BLK_SZ.
 * ADV_UNIT is (MAX_MSG_SIZE + MAX_H_SIZE) divided by FLOWCTL_BLK_SZ, plus one.
 * ADV_IDLE is the target window tipc_group_proto_xmit() uses for members in
 * state MBR_JOINED or MBR_PENDING; ADV_ACTIVE (12 units) is the target it
 * uses for members in state MBR_ACTIVE.
 */
47 #define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1)
48 #define ADV_IDLE ADV_UNIT
49 #define ADV_ACTIVE (ADV_UNIT * 12)
/* Fields of struct tipc_member, as used by the functions in this file */
65 struct rb_node tree_node; /* node in the group's 'members' rbtree */
66 struct list_head list; /* entry in grp->pending, grp->active or grp->reclaiming */
67 struct list_head congested; /* entry in grp->congested, which is ordered by window */
68 struct sk_buff *event_msg; /* member event appended to the input queue by tipc_group_proto_rcv() */
69 struct sk_buff_head deferredq; /* per-member queue filled via tipc_group_sort_msg() */
70 struct tipc_group *group; /* group pointer, read by tipc_group_update_member() */
/* Fields of struct tipc_group, as used by the functions in this file */
84 struct rb_root members; /* rbtree of members, keyed by (node << 32 | port) */
85 struct list_head congested; /* members sorted in by tipc_group_update_member() */
86 struct list_head pending; /* members in state MBR_PENDING */
87 struct list_head active; /* members in state MBR_ACTIVE */
88 struct list_head reclaiming; /* members in state MBR_RECLAIMING */
89 struct tipc_nlist dests; /* node list; nodes are added/removed as members come and go */
/* Forward declaration: the function is called before its definition,
 * which appears further down in this file.
 */
106 static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
107 int mtyp, struct sk_buff_head *xmitq);
/* tipc_group_decr_active() - adjust group accounting for a member
 * @grp: group the member belongs to
 * @m: member being examined
 *
 * Takes effect only when @m is in state MBR_ACTIVE or MBR_RECLAIMING.
 * NOTE(review): presumably decrements the group's count of active members;
 * confirm against the guarded statement.
 */
109 static void tipc_group_decr_active(struct tipc_group *grp,
110 struct tipc_member *m)
112 if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
/* tipc_group_rcvbuf_limit() - compute a receive buffer limit for the group
 * @grp: group to compute the limit for; grp->max_active is updated as a
 *       side effect
 *
 * The member count used is grp->member_cnt + 1. The number of simultaneously
 * active members is one eighth of that count, capped at 64 and raised to at
 * least 16. Each active member is budgeted ADV_ACTIVE blocks and every other
 * member ADV_IDLE blocks.
 *
 * Returns the block total multiplied by FLOWCTL_BLK_SZ and by 4.
 */
116 static int tipc_group_rcvbuf_limit(struct tipc_group *grp)
118 int max_active, active_pool, idle_pool;
119 int mcnt = grp->member_cnt + 1;
121 /* Limit simultaneous reception from other members */
122 max_active = min(mcnt / 8, 64);
123 max_active = max(max_active, 16);
124 grp->max_active = max_active;
126 /* Reserve blocks for active and idle members */
127 active_pool = max_active * ADV_ACTIVE;
128 idle_pool = (mcnt - max_active) * ADV_IDLE;
130 /* Scale to bytes, considering worst-case truesize/msgsize ratio */
131 return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4;
/* tipc_group_bc_snd_nxt() - return the group's bc_snd_nxt value
 * @grp: group to read from
 */
134 u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
136 return grp->bc_snd_nxt;
/* tipc_group_is_enabled() - check that a member is usable
 * @m: member to test; must not be NULL, since it is dereferenced
 *     unconditionally
 *
 * Returns true unless the member state is MBR_QUARANTINED or MBR_LEAVING.
 */
139 static bool tipc_group_is_enabled(struct tipc_member *m)
141 return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING;
/* tipc_group_is_receiver() - check that messages from a member are accepted
 * @m: member to test; may be NULL
 *
 * Returns true when @m is non-NULL and its state compares greater than or
 * equal to MBR_JOINED.
 */
144 static bool tipc_group_is_receiver(struct tipc_member *m)
146 return m && m->state >= MBR_JOINED;
/* tipc_group_exclude() - return a u32 value derived from the group
 * @grp: group to query
 *
 * NOTE(review): presumably the port id to exclude from delivery when
 * loopback is disabled; confirm against the function body.
 */
149 u32 tipc_group_exclude(struct tipc_group *grp)
/* tipc_group_size() - return the group's member count (grp->member_cnt)
 * @grp: group to query
 */
156 int tipc_group_size(struct tipc_group *grp)
158 return grp->member_cnt;
/* tipc_group_create() - allocate and initialise a group
 * @net: network namespace; supplies the own address and the domain, and is
 *       passed on to the topology-server subscription
 * @portid: stored as the group's port id and passed to the subscription
 * @mreq: request supplying type, instance, scope and flags
 *
 * The group is zero-allocated with GFP_ATOMIC. Its lists, member tree and
 * destination node list are initialised. grp->loopback and grp->events are
 * taken from the TIPC_GROUP_LOOPBACK and TIPC_GROUP_MEMBER_EVTS flag bits.
 * Finally a kernel subscription for the group's type is requested with
 * arguments 0 and ~0 (presumably the lower and upper instance bounds; TODO
 * confirm); its id is written to grp->subid.
 */
161 struct tipc_group *tipc_group_create(struct net *net, u32 portid,
162 struct tipc_group_req *mreq)
164 struct tipc_group *grp;
165 u32 type = mreq->type;
167 grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
170 tipc_nlist_init(&grp->dests, tipc_own_addr(net));
171 INIT_LIST_HEAD(&grp->congested);
172 INIT_LIST_HEAD(&grp->active);
173 INIT_LIST_HEAD(&grp->pending);
174 INIT_LIST_HEAD(&grp->reclaiming);
175 grp->members = RB_ROOT;
177 grp->portid = portid;
178 grp->domain = addr_domain(net, mreq->scope);
180 grp->instance = mreq->instance;
181 grp->scope = mreq->scope;
182 grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
183 grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
184 if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid))
/* tipc_group_delete() - tear down a group
 * @net: network namespace used for transmission and unsubscription
 * @grp: group to tear down
 *
 * Walks the member tree in post-order and queues a GRP_LEAVE_MSG for each
 * member on a local transmit queue. The queue is then handed to
 * tipc_node_distr_xmit(), the destination node list is purged and the
 * topology-server subscription identified by grp->subid is cancelled.
 */
190 void tipc_group_delete(struct net *net, struct tipc_group *grp)
192 struct rb_root *tree = &grp->members;
193 struct tipc_member *m, *tmp;
194 struct sk_buff_head xmitq;
196 __skb_queue_head_init(&xmitq);
198 rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
199 tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
203 tipc_node_distr_xmit(net, &xmitq);
204 tipc_nlist_purge(&grp->dests);
205 tipc_topsrv_kern_unsubscr(net, grp->subid);
/* tipc_group_find_member() - look up a member in the group's rbtree
 * @grp: group whose member tree is searched
 *
 * The search key is the 64-bit value (node << 32 | port). The walk starts
 * at the tree root and builds the same kind of key from each visited
 * member for comparison. Callers in this file test the result against NULL.
 */
209 struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
212 struct rb_node *n = grp->members.rb_node;
213 u64 nkey, key = (u64)node << 32 | port;
214 struct tipc_member *m;
217 m = container_of(n, struct tipc_member, tree_node);
218 nkey = (u64)m->node << 32 | m->port;
/* tipc_group_find_dest() - look up a member usable as a destination
 * @grp: group to search
 *
 * Looks the member up with tipc_group_find_member() and then requires it
 * to be non-NULL and to pass tipc_group_is_enabled().
 */
229 static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
232 struct tipc_member *m;
234 m = tipc_group_find_member(grp, node, port);
235 if (m && tipc_group_is_enabled(m))
/* tipc_group_find_node() - scan the member tree for a member on a node
 * @grp: group to search
 *
 * Iterates over every member in tree order, from rb_first() via rb_next().
 * tipc_group_delete_member() treats a NULL result as "no member left on
 * that node".
 */
240 static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
243 struct tipc_member *m;
246 for (n = rb_first(&grp->members); n; n = rb_next(n)) {
247 m = container_of(n, struct tipc_member, tree_node);
/* tipc_group_add_to_tree() - insert a member into the group's rbtree
 * @grp: group owning the tree
 * @m: member to insert
 *
 * The ordering key is (m->node << 32 | m->port). The descent starts at the
 * tree root and compares against the key of each visited member. The new
 * node is then linked below 'parent' and the tree is rebalanced with
 * rb_insert_color().
 */
254 static void tipc_group_add_to_tree(struct tipc_group *grp,
255 struct tipc_member *m)
257 u64 nkey, key = (u64)m->node << 32 | m->port;
258 struct rb_node **n, *parent = NULL;
259 struct tipc_member *tmp;
261 n = &grp->members.rb_node;
263 tmp = container_of(*n, struct tipc_member, tree_node);
265 tmp = container_of(parent, struct tipc_member, tree_node);
266 nkey = (u64)tmp->node << 32 | tmp->port;
274 rb_link_node(&m->tree_node, parent, n);
275 rb_insert_color(&m->tree_node, &grp->members);
/* tipc_group_create_member() - allocate a member and attach it to the group
 * @grp: group receiving the new member
 *
 * The member is zero-allocated with GFP_ATOMIC. Its two list heads and its
 * deferred queue are initialised. bc_acked starts at grp->bc_snd_nxt - 1.
 * The member is then inserted into the member tree and its node is added
 * to the group's destination node list.
 */
278 static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
282 struct tipc_member *m;
284 m = kzalloc(sizeof(*m), GFP_ATOMIC);
287 INIT_LIST_HEAD(&m->list);
288 INIT_LIST_HEAD(&m->congested);
289 __skb_queue_head_init(&m->deferredq);
293 m->bc_acked = grp->bc_snd_nxt - 1;
295 tipc_group_add_to_tree(grp, m);
296 tipc_nlist_add(&grp->dests, m->node);
/* tipc_group_add_member() - create a member in state MBR_DISCOVERED
 * @grp: group receiving the member
 * @node: node address of the member
 * @port: port of the member
 *
 * The result of tipc_group_create_member() is not examined.
 */
301 void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port)
303 tipc_group_create_member(grp, node, port, MBR_DISCOVERED);
/* tipc_group_delete_member() - detach a member from the group
 * @grp: group the member belongs to
 * @m: member to remove
 *
 * Erases the member from the rbtree and unlinks it from both lists it may
 * be on, then calls tipc_group_decr_active(). The node is dropped from the
 * destination node list when tipc_group_find_node() finds no other member
 * on it.
 */
306 static void tipc_group_delete_member(struct tipc_group *grp,
307 struct tipc_member *m)
309 rb_erase(&m->tree_node, &grp->members);
312 /* Check if we were waiting for replicast ack from this member */
313 if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1))
316 list_del_init(&m->list);
317 list_del_init(&m->congested);
318 tipc_group_decr_active(grp, m);
320 /* If last member on a node, remove node from dest list */
321 if (!tipc_group_find_node(grp, m->node))
322 tipc_nlist_del(&grp->dests, m->node);
/* tipc_group_dests() - return a pointer to a struct tipc_nlist for the group
 * @grp: group to query
 *
 * NOTE(review): presumably returns &grp->dests; confirm against the body.
 */
327 struct tipc_nlist *tipc_group_dests(struct tipc_group *grp)
/* tipc_group_self() - report the group's own name sequence
 * @grp: group to read from
 * @seq: output; type is set to grp->type, and lower and upper are both set
 *       to grp->instance
 */
332 void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
335 seq->type = grp->type;
336 seq->lower = grp->instance;
337 seq->upper = grp->instance;
/* tipc_group_update_member() - re-evaluate a member's place on the group's
 * congested list
 * @m: member to update; its group is taken from m->group
 * @len: amount passed in by the caller (NOTE(review): presumably subtracted
 *       from m->window; confirm)
 *
 * The member is first checked with tipc_group_is_enabled(), then its window
 * is compared against ADV_IDLE and its 'congested' linkage is tested for
 * being non-empty. After those checks the member is inserted into
 * grp->congested by comparing its window with the existing entries, or
 * appended at the tail.
 */
341 void tipc_group_update_member(struct tipc_member *m, int len)
343 struct tipc_group *grp = m->group;
344 struct tipc_member *_m, *tmp;
346 if (!tipc_group_is_enabled(m))
351 if (m->window >= ADV_IDLE)
354 if (!list_empty(&m->congested))
357 /* Sort member into congested members' list */
358 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
359 if (m->window > _m->window)
361 list_add_tail(&m->congested, &_m->congested);
364 list_add_tail(&m->congested, &grp->congested);
/* tipc_group_update_bc_members() - update all enabled members after a send
 * @grp: group whose members are updated
 * @len: value passed on to tipc_group_update_member() for each member
 * @ack: NOTE(review): presumably selects whether acknowledgements are
 *       expected; confirm against the guarding condition
 *
 * Walks the member tree in order and calls tipc_group_update_member() for
 * every member that passes tipc_group_is_enabled(). The expected number of
 * acknowledgements (grp->bc_ackers) is set from grp->member_cnt.
 */
367 void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
369 u16 prev = grp->bc_snd_nxt - 1;
370 struct tipc_member *m;
373 for (n = rb_first(&grp->members); n; n = rb_next(n)) {
374 m = container_of(n, struct tipc_member, tree_node);
375 if (tipc_group_is_enabled(m)) {
376 tipc_group_update_member(m, len);
381 /* Mark number of acknowledges to expect, if any */
383 grp->bc_ackers = grp->member_cnt;
/* tipc_group_cong() - check a destination member's send window
 * @grp: group the destination belongs to
 * @dnode: destination node
 * @dport: destination port
 * @len: size that is compared against the member's window
 * @mbr: NOTE(review): presumably an output for the member found; confirm
 *
 * The destination is looked up with tipc_group_find_dest(). Its window is
 * compared against @len and usr_pending is set on the member. A series of
 * state/advertisement checks follows; past those, a GRP_ADV_MSG is built on
 * a local queue and transmitted with tipc_node_distr_xmit().
 */
387 bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
388 int len, struct tipc_member **mbr)
390 struct sk_buff_head xmitq;
391 struct tipc_member *m;
394 m = tipc_group_find_dest(grp, dnode, dport);
400 if (m->window >= len)
402 m->usr_pending = true;
404 /* If not fully advertised, do it now to prevent mutual blocking */
407 if (state < MBR_JOINED)
409 if (state == MBR_JOINED && adv == ADV_IDLE)
411 if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
413 if (state == MBR_PENDING && adv == ADV_IDLE)
415 skb_queue_head_init(&xmitq);
416 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
417 tipc_node_distr_xmit(grp->net, &xmitq);
/* tipc_group_bc_cong() - check congestion for a send to the whole group
 * @grp: group to check
 * @len: size that is compared against the member window
 *
 * Examines the first entry of grp->congested when that list is not empty.
 * The per-member check tipc_group_cong() supplies the result on the final
 * path, i.e. after that entry's window has been compared against @len.
 */
421 bool tipc_group_bc_cong(struct tipc_group *grp, int len)
423 struct tipc_member *m = NULL;
425 /* If prev bcast was replicast, reject until all receivers have acked */
429 if (list_empty(&grp->congested))
432 m = list_first_entry(&grp->congested, struct tipc_member, congested);
433 if (m->window >= len)
436 return tipc_group_cong(grp, m->node, m->port, len, &m);
/* Insert @skb into the deferred queue @defq. For message types
 * TIPC_GRP_BCAST_MSG and TIPC_GRP_MCAST_MSG the queue is walked and the
 * group broadcast sequence number of @skb is compared, using less(), with
 * that of each queued buffer to find an insertion point; all other cases
 * end with the buffer appended at the tail.
 */
439 /* tipc_group_sort_msg() - sort msg into queue by bcast sequence number
441 static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq)
443 struct tipc_msg *_hdr, *hdr = buf_msg(skb);
444 u16 bc_seqno = msg_grp_bc_seqno(hdr);
445 struct sk_buff *_skb, *tmp;
446 int mtyp = msg_type(hdr);
448 /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */
449 if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
450 skb_queue_walk_safe(defq, _skb, tmp) {
451 _hdr = buf_msg(_skb);
452 if (!less(bc_seqno, msg_grp_bc_seqno(_hdr)))
454 __skb_queue_before(defq, _skb, skb);
457 /* Bcast was not bypassed, - add to tail */
459 /* Unicasts are never bypassed, - always add to tail */
460 __skb_queue_tail(defq, skb);
/* Takes one buffer off @inputq, identifies the sending member from the
 * message's origin node and port, and requires the message to be marked as
 * in-group and the member to pass tipc_group_is_receiver(). The buffer is
 * sorted into the member's deferred queue; that queue is then processed
 * from its head, deciding per message type whether to deliver it back onto
 * @inputq, acknowledge it (GRP_ACK_MSG queued on @xmitq), update the
 * receive window, or purge the queue and delete the member.
 */
463 /* tipc_group_filter_msg() - determine if we should accept arriving message
465 void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
466 struct sk_buff_head *xmitq)
468 struct sk_buff *skb = __skb_dequeue(inputq);
469 bool ack, deliver, update, leave = false;
470 struct sk_buff_head *defq;
471 struct tipc_member *m;
472 struct tipc_msg *hdr;
480 node = msg_orignode(hdr);
481 port = msg_origport(hdr);
483 if (!msg_in_group(hdr))
486 m = tipc_group_find_member(grp, node, port);
487 if (!tipc_group_is_receiver(m))
490 if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
493 TIPC_SKB_CB(skb)->orig_member = m->instance;
494 defq = &m->deferredq;
495 tipc_group_sort_msg(skb, defq);
497 while ((skb = skb_peek(defq))) {
499 mtyp = msg_type(hdr);
500 blks = msg_blocks(hdr);
505 if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
508 /* Decide what to do with message */
510 case TIPC_GRP_MCAST_MSG:
511 if (msg_nameinst(hdr) != grp->instance) {
516 case TIPC_GRP_BCAST_MSG:
518 ack = msg_grp_bc_ack_req(hdr);
520 case TIPC_GRP_UCAST_MSG:
522 case TIPC_GRP_MEMBER_EVT:
523 if (m->state == MBR_LEAVING)
532 /* Execute decisions */
535 __skb_queue_tail(inputq, skb);
540 tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
543 __skb_queue_purge(defq);
544 tipc_group_delete_member(grp, m);
550 tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
/* tipc_group_update_rcv_win() - account for blocks consumed from a member
 * @grp: group the member belongs to
 * @blks: number of blocks subtracted from the member's advertised window
 * @node: node of the sending member
 * @port: port of the sending member
 * @xmitq: queue receiving any protocol messages generated here
 *
 * After the subtraction, handling depends on the member's state. A member
 * may become MBR_ACTIVE, or MBR_PENDING when the active count has reached
 * grp->max_active. Once the active count reaches three quarters of
 * max_active, the first member on the active list is moved to
 * MBR_RECLAIMING and sent a GRP_RECLAIM_MSG. A member already on the active
 * list is moved to its tail and may be sent a GRP_ADV_MSG depending on its
 * advertised window. A member may also return to MBR_JOINED, which logs a
 * rate-limited warning and re-advertises if its window is below ADV_IDLE.
 */
557 void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
558 u32 port, struct sk_buff_head *xmitq)
560 struct list_head *active = &grp->active;
561 int max_active = grp->max_active;
562 int reclaim_limit = max_active * 3 / 4;
563 int active_cnt = grp->active_cnt;
564 struct tipc_member *m, *rm;
566 m = tipc_group_find_member(grp, node, port);
570 m->advertised -= blks;
574 /* Reclaim advertised space from least active member */
575 if (!list_empty(active) && active_cnt >= reclaim_limit) {
576 rm = list_first_entry(active, struct tipc_member, list);
577 rm->state = MBR_RECLAIMING;
578 list_move_tail(&rm->list, &grp->reclaiming);
579 tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
581 /* If max active, become pending and wait for reclaimed space */
582 if (active_cnt >= max_active) {
583 m->state = MBR_PENDING;
584 list_add_tail(&m->list, &grp->pending);
587 /* Otherwise become active */
588 m->state = MBR_ACTIVE;
589 list_add_tail(&m->list, &grp->active);
593 if (!list_is_last(&m->list, &grp->active))
594 list_move_tail(&m->list, &grp->active);
595 if (m->advertised > (ADV_ACTIVE * 3 / 4))
597 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
600 if (m->advertised > ADV_IDLE)
602 m->state = MBR_JOINED;
603 if (m->advertised < ADV_IDLE) {
604 pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
605 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
/* tipc_group_proto_xmit() - build a group protocol message and queue it
 * @grp: sending group; supplies the own address, port id and bc_snd_nxt
 * @m: destination member; supplies the node, port, state and window values
 * @mtyp: protocol message type (GRP_JOIN_MSG, GRP_LEAVE_MSG, GRP_ADV_MSG,
 *        GRP_ACK_MSG or GRP_REMIT_MSG are given type-specific fields here)
 * @xmitq: queue the message is appended to
 *
 * The message is created as a GROUP_PROTOCOL message with an INT_H_SIZE
 * header. The advertisement 'adv' is the difference between the target
 * window for the member's state (ADV_ACTIVE for MBR_ACTIVE, ADV_IDLE for
 * MBR_JOINED or MBR_PENDING) and what is already advertised. JOIN and ADV
 * messages carry 'adv' and add it to m->advertised; JOIN and LEAVE carry
 * the group's bc_snd_nxt as sync point; ACK carries m->bc_rcv_nxt; REMIT
 * carries m->window.
 */
617 static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
618 int mtyp, struct sk_buff_head *xmitq)
620 struct tipc_msg *hdr;
624 skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0,
625 m->node, tipc_own_addr(grp->net),
626 m->port, grp->portid, 0);
630 if (m->state == MBR_ACTIVE)
631 adv = ADV_ACTIVE - m->advertised;
632 else if (m->state == MBR_JOINED || m->state == MBR_PENDING)
633 adv = ADV_IDLE - m->advertised;
637 if (mtyp == GRP_JOIN_MSG) {
638 msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
639 msg_set_adv_win(hdr, adv);
640 m->advertised += adv;
641 } else if (mtyp == GRP_LEAVE_MSG) {
642 msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
643 } else if (mtyp == GRP_ADV_MSG) {
644 msg_set_adv_win(hdr, adv);
645 m->advertised += adv;
646 } else if (mtyp == GRP_ACK_MSG) {
647 msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
648 } else if (mtyp == GRP_REMIT_MSG) {
649 msg_set_grp_remitted(hdr, m->window);
651 __skb_queue_tail(xmitq, skb);
/* tipc_group_proto_rcv() - handle a received group protocol message
 * @grp: receiving group
 * @usr_wakeup: output; set from the member's usr_pending flag on the paths
 *              that enlarge or reset the member's send window
 * @hdr: header of the received message; supplies origin node/port, message
 *       type and the type-specific fields read below
 * @inputq: queue onto which held-back member event messages are released
 * @xmitq: queue receiving any protocol messages generated in response
 *
 * The sending member is looked up by origin node and port, and processing
 * is dispatched on msg_type(hdr). Depending on the type, the member's sync
 * point, send window, acknowledged sequence number or state is updated; a
 * message of unrecognised type is reported with pr_warn().
 */
654 void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
655 struct tipc_msg *hdr, struct sk_buff_head *inputq,
656 struct sk_buff_head *xmitq)
658 u32 node = msg_orignode(hdr);
659 u32 port = msg_origport(hdr);
660 struct tipc_member *m, *pm;
661 struct tipc_msg *ehdr;
662 u16 remitted, in_flight;
667 m = tipc_group_find_member(grp, node, port);
669 switch (msg_type(hdr)) {
672 m = tipc_group_create_member(grp, node, port,
676 m->bc_syncpt = msg_grp_bc_syncpt(hdr);
677 m->bc_rcv_nxt = m->bc_syncpt;
678 m->window += msg_adv_win(hdr);
680 /* Wait until PUBLISH event is received */
681 if (m->state == MBR_DISCOVERED) {
682 m->state = MBR_JOINING;
683 } else if (m->state == MBR_PUBLISHED) {
684 m->state = MBR_JOINED;
686 m->usr_pending = false;
687 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
688 ehdr = buf_msg(m->event_msg);
689 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
690 __skb_queue_tail(inputq, m->event_msg);
692 if (m->window < ADV_IDLE)
693 tipc_group_update_member(m, 0);
695 list_del_init(&m->congested);
700 m->bc_syncpt = msg_grp_bc_syncpt(hdr);
702 /* Wait until WITHDRAW event is received */
703 if (m->state != MBR_LEAVING) {
704 tipc_group_decr_active(grp, m);
705 m->state = MBR_LEAVING;
708 /* Otherwise deliver already received WITHDRAW event */
709 ehdr = buf_msg(m->event_msg);
710 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
711 __skb_queue_tail(inputq, m->event_msg);
713 list_del_init(&m->congested);
718 m->window += msg_adv_win(hdr);
719 *usr_wakeup = m->usr_pending;
720 m->usr_pending = false;
721 list_del_init(&m->congested);
726 m->bc_acked = msg_grp_bc_acked(hdr);
727 if (--grp->bc_ackers)
730 m->usr_pending = false;
732 case GRP_RECLAIM_MSG:
735 *usr_wakeup = m->usr_pending;
736 m->usr_pending = false;
737 tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
738 m->window = ADV_IDLE;
741 if (!m || m->state != MBR_RECLAIMING)
744 list_del_init(&m->list);
746 remitted = msg_grp_remitted(hdr);
748 /* Messages preceding the REMIT still in receive queue */
749 if (m->advertised > remitted) {
750 m->state = MBR_REMITTED;
751 in_flight = m->advertised - remitted;
753 /* All messages preceding the REMIT have been read */
754 if (m->advertised <= remitted) {
755 m->state = MBR_JOINED;
758 /* ..and the REMIT overtaken by more messages => re-advertise */
759 if (m->advertised < remitted)
760 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
762 m->advertised = ADV_IDLE + in_flight;
764 /* Set oldest pending member to active and advertise */
765 if (list_empty(&grp->pending))
767 pm = list_first_entry(&grp->pending, struct tipc_member, list);
768 pm->state = MBR_ACTIVE;
769 list_move_tail(&pm->list, &grp->active);
771 if (pm->advertised <= (ADV_ACTIVE * 3 / 4))
772 tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
775 pr_warn("Received unknown GROUP_PROTO message\n");
/* The event is read from the message data of the buffer: instance
 * (found_lower), node, port and event type. An event for the group's own
 * socket (own node and grp->portid) is singled out when loopback is off.
 * The message header is rewritten into a TIPC_GRP_MEMBER_EVT with
 * GROUP_H_SIZE header size before any delivery to @inputq.
 *
 * For TIPC_PUBLISHED a member may be created; a GRP_JOIN_MSG is queued on
 * @xmitq for it and its congested-list linkage is refreshed. For
 * TIPC_WITHDRAWN the event is either held back, with the member moved to
 * MBR_LEAVING while its node is still up, or queued on @inputq.
 * On the final path *sk_rcvbuf is set from tipc_group_rcvbuf_limit().
 */
779 /* tipc_group_member_evt() - receive and handle a member up/down event
781 void tipc_group_member_evt(struct tipc_group *grp,
785 struct sk_buff_head *inputq,
786 struct sk_buff_head *xmitq)
788 struct tipc_msg *hdr = buf_msg(skb);
789 struct tipc_event *evt = (void *)msg_data(hdr);
790 u32 instance = evt->found_lower;
791 u32 node = evt->port.node;
792 u32 port = evt->port.ref;
793 int event = evt->event;
794 struct tipc_member *m;
803 self = tipc_own_addr(net);
804 if (!grp->loopback && node == self && port == grp->portid)
807 /* Convert message before delivery to user */
808 msg_set_hdr_sz(hdr, GROUP_H_SIZE);
809 msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
810 msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
811 msg_set_origport(hdr, port);
812 msg_set_orignode(hdr, node);
813 msg_set_nametype(hdr, grp->type);
814 msg_set_grp_evt(hdr, event);
816 m = tipc_group_find_member(grp, node, port);
818 if (event == TIPC_PUBLISHED) {
820 m = tipc_group_create_member(grp, node, port,
825 /* Hold back event if JOIN message not yet received */
826 if (m->state == MBR_DISCOVERED) {
828 m->state = MBR_PUBLISHED;
830 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
831 __skb_queue_tail(inputq, skb);
832 m->state = MBR_JOINED;
834 m->usr_pending = false;
836 m->instance = instance;
837 TIPC_SKB_CB(skb)->orig_member = m->instance;
838 tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
839 if (m->window < ADV_IDLE)
840 tipc_group_update_member(m, 0);
842 list_del_init(&m->congested);
843 } else if (event == TIPC_WITHDRAWN) {
847 TIPC_SKB_CB(skb)->orig_member = m->instance;
850 m->usr_pending = false;
851 node_up = tipc_node_is_up(net, node);
853 /* Hold back event if more messages might be expected */
854 if (m->state != MBR_LEAVING && node_up) {
856 tipc_group_decr_active(grp, m);
857 m->state = MBR_LEAVING;
860 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
862 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
863 __skb_queue_tail(inputq, skb);
865 list_del_init(&m->congested);
867 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);