2 * net/tipc/group.c: TIPC group messaging code
4 * Copyright (c) 2017, Ericsson AB
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
44 #include "name_table.h"
/* Advertised-window sizes, counted in blocks of FLOWCTL_BLK_SZ bytes.
 * ADV_UNIT is the number of whole blocks in MAX_MSG_SIZE + MAX_H_SIZE, plus
 * one. ADV_IDLE is one such unit and ADV_ACTIVE is twelve of them.
 */
47 #define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1)
48 #define ADV_IDLE ADV_UNIT
49 #define ADV_ACTIVE (ADV_UNIT * 12)
/* Node in the owning group's member rb-tree (grp->members) */
65 struct rb_node tree_node;
/* Link used for the group's active, pending and reclaiming lists */
66 struct list_head list;
/* Link used for the group's congested list */
67 struct list_head congested;
/* Event buffer that is queued to the socket's input queue at a later point */
68 struct sk_buff *event_msg;
/* Messages from this member, sorted in by tipc_group_sort_msg() */
69 struct sk_buff_head deferredq;
/* Back pointer to the group this member belongs to */
70 struct tipc_group *group;
/* Root of the rb-tree holding all members, keyed by node and port */
84 struct rb_root members;
/* Members linked through their 'congested' field, kept sorted by window */
85 struct list_head congested;
/* Members in state MBR_PENDING, linked through their 'list' field */
86 struct list_head pending;
/* Members in state MBR_ACTIVE, linked through their 'list' field */
87 struct list_head active;
/* Members in state MBR_RECLAIMING, linked through their 'list' field */
88 struct list_head reclaiming;
/* List of nodes hosting at least one member */
89 struct tipc_nlist dests;
/* Forward declaration: the function is defined further down in this file but
 * is already called by code that precedes the definition.
 */
106 static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
107 int mtyp, struct sk_buff_head *xmitq);
/* Helper called when a member is deleted or is about to enter MBR_LEAVING.
 * The test below selects members in state MBR_ACTIVE, MBR_RECLAIMING or
 * MBR_REMITTED.
 * NOTE(review): presumably these are the states accounted for in
 * grp->active_cnt, which is then decremented - confirm against the body.
 */
109 static void tipc_group_decr_active(struct tipc_group *grp,
110 struct tipc_member *m)
112 if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING ||
113 m->state == MBR_REMITTED)
/* Compute a receive buffer limit, in bytes, from the group's member count.
 * As a side effect the computed limit on simultaneously active members is
 * stored in grp->max_active.
 */
117 static int tipc_group_rcvbuf_limit(struct tipc_group *grp)
119 int max_active, active_pool, idle_pool;
/* Calculate with one member more than currently counted */
120 int mcnt = grp->member_cnt + 1;
122 /* Limit simultaneous reception from other members */
/* One eighth of the member count, clamped to the range 16..64 */
123 max_active = min(mcnt / 8, 64);
124 max_active = max(max_active, 16);
125 grp->max_active = max_active;
127 /* Reserve blocks for active and idle members */
128 active_pool = max_active * ADV_ACTIVE;
/* NOTE(review): negative when mcnt < max_active, which shrinks the sum */
129 idle_pool = (mcnt - max_active) * ADV_IDLE;
131 /* Scale to bytes, considering worst-case truesize/msgsize ratio */
132 return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4;
/* Accessor returning the group's bc_snd_nxt sequence counter */
135 u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
137 return grp->bc_snd_nxt;
/* A member counts as enabled unless it is in state MBR_QUARANTINED or
 * MBR_LEAVING. The caller must pass a valid (non-NULL) member.
 */
140 static bool tipc_group_is_enabled(struct tipc_member *m)
142 return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING;
/* True when m is non-NULL and its state compares greater than or equal to
 * MBR_JOINED, i.e. the result depends on the numeric order of the state
 * constants.
 */
145 static bool tipc_group_is_receiver(struct tipc_member *m)
147 return m && m->state >= MBR_JOINED;
/* NOTE(review): presumably returns the port id to exclude from delivery
 * (e.g. the own port when loopback is disabled) - confirm against the body.
 */
150 u32 tipc_group_exclude(struct tipc_group *grp)
/* Return the group's current member count (grp->member_cnt) */
157 int tipc_group_size(struct tipc_group *grp)
159 return grp->member_cnt;
/* Create the group object for the socket identified by portid.
 * The object is zero-allocated with GFP_ATOMIC, its lists and member tree
 * are initialised, and scope, instance and flags are taken from the request.
 * NOTE(review): the handling of allocation failure and of a failed
 * subscription is not documented here - confirm against the full body.
 */
162 struct tipc_group *tipc_group_create(struct net *net, u32 portid,
163 struct tipc_group_req *mreq)
165 struct tipc_group *grp;
166 u32 type = mreq->type;
168 grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
/* The destination node list is set up with the own node address */
171 tipc_nlist_init(&grp->dests, tipc_own_addr(net));
172 INIT_LIST_HEAD(&grp->congested);
173 INIT_LIST_HEAD(&grp->active);
174 INIT_LIST_HEAD(&grp->pending);
175 INIT_LIST_HEAD(&grp->reclaiming);
176 grp->members = RB_ROOT;
178 grp->portid = portid;
179 grp->domain = addr_domain(net, mreq->scope);
181 grp->instance = mreq->instance;
182 grp->scope = mreq->scope;
/* Both options are kept as the masked flag bit, not normalised to 0/1 here */
183 grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
184 grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
/* Subscribe for the group's type with bounds 0 and ~0 (presumably the full
 * instance range); the subscription id is stored in grp->subid
 */
185 if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid))
/* Tear down a group: queue a GRP_LEAVE_MSG for every member in the tree,
 * transmit the queued messages, purge the destination node list and cancel
 * the subscription identified by grp->subid.
 */
191 void tipc_group_delete(struct net *net, struct tipc_group *grp)
193 struct rb_root *tree = &grp->members;
194 struct tipc_member *m, *tmp;
195 struct sk_buff_head xmitq;
197 __skb_queue_head_init(&xmitq);
/* Post-order "safe" walk: the current entry may be disposed of in the loop */
199 rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
200 tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
204 tipc_node_distr_xmit(net, &xmitq);
205 tipc_nlist_purge(&grp->dests);
206 tipc_topsrv_kern_unsubscr(net, grp->subid);
/* Look up a member in the group's rb-tree.
 * Members are keyed by a 64-bit value holding the node address in the upper
 * 32 bits and the port in the lower 32 bits.
 */
210 struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
/* Start the descent at the root of the member tree */
213 struct rb_node *n = grp->members.rb_node;
214 u64 nkey, key = (u64)node << 32 | port;
215 struct tipc_member *m;
218 m = container_of(n, struct tipc_member, tree_node);
/* Key of the member currently visited, built the same way as 'key' */
219 nkey = (u64)m->node << 32 | m->port;
/* Look up a member by node and port and test whether it is enabled, as
 * defined by tipc_group_is_enabled().
 * NOTE(review): presumably the member is returned only when enabled and
 * NULL otherwise - confirm against the full body.
 */
230 static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
233 struct tipc_member *m;
235 m = tipc_group_find_member(grp, node, port);
236 if (m && tipc_group_is_enabled(m))
/* Visit the members one by one in tree order, from rb_first() via rb_next().
 * NOTE(review): presumably returns the first member found on the given node,
 * or NULL when the node hosts no member - confirm against the full body.
 */
241 static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
244 struct tipc_member *m;
247 for (n = rb_first(&grp->members); n; n = rb_next(n)) {
248 m = container_of(n, struct tipc_member, tree_node);
/* Insert member m into the group's rb-tree.
 * The key is (node << 32 | port), the same layout that
 * tipc_group_find_member() uses for its lookup.
 */
255 static void tipc_group_add_to_tree(struct tipc_group *grp,
256 struct tipc_member *m)
258 u64 nkey, key = (u64)m->node << 32 | m->port;
259 struct rb_node **n, *parent = NULL;
260 struct tipc_member *tmp;
/* 'n' tracks the link slot where the new node will eventually be attached */
262 n = &grp->members.rb_node;
264 tmp = container_of(*n, struct tipc_member, tree_node);
266 tmp = container_of(parent, struct tipc_member, tree_node);
267 nkey = (u64)tmp->node << 32 | tmp->port;
/* Attach the node at the slot found, then let the tree rebalance itself */
275 rb_link_node(&m->tree_node, parent, n);
276 rb_insert_color(&m->tree_node, &grp->members);
/* Allocate (zeroed, GFP_ATOMIC) and register a new group member.
 * Its list links and deferred queue are initialised, and it is added both
 * to the member tree and to the list of destination nodes.
 */
279 static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
283 struct tipc_member *m;
285 m = kzalloc(sizeof(*m), GFP_ATOMIC);
288 INIT_LIST_HEAD(&m->list);
289 INIT_LIST_HEAD(&m->congested);
290 __skb_queue_head_init(&m->deferredq);
/* Start out as if everything sent before bc_snd_nxt were acknowledged */
294 m->bc_acked = grp->bc_snd_nxt - 1;
296 tipc_group_add_to_tree(grp, m);
297 tipc_nlist_add(&grp->dests, m->node);
/* Create a member for the given node and port in state MBR_DISCOVERED */
302 void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port)
304 tipc_group_create_member(grp, node, port, MBR_DISCOVERED);
/* Remove a member from the group: erase it from the tree, unlink it from
 * the lists it may be on, call tipc_group_decr_active() and drop its node
 * from the destination list when no other member remains on that node.
 */
307 static void tipc_group_delete_member(struct tipc_group *grp,
308 struct tipc_member *m)
310 rb_erase(&m->tree_node, &grp->members);
313 /* Check if we were waiting for replicast ack from this member */
/* True when acks are outstanding and this member's last acked number is
 * behind the most recently sent one (bc_snd_nxt - 1)
 */
314 if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1))
317 list_del_init(&m->list);
318 list_del_init(&m->congested);
319 tipc_group_decr_active(grp, m);
321 /* If last member on a node, remove node from dest list */
/* m was erased from the tree above, so this lookup cannot find m itself */
322 if (!tipc_group_find_node(grp, m->node))
323 tipc_nlist_del(&grp->dests, m->node);
/* NOTE(review): presumably returns the address of the group's destination
 * node list (grp->dests) - confirm against the body.
 */
328 struct tipc_nlist *tipc_group_dests(struct tipc_group *grp)
/* Fill in seq with the group's own name: type grp->type and a range that
 * covers the single instance grp->instance (lower == upper).
 */
333 void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
336 seq->type = grp->type;
337 seq->lower = grp->instance;
338 seq->upper = grp->instance;
/* Re-evaluate member m's position in its group's congested list.
 * NOTE(review): presumably 'len' is first charged against m->window, and the
 * two tests below lead to early returns - confirm against the full body.
 */
342 void tipc_group_update_member(struct tipc_member *m, int len)
344 struct tipc_group *grp = m->group;
345 struct tipc_member *_m, *tmp;
347 if (!tipc_group_is_enabled(m))
/* A window of at least ADV_IDLE is the threshold tested here */
352 if (m->window >= ADV_IDLE)
/* Unlink first, so that the member can be re-inserted at its new position */
355 list_del_init(&m->congested);
357 /* Sort member into congested members' list */
358 list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
359 if (m->window > _m->window)
/* list_add_tail() on _m's link places m immediately in front of _m */
361 list_add_tail(&m->congested, &_m->congested);
/* Fallback insertion at the very end of the group's congested list */
364 list_add_tail(&m->congested, &grp->congested);
/* Apply tipc_group_update_member() with 'len' to every enabled member of
 * the group, and record the number of acknowledges to expect in
 * grp->bc_ackers.
 */
367 void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
/* Sequence number immediately preceding bc_snd_nxt */
369 u16 prev = grp->bc_snd_nxt - 1;
370 struct tipc_member *m;
374 for (n = rb_first(&grp->members); n; n = rb_next(n)) {
375 m = container_of(n, struct tipc_member, tree_node);
376 if (tipc_group_is_enabled(m)) {
377 tipc_group_update_member(m, len);
383 /* Mark number of acknowledges to expect, if any */
385 grp->bc_ackers = ackers;
/* Check whether a message of size 'len' towards member dnode:dport is held
 * up by that member's send window.
 * NOTE(review): presumably returns false when the window suffices and true
 * when the sender must wait, with *mbr set to the member found - confirm.
 */
389 bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
390 int len, struct tipc_member **mbr)
392 struct sk_buff_head xmitq;
393 struct tipc_member *m;
/* Only enabled members are considered, see tipc_group_find_dest() */
396 m = tipc_group_find_dest(grp, dnode, dport);
402 if (m->window >= len)
/* Remember that a user is waiting on this member */
404 m->usr_pending = true;
406 /* If not fully advertised, do it now to prevent mutual blocking */
/* The tests below single out the cases where nothing more can be advertised:
 * states before MBR_JOINED, or 'adv' already at the level of the state
 */
409 if (state < MBR_JOINED)
411 if (state == MBR_JOINED && adv == ADV_IDLE)
413 if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
415 if (state == MBR_PENDING && adv == ADV_IDLE)
/* Otherwise build a GRP_ADV_MSG and transmit it right away */
417 skb_queue_head_init(&xmitq);
418 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
419 tipc_node_distr_xmit(grp->net, &xmitq);
/* Congestion check for a transmission of size 'len' to the whole group.
 * Only the first entry of the congested list is examined; when its window
 * does not cover 'len', the decision is delegated to tipc_group_cong() for
 * that member.
 */
423 bool tipc_group_bc_cong(struct tipc_group *grp, int len)
425 struct tipc_member *m = NULL;
427 /* If prev bcast was replicast, reject until all receivers have acked */
431 if (list_empty(&grp->congested))
434 m = list_first_entry(&grp->congested, struct tipc_member, congested);
435 if (m->window >= len)
438 return tipc_group_cong(grp, m->node, m->port, len, &m);
441 /* tipc_group_sort_msg() - sort msg into queue by bcast sequence number */
443 static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq)
445 struct tipc_msg *_hdr, *hdr = buf_msg(skb);
446 u16 bc_seqno = msg_grp_bc_seqno(hdr);
447 struct sk_buff *_skb, *tmp;
448 int mtyp = msg_type(hdr);
450 /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */
451 if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
452 skb_queue_walk_safe(defq, _skb, tmp) {
453 _hdr = buf_msg(_skb);
/* Test for queued buffers whose sequence number is not above the new one */
454 if (!less(bc_seqno, msg_grp_bc_seqno(_hdr)))
/* Insert the new buffer in front of the queued buffer just examined */
456 __skb_queue_before(defq, _skb, skb);
459 /* Bcast was not bypassed, - add to tail */
461 /* Unicasts are never bypassed, - always add to tail */
462 __skb_queue_tail(defq, skb);
465 /* tipc_group_filter_msg() - determine if we should accept arriving message */
/* Take the first buffer from inputq, sort it into the deferred queue of the
 * member it originates from, and then process that queue from its head.
 * Accepted buffers are appended to inputq again; protocol messages that
 * result from the processing are appended to xmitq.
 */
467 void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
468 struct sk_buff_head *xmitq)
470 struct sk_buff *skb = __skb_dequeue(inputq);
471 bool ack, deliver, update, leave = false;
472 struct sk_buff_head *defq;
473 struct tipc_member *m;
474 struct tipc_msg *hdr;
/* The sender is identified by the originating node and port of the header */
482 node = msg_orignode(hdr);
483 port = msg_origport(hdr);
485 if (!msg_in_group(hdr))
/* tipc_group_is_receiver() also covers the case of no member being found */
488 m = tipc_group_find_member(grp, node, port);
489 if (!tipc_group_is_receiver(m))
/* Test for a sequence number below the next one expected from the member */
492 if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
495 TIPC_SKB_CB(skb)->orig_member = m->instance;
496 defq = &m->deferredq;
497 tipc_group_sort_msg(skb, defq);
/* Look at the head of the deferred queue without removing it */
499 while ((skb = skb_peek(defq))) {
501 mtyp = msg_type(hdr);
502 blks = msg_blocks(hdr);
/* Test for a sequence number beyond the next one expected from the member */
507 if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
510 /* Decide what to do with message */
512 case TIPC_GRP_MCAST_MSG:
/* Multicast is checked against the group's own instance */
513 if (msg_nameinst(hdr) != grp->instance) {
518 case TIPC_GRP_BCAST_MSG:
519 ack = msg_grp_bc_ack_req(hdr);
522 case TIPC_GRP_UCAST_MSG:
524 case TIPC_GRP_MEMBER_EVT:
525 if (m->state == MBR_LEAVING)
534 /* Execute decisions */
537 __skb_queue_tail(inputq, skb);
542 tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
/* Drop everything still deferred before the member itself is deleted */
545 __skb_queue_purge(defq);
546 tipc_group_delete_member(grp, m);
552 tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
/* Charge 'blks' blocks against the window advertised to member node:port
 * and carry out the member state changes that follow from it. Protocol
 * messages to be sent as a result are appended to xmitq.
 */
559 void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
560 u32 port, struct sk_buff_head *xmitq)
562 struct list_head *active = &grp->active;
563 int max_active = grp->max_active;
/* Reclaiming starts once three quarters of the active slots are in use */
564 int reclaim_limit = max_active * 3 / 4;
565 int active_cnt = grp->active_cnt;
566 struct tipc_member *m, *rm, *pm;
568 m = tipc_group_find_member(grp, node, port);
572 m->advertised -= blks;
576 /* Reclaim advertised space from least active member */
/* The head of the active list is taken as the least active member, since
 * members are moved to the tail of that list further down
 */
577 if (!list_empty(active) && active_cnt >= reclaim_limit) {
578 rm = list_first_entry(active, struct tipc_member, list);
579 rm->state = MBR_RECLAIMING;
580 list_move_tail(&rm->list, &grp->reclaiming);
581 tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
583 /* If max active, become pending and wait for reclaimed space */
584 if (active_cnt >= max_active) {
585 m->state = MBR_PENDING;
586 list_add_tail(&m->list, &grp->pending);
589 /* Otherwise become active */
590 m->state = MBR_ACTIVE;
591 list_add_tail(&m->list, &grp->active);
/* Keep the member at the tail of the active list */
595 if (!list_is_last(&m->list, &grp->active))
596 list_move_tail(&m->list, &grp->active);
/* Compare what is still advertised with three quarters of ADV_ACTIVE */
597 if (m->advertised > (ADV_ACTIVE * 3 / 4))
599 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
602 if (m->advertised > ADV_IDLE)
604 m->state = MBR_JOINED;
605 if (m->advertised < ADV_IDLE) {
606 pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
607 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
610 list_del_init(&m->list);
611 if (list_empty(&grp->pending))
614 /* Set oldest pending member to active and advertise */
615 pm = list_first_entry(&grp->pending, struct tipc_member, list);
616 pm->state = MBR_ACTIVE;
617 list_move_tail(&pm->list, &grp->active);
619 tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
/* Build a GROUP_PROTOCOL message of type 'mtyp' addressed to member m and
 * append it to xmitq. Depending on the message type, the header is filled
 * in with a sync point, an advertised window, an acknowledged sequence
 * number or a remitted window.
 */
630 static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
631 int mtyp, struct sk_buff_head *xmitq)
633 struct tipc_msg *hdr;
/* Header-only message from the own node and port to the member's */
637 skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0,
638 m->node, tipc_own_addr(grp->net),
639 m->port, grp->portid, 0);
/* 'adv' is the difference between the level of the member's state
 * (ADV_ACTIVE or ADV_IDLE) and what has been advertised to it so far
 */
643 if (m->state == MBR_ACTIVE)
644 adv = ADV_ACTIVE - m->advertised;
645 else if (m->state == MBR_JOINED || m->state == MBR_PENDING)
646 adv = ADV_IDLE - m->advertised;
/* JOIN carries both a sync point and an advertisement */
650 if (mtyp == GRP_JOIN_MSG) {
651 msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
652 msg_set_adv_win(hdr, adv);
653 m->advertised += adv;
654 } else if (mtyp == GRP_LEAVE_MSG) {
655 msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
656 } else if (mtyp == GRP_ADV_MSG) {
657 msg_set_adv_win(hdr, adv);
658 m->advertised += adv;
659 } else if (mtyp == GRP_ACK_MSG) {
660 msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
661 } else if (mtyp == GRP_REMIT_MSG) {
662 msg_set_grp_remitted(hdr, m->window);
/* The message is flagged as droppable at the destination */
664 msg_set_dest_droppable(hdr, true);
665 __skb_queue_tail(xmitq, skb);
/* Handle a received group protocol message.
 * The sending member is looked up by the originating node and port of the
 * header and the message is dispatched on its type. Events that become
 * deliverable are appended to inputq, protocol messages to be sent are
 * appended to xmitq, and *usr_wakeup is set from the member's usr_pending
 * flag in the cases shown below.
 */
668 void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
669 struct tipc_msg *hdr, struct sk_buff_head *inputq,
670 struct sk_buff_head *xmitq)
672 u32 node = msg_orignode(hdr);
673 u32 port = msg_origport(hdr);
674 struct tipc_member *m, *pm;
675 struct tipc_msg *ehdr;
676 u16 remitted, in_flight;
681 m = tipc_group_find_member(grp, node, port);
683 switch (msg_type(hdr)) {
686 m = tipc_group_create_member(grp, node, port,
/* Reception starts at the sync point announced by the peer */
690 m->bc_syncpt = msg_grp_bc_syncpt(hdr);
691 m->bc_rcv_nxt = m->bc_syncpt;
692 m->window += msg_adv_win(hdr);
694 /* Wait until PUBLISH event is received */
695 if (m->state == MBR_DISCOVERED) {
696 m->state = MBR_JOINING;
697 } else if (m->state == MBR_PUBLISHED) {
698 m->state = MBR_JOINED;
700 m->usr_pending = false;
701 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
/* Stamp the stored event with the sync point and hand it to inputq */
702 ehdr = buf_msg(m->event_msg);
703 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
704 __skb_queue_tail(inputq, m->event_msg);
706 list_del_init(&m->congested);
707 tipc_group_update_member(m, 0);
712 m->bc_syncpt = msg_grp_bc_syncpt(hdr);
713 list_del_init(&m->list);
714 list_del_init(&m->congested);
717 /* Wait until WITHDRAW event is received */
718 if (m->state != MBR_LEAVING) {
719 tipc_group_decr_active(grp, m);
720 m->state = MBR_LEAVING;
723 /* Otherwise deliver already received WITHDRAW event */
724 ehdr = buf_msg(m->event_msg);
725 msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
726 __skb_queue_tail(inputq, m->event_msg);
/* Add the advertised window and report whether a user was waiting */
731 m->window += msg_adv_win(hdr);
732 *usr_wakeup = m->usr_pending;
733 m->usr_pending = false;
734 list_del_init(&m->congested);
739 m->bc_acked = msg_grp_bc_acked(hdr);
/* Count down the outstanding acks; non-zero means some are still missing */
740 if (--grp->bc_ackers)
743 m->usr_pending = false;
745 case GRP_RECLAIM_MSG:
748 *usr_wakeup = m->usr_pending;
749 m->usr_pending = false;
/* The REMIT is built before the window is reset, so it reports the old
 * value of m->window
 */
750 tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
751 m->window = ADV_IDLE;
/* A REMIT is only meaningful from a member in state MBR_RECLAIMING */
754 if (!m || m->state != MBR_RECLAIMING)
757 remitted = msg_grp_remitted(hdr);
759 /* Messages preceding the REMIT still in receive queue */
760 if (m->advertised > remitted) {
761 m->state = MBR_REMITTED;
762 in_flight = m->advertised - remitted;
763 m->advertised = ADV_IDLE + in_flight;
766 /* All messages preceding the REMIT have been read */
767 if (m->advertised <= remitted) {
768 m->state = MBR_JOINED;
771 /* ..and the REMIT overtaken by more messages => re-advertise */
772 if (m->advertised < remitted)
773 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
775 m->advertised = ADV_IDLE + in_flight;
777 list_del_init(&m->list);
779 /* Set oldest pending member to active and advertise */
780 if (list_empty(&grp->pending))
782 pm = list_first_entry(&grp->pending, struct tipc_member, list);
783 pm->state = MBR_ACTIVE;
784 list_move_tail(&pm->list, &grp->active);
/* Advertise only when at most three quarters of ADV_ACTIVE is outstanding */
786 if (pm->advertised <= (ADV_ACTIVE * 3 / 4))
787 tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
790 pr_warn("Received unknown GROUP_PROTO message\n");
794 /* tipc_group_member_evt() - receive and handle a member up/down event */
/* Process a TIPC_PUBLISHED or TIPC_WITHDRAWN event for the group.
 * The event buffer is converted in place into a TIPC_GRP_MEMBER_EVT message
 * and is either appended to inputq or held back, depending on the state of
 * the member concerned. Protocol messages to be sent are appended to xmitq,
 * and *sk_rcvbuf is updated with a freshly computed buffer limit.
 */
796 void tipc_group_member_evt(struct tipc_group *grp,
800 struct sk_buff_head *inputq,
801 struct sk_buff_head *xmitq)
803 struct tipc_msg *hdr = buf_msg(skb);
/* The message data area is interpreted as a struct tipc_event */
804 struct tipc_event *evt = (void *)msg_data(hdr);
805 u32 instance = evt->found_lower;
806 u32 node = evt->port.node;
807 u32 port = evt->port.ref;
808 int event = evt->event;
809 struct tipc_member *m;
818 self = tipc_own_addr(net);
/* Test for an event about the own socket while loopback is disabled */
819 if (!grp->loopback && node == self && port == grp->portid)
822 /* Convert message before delivery to user */
823 msg_set_hdr_sz(hdr, GROUP_H_SIZE);
824 msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
825 msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
826 msg_set_origport(hdr, port);
827 msg_set_orignode(hdr, node);
828 msg_set_nametype(hdr, grp->type);
829 msg_set_grp_evt(hdr, event);
831 m = tipc_group_find_member(grp, node, port);
833 if (event == TIPC_PUBLISHED) {
835 m = tipc_group_create_member(grp, node, port,
840 /* Hold back event if JOIN message not yet received */
841 if (m->state == MBR_DISCOVERED) {
843 m->state = MBR_PUBLISHED;
/* Stamp the event with the member's sync point and deliver it */
845 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
846 __skb_queue_tail(inputq, skb);
847 m->state = MBR_JOINED;
849 m->usr_pending = false;
851 m->instance = instance;
852 TIPC_SKB_CB(skb)->orig_member = m->instance;
/* Announce the own membership to the published member */
853 tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
/* A window below ADV_IDLE triggers a re-evaluation of the congested list */
854 if (m->window < ADV_IDLE)
855 tipc_group_update_member(m, 0);
857 list_del_init(&m->congested);
858 } else if (event == TIPC_WITHDRAWN) {
862 TIPC_SKB_CB(skb)->orig_member = m->instance;
865 m->usr_pending = false;
/* Reachability of the member's node, see the hold-back comment below */
866 node_up = tipc_node_is_up(net, node);
870 /* Hold back event if a LEAVE msg should be expected */
871 if (m->state != MBR_LEAVING) {
873 tipc_group_decr_active(grp, m);
874 m->state = MBR_LEAVING;
876 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
877 __skb_queue_tail(inputq, skb);
/* In the sequence below the event is always queued; the stamp is
 * bc_rcv_nxt when the member was not yet MBR_LEAVING
 */
880 if (m->state != MBR_LEAVING) {
881 tipc_group_decr_active(grp, m);
882 m->state = MBR_LEAVING;
883 msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
885 msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
887 __skb_queue_tail(inputq, skb);
889 list_del_init(&m->list);
890 list_del_init(&m->congested);
/* Membership may have changed, so recompute the receive buffer limit */
892 *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);