2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/string.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/if_ether.h>
27 #include <linux/inet.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/notifier.h>
32 #include <net/route.h>
33 #include <linux/skbuff.h>
35 #include <net/pkt_sched.h>
38 /* Class-Based Queueing (CBQ) algorithm.
39 =======================================
41 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
42 Management Models for Packet Networks",
43 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
50 [4] Sally Floyd and Michael Speer, "Experimental Results
51 for Class-Based Queueing", 1998, not published.
53 -----------------------------------------------------------------------
55 Algorithm skeleton was taken from NS simulator cbq.cc.
56 Anyone who wants to check this code against the LBL version
57 should take into account that ONLY the skeleton was borrowed;
58 the implementation is different. In particular:
60 --- The WRR algorithm is different. Our version looks more
61 reasonable (I hope) and works when quanta are allowed to be
62 less than MTU, which is always the case when real time classes
63 have small rates. Note that the statement of [3] is
64 incomplete: the delay may actually be estimated even if the class
65 per-round allotment is less than MTU. Namely, if the per-round
66 allotment is W*r_i, and r_1+...+r_k = r < 1, then
68 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70 In the worst case we have IntServ estimate with D = W*r+k*MTU
71 and C = MTU*r. The proof (if correct at all) is trivial.
74 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
75 interpret some places, which look like incorrect translations
76 from NS. Readers are encouraged to find these differences
77 and explain to me why I am wrong 8).
79 --- Linux has no EOI event, so we cannot estimate true class
80 idle time. The workaround is to treat the next dequeue event
81 as a sign that the previous packet has finished. This is wrong because of
82 internal device queueing, but on a permanently loaded link it holds.
83 Moreover, combined with the clock integrator, this scheme looks
84 very close to an ideal solution. */
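/* A purely illustrative instance of the bound above (hypothetical
   numbers, not taken from any measurement): let MTU = 1500 bytes,
   per-round allotment W*r_i = 500 bytes, W*r = 1500 bytes (r = 0.5),
   k = 3 and B = 125000 bytes/sec (1 Mbit/sec). Then

	delay_i <= ((1500/500)*1500 + 1500 + 3*1500)/125000 sec
		 = 10500/125000 sec = 84 msec

   so the bound stays finite even though the per-round allotment
   (500 bytes) is well below the MTU. */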
86 struct cbq_sched_data;
91 struct cbq_class *next; /* hash table link */
92 struct cbq_class *next_alive; /* next class with backlog in this priority band */
96 unsigned char priority; /* class priority */
97 unsigned char priority2; /* priority to be used after overlimit */
98 unsigned char ewma_log; /* time constant for idle time calculation */
99 unsigned char ovl_strategy;
100 #ifdef CONFIG_NET_CLS_POLICE
101 unsigned char police;
106 /* Link-sharing scheduler parameters */
107 long maxidle; /* Class parameters: see below. */
111 struct qdisc_rate_table *R_tab;
113 /* Overlimit strategy parameters */
114 void (*overlimit)(struct cbq_class *cl);
117 /* General scheduler (WRR) parameters */
119 long quantum; /* Allotment per WRR round */
120 long weight; /* Relative allotment: see below */
122 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
123 struct cbq_class *split; /* Ptr to split node */
124 struct cbq_class *share; /* Ptr to LS parent in the class tree */
125 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
126 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
128 struct cbq_class *sibling; /* Sibling chain */
129 struct cbq_class *children; /* Pointer to children chain */
131 struct Qdisc *q; /* Elementary queueing discipline */
135 unsigned char cpriority; /* Effective priority */
136 unsigned char delayed;
137 unsigned char level; /* level of the class in hierarchy:
138 0 for leaf classes, and maximal
139 level of children + 1 for nodes.
142 psched_time_t last; /* Last end of service */
143 psched_time_t undertime;
145 long deficit; /* Saved deficit for WRR */
146 unsigned long penalized;
147 struct gnet_stats_basic bstats;
148 struct gnet_stats_queue qstats;
149 struct gnet_stats_rate_est rate_est;
150 spinlock_t *stats_lock;
151 struct tc_cbq_xstats xstats;
153 struct tcf_proto *filter_list;
158 struct cbq_class *defaults[TC_PRIO_MAX+1];
161 struct cbq_sched_data
163 struct cbq_class *classes[16]; /* Hash table of all classes */
164 int nclasses[TC_CBQ_MAXPRIO+1];
165 unsigned quanta[TC_CBQ_MAXPRIO+1];
167 struct cbq_class link;
170 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
173 #ifdef CONFIG_NET_CLS_POLICE
174 struct cbq_class *rx_class;
176 struct cbq_class *tx_class;
177 struct cbq_class *tx_borrowed;
179 psched_time_t now; /* Cached timestamp */
180 psched_time_t now_rt; /* Cached real time */
183 struct timer_list delay_timer;
184 struct timer_list wd_timer; /* Watchdog timer,
194 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
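/* Illustration of how L2T() indexes the rate table (hypothetical
   values): with rate.cell_log == 3 the table is built on 8-byte cells,
   so a 1000-byte packet maps to data[1000 >> 3] == data[125], which
   holds the precomputed transmission time of a packet of that size at
   the configured rate. */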
197 static __inline__ unsigned cbq_hash(u32 h)
204 static __inline__ struct cbq_class *
205 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207 struct cbq_class *cl;
209 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210 if (cl->classid == classid)
215 #ifdef CONFIG_NET_CLS_POLICE
217 static struct cbq_class *
218 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220 struct cbq_class *cl, *new;
222 for (cl = this->tparent; cl; cl = cl->tparent)
223 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
231 /* Classify packet. The procedure is pretty complicated, but
232 it allows us to combine link sharing and priority scheduling
233 transparently.
235 Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
236 so that it resolves to split nodes. Then packets are classified
237 by logical priority, or a more specific classifier may be attached
241 static struct cbq_class *
242 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244 struct cbq_sched_data *q = qdisc_priv(sch);
245 struct cbq_class *head = &q->link;
246 struct cbq_class **defmap;
247 struct cbq_class *cl = NULL;
248 u32 prio = skb->priority;
249 struct tcf_result res;
252 * Step 1. If skb->priority points to one of our classes, use it.
254 if (TC_H_MAJ(prio^sch->handle) == 0 &&
255 (cl = cbq_class_lookup(q, prio)) != NULL)
258 *qerr = NET_XMIT_BYPASS;
261 defmap = head->defaults;
264 * Step 2+n. Apply classifier.
266 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
269 if ((cl = (void*)res.class) == NULL) {
270 if (TC_H_MAJ(res.classid))
271 cl = cbq_class_lookup(q, res.classid);
272 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
273 cl = defmap[TC_PRIO_BESTEFFORT];
275 if (cl == NULL || cl->level >= head->level)
279 #ifdef CONFIG_NET_CLS_ACT
283 *qerr = NET_XMIT_SUCCESS;
287 #elif defined(CONFIG_NET_CLS_POLICE)
289 case TC_POLICE_RECLASSIFY:
290 return cbq_reclassify(skb, cl);
301 * Step 3+n. If classifier selected a link sharing class,
302 * apply agency specific classifier.
303 * Repeat this procedure until we hit a leaf node.
312 * Step 4. No success...
314 if (TC_H_MAJ(prio) == 0 &&
315 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
316 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
323 A packet has just been enqueued on an empty class.
324 cbq_activate_class adds the class to the tail of the active class list
325 of its priority band.
328 static __inline__ void cbq_activate_class(struct cbq_class *cl)
330 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
331 int prio = cl->cpriority;
332 struct cbq_class *cl_tail;
334 cl_tail = q->active[prio];
335 q->active[prio] = cl;
337 if (cl_tail != NULL) {
338 cl->next_alive = cl_tail->next_alive;
339 cl_tail->next_alive = cl;
342 q->activemask |= (1<<prio);
347 Unlink class from the active chain.
348 Note that the same procedure is done directly in cbq_dequeue*
349 during the round-robin procedure.
352 static void cbq_deactivate_class(struct cbq_class *this)
354 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
355 int prio = this->cpriority;
356 struct cbq_class *cl;
357 struct cbq_class *cl_prev = q->active[prio];
360 cl = cl_prev->next_alive;
362 cl_prev->next_alive = cl->next_alive;
363 cl->next_alive = NULL;
365 if (cl == q->active[prio]) {
366 q->active[prio] = cl_prev;
367 if (cl == q->active[prio]) {
368 q->active[prio] = NULL;
369 q->activemask &= ~(1<<prio);
375 } while ((cl_prev = cl) != q->active[prio]);
379 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381 int toplevel = q->toplevel;
383 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
387 PSCHED_GET_TIME(now);
388 incr = PSCHED_TDIFF(now, q->now_rt);
389 PSCHED_TADD2(q->now, incr, now);
392 if (PSCHED_TLESS(cl->undertime, now)) {
393 q->toplevel = cl->level;
396 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
401 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403 struct cbq_sched_data *q = qdisc_priv(sch);
406 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408 #ifdef CONFIG_NET_CLS_POLICE
412 if (ret == NET_XMIT_BYPASS)
418 #ifdef CONFIG_NET_CLS_POLICE
419 cl->q->__parent = sch;
421 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
423 sch->bstats.packets++;
424 sch->bstats.bytes+=len;
425 cbq_mark_toplevel(q, cl);
427 cbq_activate_class(cl);
432 cbq_mark_toplevel(q, cl);
438 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440 struct cbq_sched_data *q = qdisc_priv(sch);
441 struct cbq_class *cl;
444 if ((cl = q->tx_class) == NULL) {
451 cbq_mark_toplevel(q, cl);
453 #ifdef CONFIG_NET_CLS_POLICE
455 cl->q->__parent = sch;
457 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
459 sch->qstats.requeues++;
461 cbq_activate_class(cl);
469 /* Overlimit actions */
471 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473 static void cbq_ovl_classic(struct cbq_class *cl)
475 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
476 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
479 delay += cl->offtime;
482 Class goes to sleep, so that it will have no
483 chance to accumulate avgidle. Let's forgive it 8)
485 BTW cbq-2.0 has a bug in this
486 place: apparently they forgot to shift it by cl->ewma_log.
489 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
490 if (cl->avgidle < cl->minidle)
491 cl->avgidle = cl->minidle;
494 PSCHED_TADD2(q->now, delay, cl->undertime);
496 cl->xstats.overactions++;
499 if (q->wd_expires == 0 || q->wd_expires > delay)
500 q->wd_expires = delay;
502 /* Dirty work! We must schedule wakeups based on the
503 real available rate, rather than the leaf rate,
504 which may be tiny (even zero).
506 if (q->toplevel == TC_CBQ_MAXLEVEL) {
508 psched_tdiff_t base_delay = q->wd_expires;
510 for (b = cl->borrow; b; b = b->borrow) {
511 delay = PSCHED_TDIFF(b->undertime, q->now);
512 if (delay < base_delay) {
519 q->wd_expires = base_delay;
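/* Hypothetical walk through the computation above (illustrative
   numbers only): with undertime - now == 2000, offtime == 1000,
   avgidle == -640 and ewma_log == 5 (all times in psched units),
   delay becomes 2000 + 1000 == 3000, the "forgiveness" term is
   640 - (640 >> 5) == 620, and the class sleeps for about
   3000 - 620 == 2380 units before it is considered again. */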
523 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
527 static void cbq_ovl_rclassic(struct cbq_class *cl)
529 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530 struct cbq_class *this = cl;
533 if (cl->level > q->toplevel) {
537 } while ((cl = cl->borrow) != NULL);
544 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
546 static void cbq_ovl_delay(struct cbq_class *cl)
548 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
549 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
552 unsigned long sched = jiffies;
554 delay += cl->offtime;
556 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
557 if (cl->avgidle < cl->minidle)
558 cl->avgidle = cl->minidle;
559 PSCHED_TADD2(q->now, delay, cl->undertime);
562 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
563 cl->penalized = sched;
564 cl->cpriority = TC_CBQ_MAXPRIO;
565 q->pmask |= (1<<TC_CBQ_MAXPRIO);
566 if (del_timer(&q->delay_timer) &&
567 (long)(q->delay_timer.expires - sched) > 0)
568 q->delay_timer.expires = sched;
569 add_timer(&q->delay_timer);
571 cl->xstats.overactions++;
576 if (q->wd_expires == 0 || q->wd_expires > delay)
577 q->wd_expires = delay;
580 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
582 static void cbq_ovl_lowprio(struct cbq_class *cl)
584 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
586 cl->penalized = jiffies + cl->penalty;
588 if (cl->cpriority != cl->priority2) {
589 cl->cpriority = cl->priority2;
590 q->pmask |= (1<<cl->cpriority);
591 cl->xstats.overactions++;
596 /* TC_CBQ_OVL_DROP: penalize class by dropping */
598 static void cbq_ovl_drop(struct cbq_class *cl)
600 if (cl->q->ops->drop)
601 if (cl->q->ops->drop(cl->q))
603 cl->xstats.overactions++;
607 static void cbq_watchdog(unsigned long arg)
609 struct Qdisc *sch = (struct Qdisc*)arg;
611 sch->flags &= ~TCQ_F_THROTTLED;
612 netif_schedule(sch->dev);
615 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
617 struct cbq_class *cl;
618 struct cbq_class *cl_prev = q->active[prio];
619 unsigned long now = jiffies;
620 unsigned long sched = now;
626 cl = cl_prev->next_alive;
627 if ((long)(now - cl->penalized) > 0) {
628 cl_prev->next_alive = cl->next_alive;
629 cl->next_alive = NULL;
630 cl->cpriority = cl->priority;
632 cbq_activate_class(cl);
634 if (cl == q->active[prio]) {
635 q->active[prio] = cl_prev;
636 if (cl == q->active[prio]) {
637 q->active[prio] = NULL;
642 cl = cl_prev->next_alive;
643 } else if ((long)(sched - cl->penalized) > 0)
644 sched = cl->penalized;
645 } while ((cl_prev = cl) != q->active[prio]);
647 return (long)(sched - now);
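/* The "(long)(a - b) > 0" tests above are the usual wraparound-safe
   jiffies comparison; e.g. (long)(now - cl->penalized) > 0 is
   equivalent to time_after(now, cl->penalized) from <linux/jiffies.h>. */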
650 static void cbq_undelay(unsigned long arg)
652 struct Qdisc *sch = (struct Qdisc*)arg;
653 struct cbq_sched_data *q = qdisc_priv(sch);
661 int prio = ffz(~pmask);
666 tmp = cbq_undelay_prio(q, prio);
669 if (tmp < delay || delay == 0)
675 q->delay_timer.expires = jiffies + delay;
676 add_timer(&q->delay_timer);
679 sch->flags &= ~TCQ_F_THROTTLED;
680 netif_schedule(sch->dev);
684 #ifdef CONFIG_NET_CLS_POLICE
686 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
689 struct Qdisc *sch = child->__parent;
690 struct cbq_sched_data *q = qdisc_priv(sch);
691 struct cbq_class *cl = q->rx_class;
695 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
697 cbq_mark_toplevel(q, cl);
700 cl->q->__parent = sch;
702 if (cl->q->enqueue(skb, cl->q) == 0) {
704 sch->bstats.packets++;
705 sch->bstats.bytes+=len;
707 cbq_activate_class(cl);
720 This is a mission-critical procedure.
722 We "regenerate" the toplevel cutoff if the transmitting class
723 has backlog and is not regulated. This is not part of the
724 original CBQ description, but it looks more reasonable.
725 It may well be wrong; the question needs further investigation.
728 static __inline__ void
729 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
730 struct cbq_class *borrowed)
732 if (cl && q->toplevel >= borrowed->level) {
733 if (cl->q->q.qlen > 1) {
735 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
736 q->toplevel = borrowed->level;
739 } while ((borrowed=borrowed->borrow) != NULL);
742 /* This is not necessary now. Uncommenting it
743 would save CPU cycles, but decrease fairness.
745 q->toplevel = TC_CBQ_MAXLEVEL;
751 cbq_update(struct cbq_sched_data *q)
753 struct cbq_class *this = q->tx_class;
754 struct cbq_class *cl = this;
759 for ( ; cl; cl = cl->share) {
760 long avgidle = cl->avgidle;
763 cl->bstats.packets++;
764 cl->bstats.bytes += len;
767 (now - last) is total time between packet right edges.
768 (last_pktlen/rate) is "virtual" busy time, so that
770 idle = (now - last) - last_pktlen/rate
773 idle = PSCHED_TDIFF(q->now, cl->last);
774 if ((unsigned long)idle > 128*1024*1024) {
775 avgidle = cl->maxidle;
777 idle -= L2T(cl, len);
779 /* true_avgidle := (1-W)*true_avgidle + W*idle,
780 where W=2^{-ewma_log}. But cl->avgidle is scaled:
781 cl->avgidle == true_avgidle/W,
784 avgidle += idle - (avgidle>>cl->ewma_log);
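/* Worked example of the scaled update above (hypothetical numbers):
   let ewma_log = 5, so W = 1/32. If true_avgidle = 64, the stored
   value is cl->avgidle = 64/W = 2048. A new sample idle = 32 gives
   true_avgidle' = (31/32)*64 + (1/32)*32 = 63, while the scaled form
   computes 2048 + (32 - (2048 >> 5)) = 2016 = 63/W, so the two
   representations stay consistent. */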
788 /* Overlimit or at-limit */
790 if (avgidle < cl->minidle)
791 avgidle = cl->minidle;
793 cl->avgidle = avgidle;
795 /* Calculate the expected time when this class
796 will be allowed to send. It will occur when
798 (1-W)*true_avgidle + W*delay = 0, i.e.
799 idle = (1/W - 1)*(-true_avgidle)
801 or, in the scaled representation, idle = (1 - W)*(-cl->avgidle);
803 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
807 To maintain the rate allocated to the class,
808 we add to undertime the virtual clock time
809 needed to complete the transmitted packet.
810 (len/phys_bandwidth has already elapsed by
811 the moment cbq_update runs.)
814 idle -= L2T(&q->link, len);
815 idle += L2T(cl, len);
817 PSCHED_AUDIT_TDIFF(idle);
819 PSCHED_TADD2(q->now, idle, cl->undertime);
823 PSCHED_SET_PASTPERFECT(cl->undertime);
824 if (avgidle > cl->maxidle)
825 cl->avgidle = cl->maxidle;
827 cl->avgidle = avgidle;
832 cbq_update_toplevel(q, this, q->tx_borrowed);
835 static __inline__ struct cbq_class *
836 cbq_under_limit(struct cbq_class *cl)
838 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
839 struct cbq_class *this_cl = cl;
841 if (cl->tparent == NULL)
844 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
845 !PSCHED_TLESS(q->now, cl->undertime)) {
851 /* This is a very suspicious place. Currently the overlimit
852 action is generated for non-bounded classes
853 only if the link is completely congested.
854 Though this agrees with the ancestor-only paradigm,
855 it looks very stupid. In particular,
856 it means that this chunk of code will either
857 never be called or result in strong amplification
858 of burstiness. Dangerous, silly, and yet
859 no other solution exists.
861 if ((cl = cl->borrow) == NULL) {
862 this_cl->qstats.overlimits++;
863 this_cl->overlimit(this_cl);
866 if (cl->level > q->toplevel)
868 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
869 PSCHED_TLESS(q->now, cl->undertime));
875 static __inline__ struct sk_buff *
876 cbq_dequeue_prio(struct Qdisc *sch, int prio)
878 struct cbq_sched_data *q = qdisc_priv(sch);
879 struct cbq_class *cl_tail, *cl_prev, *cl;
883 cl_tail = cl_prev = q->active[prio];
884 cl = cl_prev->next_alive;
891 struct cbq_class *borrow = cl;
894 (borrow = cbq_under_limit(cl)) == NULL)
897 if (cl->deficit <= 0) {
898 /* Class exhausted its allotment per
899 this round. Switch to the next one.
902 cl->deficit += cl->quantum;
906 skb = cl->q->dequeue(cl->q);
908 /* Class did not give us any skb :-(
909 This can occur even if cl->q->q.qlen != 0,
910 e.g. if cl->q == "tbf"
915 cl->deficit -= skb->len;
917 q->tx_borrowed = borrow;
919 #ifndef CBQ_XSTATS_BORROWS_BYTES
920 borrow->xstats.borrows++;
921 cl->xstats.borrows++;
923 borrow->xstats.borrows += skb->len;
924 cl->xstats.borrows += skb->len;
927 q->tx_len = skb->len;
929 if (cl->deficit <= 0) {
930 q->active[prio] = cl;
932 cl->deficit += cl->quantum;
937 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
938 /* Class is empty or penalized.
939 Unlink it from active chain.
941 cl_prev->next_alive = cl->next_alive;
942 cl->next_alive = NULL;
944 /* Did cl_tail point to it? */
949 /* Was it the last class in this band? */
952 q->active[prio] = NULL;
953 q->activemask &= ~(1<<prio);
955 cbq_activate_class(cl);
959 q->active[prio] = cl_tail;
962 cbq_activate_class(cl);
970 } while (cl_prev != cl_tail);
973 q->active[prio] = cl_prev;
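/* Hypothetical WRR trace for the deficit handling above: a class with
   quantum == 1500 starts with deficit == 1500, sends a 1000-byte and
   then a 700-byte packet (deficit 500, then -200 <= 0), is rotated to
   the tail of its band and has its deficit replenished to
   -200 + 1500 == 1300 for the next round. */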
978 static __inline__ struct sk_buff *
979 cbq_dequeue_1(struct Qdisc *sch)
981 struct cbq_sched_data *q = qdisc_priv(sch);
985 activemask = q->activemask&0xFF;
987 int prio = ffz(~activemask);
988 activemask &= ~(1<<prio);
989 skb = cbq_dequeue_prio(sch, prio);
996 static struct sk_buff *
997 cbq_dequeue(struct Qdisc *sch)
1000 struct cbq_sched_data *q = qdisc_priv(sch);
1002 psched_tdiff_t incr;
1004 PSCHED_GET_TIME(now);
1005 incr = PSCHED_TDIFF(now, q->now_rt);
1008 psched_tdiff_t incr2;
1009 /* Time integrator. We calculate EOS time
1010 by adding the expected packet transmission time.
1011 If real time is greater, we warp the artificial clock,
1014 so that cbq_time = max(real_time, work);
1016 incr2 = L2T(&q->link, q->tx_len);
1017 PSCHED_TADD(q->now, incr2);
1019 if ((incr -= incr2) < 0)
1022 PSCHED_TADD(q->now, incr);
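/* Net effect of the integrator above (sketch): per dequeue, the
   artificial clock advances by

	max(PSCHED_TDIFF(real_now, q->now_rt), L2T(&q->link, q->tx_len))

   i.e. by at least the expected wire time of the previous packet,
   while q->now_rt simply tracks real time. */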
1028 skb = cbq_dequeue_1(sch);
1031 sch->flags &= ~TCQ_F_THROTTLED;
1035 /* All the classes are overlimit. This can happen if:
1039 1. The scheduler is empty.
1040 2. The toplevel cutoff inhibited borrowing.
1041 3. The root class is overlimit.
1043 Reset the 2nd and 3rd conditions and retry.
1045 Note that NS and cbq-2.0 are buggy here: peeking
1046 an arbitrary class is appropriate for ancestor-only
1047 sharing, but not for the top-level algorithm.
1049 Our version is better but slower, because it requires
1050 two passes; that is unavoidable with top-level sharing.
1053 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1054 PSCHED_IS_PASTPERFECT(q->link.undertime))
1057 q->toplevel = TC_CBQ_MAXLEVEL;
1058 PSCHED_SET_PASTPERFECT(q->link.undertime);
1061 /* No packets in the scheduler, or nobody wants to give them to us :-(
1062 Sigh... start the watchdog timer in the latter case. */
1065 sch->qstats.overlimits++;
1066 if (q->wd_expires) {
1067 long delay = PSCHED_US2JIFFIE(q->wd_expires);
1070 mod_timer(&q->wd_timer, jiffies + delay);
1071 sch->flags |= TCQ_F_THROTTLED;
1077 /* CBQ class maintenance routines */
1079 static void cbq_adjust_levels(struct cbq_class *this)
1086 struct cbq_class *cl;
1088 if ((cl = this->children) != NULL) {
1090 if (cl->level > level)
1092 } while ((cl = cl->sibling) != this->children);
1094 this->level = level+1;
1095 } while ((this = this->tparent) != NULL);
1098 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1100 struct cbq_class *cl;
1103 if (q->quanta[prio] == 0)
1106 for (h=0; h<16; h++) {
1107 for (cl = q->classes[h]; cl; cl = cl->next) {
1108 /* BUGGGG... Beware! This expression suffers from
1109 arithmetic overflows!
1111 if (cl->priority == prio) {
1112 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1115 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1116 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1117 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
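/* Illustration of the normalization above (hypothetical classes): two
   classes in one band with weights 1 and 3 (so q->quanta[prio] == 4,
   q->nclasses[prio] == 2) and allot == 1514 get quanta of
   (1*1514*2)/4 == 757 and (3*1514*2)/4 == 2271, i.e. proportional to
   weight and averaging to the allot. */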
1123 static void cbq_sync_defmap(struct cbq_class *cl)
1125 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1126 struct cbq_class *split = cl->split;
1133 for (i=0; i<=TC_PRIO_MAX; i++) {
1134 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1135 split->defaults[i] = NULL;
1138 for (i=0; i<=TC_PRIO_MAX; i++) {
1139 int level = split->level;
1141 if (split->defaults[i])
1144 for (h=0; h<16; h++) {
1145 struct cbq_class *c;
1147 for (c = q->classes[h]; c; c = c->next) {
1148 if (c->split == split && c->level < level &&
1150 split->defaults[i] = c;
1158 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1160 struct cbq_class *split = NULL;
1163 if ((split = cl->split) == NULL)
1165 splitid = split->classid;
1168 if (split == NULL || split->classid != splitid) {
1169 for (split = cl->tparent; split; split = split->tparent)
1170 if (split->classid == splitid)
1177 if (cl->split != split) {
1179 cbq_sync_defmap(cl);
1181 cl->defmap = def&mask;
1183 cl->defmap = (cl->defmap&~mask)|(def&mask);
1185 cbq_sync_defmap(cl);
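/* Example of the masked update above (hypothetical values): with
   cl->defmap == 0x03, def == 0x04 and mask == 0x06 the result is
   (0x03 & ~0x06) | (0x04 & 0x06) == 0x01 | 0x04 == 0x05, i.e. only
   the priority bits selected by the mask are rewritten. */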
1188 static void cbq_unlink_class(struct cbq_class *this)
1190 struct cbq_class *cl, **clp;
1191 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1193 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1201 if (this->tparent) {
1210 } while ((cl = *clp) != this->sibling);
1212 if (this->tparent->children == this) {
1213 this->tparent->children = this->sibling;
1214 if (this->sibling == this)
1215 this->tparent->children = NULL;
1218 BUG_TRAP(this->sibling == this);
1222 static void cbq_link_class(struct cbq_class *this)
1224 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1225 unsigned h = cbq_hash(this->classid);
1226 struct cbq_class *parent = this->tparent;
1228 this->sibling = this;
1229 this->next = q->classes[h];
1230 q->classes[h] = this;
1235 if (parent->children == NULL) {
1236 parent->children = this;
1238 this->sibling = parent->children->sibling;
1239 parent->children->sibling = this;
1243 static unsigned int cbq_drop(struct Qdisc* sch)
1245 struct cbq_sched_data *q = qdisc_priv(sch);
1246 struct cbq_class *cl, *cl_head;
1250 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1251 if ((cl_head = q->active[prio]) == NULL)
1256 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1259 cbq_deactivate_class(cl);
1262 } while ((cl = cl->next_alive) != cl_head);
1268 cbq_reset(struct Qdisc* sch)
1270 struct cbq_sched_data *q = qdisc_priv(sch);
1271 struct cbq_class *cl;
1278 q->tx_borrowed = NULL;
1279 del_timer(&q->wd_timer);
1280 del_timer(&q->delay_timer);
1281 q->toplevel = TC_CBQ_MAXLEVEL;
1282 PSCHED_GET_TIME(q->now);
1285 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1286 q->active[prio] = NULL;
1288 for (h = 0; h < 16; h++) {
1289 for (cl = q->classes[h]; cl; cl = cl->next) {
1292 cl->next_alive = NULL;
1293 PSCHED_SET_PASTPERFECT(cl->undertime);
1294 cl->avgidle = cl->maxidle;
1295 cl->deficit = cl->quantum;
1296 cl->cpriority = cl->priority;
1303 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1305 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1306 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1307 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1309 if (lss->change&TCF_CBQ_LSS_EWMA)
1310 cl->ewma_log = lss->ewma_log;
1311 if (lss->change&TCF_CBQ_LSS_AVPKT)
1312 cl->avpkt = lss->avpkt;
1313 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1314 cl->minidle = -(long)lss->minidle;
1315 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1316 cl->maxidle = lss->maxidle;
1317 cl->avgidle = lss->maxidle;
1319 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1320 cl->offtime = lss->offtime;
1324 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1326 q->nclasses[cl->priority]--;
1327 q->quanta[cl->priority] -= cl->weight;
1328 cbq_normalize_quanta(q, cl->priority);
1331 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1333 q->nclasses[cl->priority]++;
1334 q->quanta[cl->priority] += cl->weight;
1335 cbq_normalize_quanta(q, cl->priority);
1338 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1340 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1343 cl->allot = wrr->allot;
1345 cl->weight = wrr->weight;
1346 if (wrr->priority) {
1347 cl->priority = wrr->priority-1;
1348 cl->cpriority = cl->priority;
1349 if (cl->priority >= cl->priority2)
1350 cl->priority2 = TC_CBQ_MAXPRIO-1;
1357 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1359 switch (ovl->strategy) {
1360 case TC_CBQ_OVL_CLASSIC:
1361 cl->overlimit = cbq_ovl_classic;
1363 case TC_CBQ_OVL_DELAY:
1364 cl->overlimit = cbq_ovl_delay;
1366 case TC_CBQ_OVL_LOWPRIO:
1367 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1368 ovl->priority2-1 <= cl->priority)
1370 cl->priority2 = ovl->priority2-1;
1371 cl->overlimit = cbq_ovl_lowprio;
1373 case TC_CBQ_OVL_DROP:
1374 cl->overlimit = cbq_ovl_drop;
1376 case TC_CBQ_OVL_RCLASSIC:
1377 cl->overlimit = cbq_ovl_rclassic;
1382 cl->penalty = (ovl->penalty*HZ)/1000;
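/* The conversion above stores ovl->penalty (apparently given in
   milliseconds) as jiffies: e.g. a hypothetical penalty of 100 becomes
   25 jiffies at HZ == 250 and 100 jiffies at HZ == 1000. */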
1386 #ifdef CONFIG_NET_CLS_POLICE
1387 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1389 cl->police = p->police;
1391 if (cl->q->handle) {
1392 if (p->police == TC_POLICE_RECLASSIFY)
1393 cl->q->reshape_fail = cbq_reshape_fail;
1395 cl->q->reshape_fail = NULL;
1401 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1403 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1407 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1409 struct cbq_sched_data *q = qdisc_priv(sch);
1410 struct rtattr *tb[TCA_CBQ_MAX];
1411 struct tc_ratespec *r;
1413 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1414 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1415 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1418 if (tb[TCA_CBQ_LSSOPT-1] &&
1419 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1422 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1424 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1428 q->link.sibling = &q->link;
1429 q->link.classid = sch->handle;
1430 q->link.qdisc = sch;
1431 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1433 q->link.q = &noop_qdisc;
1435 q->link.priority = TC_CBQ_MAXPRIO-1;
1436 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1437 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1438 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1439 q->link.overlimit = cbq_ovl_classic;
1440 q->link.allot = psched_mtu(sch->dev);
1441 q->link.quantum = q->link.allot;
1442 q->link.weight = q->link.R_tab->rate.rate;
1444 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1445 q->link.avpkt = q->link.allot/2;
1446 q->link.minidle = -0x7FFFFFFF;
1447 q->link.stats_lock = &sch->dev->queue_lock;
1449 init_timer(&q->wd_timer);
1450 q->wd_timer.data = (unsigned long)sch;
1451 q->wd_timer.function = cbq_watchdog;
1452 init_timer(&q->delay_timer);
1453 q->delay_timer.data = (unsigned long)sch;
1454 q->delay_timer.function = cbq_undelay;
1455 q->toplevel = TC_CBQ_MAXLEVEL;
1456 PSCHED_GET_TIME(q->now);
1459 cbq_link_class(&q->link);
1461 if (tb[TCA_CBQ_LSSOPT-1])
1462 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1464 cbq_addprio(q, &q->link);
1468 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1470 unsigned char *b = skb->tail;
1472 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1476 skb_trim(skb, b - skb->data);
1480 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1482 unsigned char *b = skb->tail;
1483 struct tc_cbq_lssopt opt;
1486 if (cl->borrow == NULL)
1487 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1488 if (cl->share == NULL)
1489 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1490 opt.ewma_log = cl->ewma_log;
1491 opt.level = cl->level;
1492 opt.avpkt = cl->avpkt;
1493 opt.maxidle = cl->maxidle;
1494 opt.minidle = (u32)(-cl->minidle);
1495 opt.offtime = cl->offtime;
1497 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1501 skb_trim(skb, b - skb->data);
1505 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1507 unsigned char *b = skb->tail;
1508 struct tc_cbq_wrropt opt;
1511 opt.allot = cl->allot;
1512 opt.priority = cl->priority+1;
1513 opt.cpriority = cl->cpriority+1;
1514 opt.weight = cl->weight;
1515 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1519 skb_trim(skb, b - skb->data);
1523 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1525 unsigned char *b = skb->tail;
1526 struct tc_cbq_ovl opt;
1528 opt.strategy = cl->ovl_strategy;
1529 opt.priority2 = cl->priority2+1;
1531 opt.penalty = (cl->penalty*1000)/HZ;
1532 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1536 skb_trim(skb, b - skb->data);
1540 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1542 unsigned char *b = skb->tail;
1543 struct tc_cbq_fopt opt;
1545 if (cl->split || cl->defmap) {
1546 opt.split = cl->split ? cl->split->classid : 0;
1547 opt.defmap = cl->defmap;
1549 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1554 skb_trim(skb, b - skb->data);
1558 #ifdef CONFIG_NET_CLS_POLICE
1559 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1561 unsigned char *b = skb->tail;
1562 struct tc_cbq_police opt;
1565 opt.police = cl->police;
1568 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1573 skb_trim(skb, b - skb->data);
1578 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1580 if (cbq_dump_lss(skb, cl) < 0 ||
1581 cbq_dump_rate(skb, cl) < 0 ||
1582 cbq_dump_wrr(skb, cl) < 0 ||
1583 cbq_dump_ovl(skb, cl) < 0 ||
1584 #ifdef CONFIG_NET_CLS_POLICE
1585 cbq_dump_police(skb, cl) < 0 ||
1587 cbq_dump_fopt(skb, cl) < 0)
1592 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1594 struct cbq_sched_data *q = qdisc_priv(sch);
1595 unsigned char *b = skb->tail;
1598 rta = (struct rtattr*)b;
1599 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1600 if (cbq_dump_attr(skb, &q->link) < 0)
1601 goto rtattr_failure;
1602 rta->rta_len = skb->tail - b;
1606 skb_trim(skb, b - skb->data);
1611 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1613 struct cbq_sched_data *q = qdisc_priv(sch);
1615 q->link.xstats.avgidle = q->link.avgidle;
1616 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1620 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1621 struct sk_buff *skb, struct tcmsg *tcm)
1623 struct cbq_class *cl = (struct cbq_class*)arg;
1624 unsigned char *b = skb->tail;
1628 tcm->tcm_parent = cl->tparent->classid;
1630 tcm->tcm_parent = TC_H_ROOT;
1631 tcm->tcm_handle = cl->classid;
1632 tcm->tcm_info = cl->q->handle;
1634 rta = (struct rtattr*)b;
1635 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1636 if (cbq_dump_attr(skb, cl) < 0)
1637 goto rtattr_failure;
1638 rta->rta_len = skb->tail - b;
1642 skb_trim(skb, b - skb->data);
1647 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1648 struct gnet_dump *d)
1650 struct cbq_sched_data *q = qdisc_priv(sch);
1651 struct cbq_class *cl = (struct cbq_class*)arg;
1653 cl->qstats.qlen = cl->q->q.qlen;
1654 cl->xstats.avgidle = cl->avgidle;
1655 cl->xstats.undertime = 0;
1657 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1658 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1660 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1661 #ifdef CONFIG_NET_ESTIMATOR
1662 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1664 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1667 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1670 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1673 struct cbq_class *cl = (struct cbq_class*)arg;
1677 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1678 cl->classid)) == NULL)
1681 #ifdef CONFIG_NET_CLS_POLICE
1682 if (cl->police == TC_POLICE_RECLASSIFY)
1683 new->reshape_fail = cbq_reshape_fail;
1687 *old = xchg(&cl->q, new);
1688 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1690 sch_tree_unlock(sch);
1697 static struct Qdisc *
1698 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1700 struct cbq_class *cl = (struct cbq_class*)arg;
1702 return cl ? cl->q : NULL;
1705 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1707 struct cbq_class *cl = (struct cbq_class *)arg;
1709 if (cl->q->q.qlen == 0)
1710 cbq_deactivate_class(cl);
1713 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1715 struct cbq_sched_data *q = qdisc_priv(sch);
1716 struct cbq_class *cl = cbq_class_lookup(q, classid);
1720 return (unsigned long)cl;
1725 static void cbq_destroy_filters(struct cbq_class *cl)
1727 struct tcf_proto *tp;
1729 while ((tp = cl->filter_list) != NULL) {
1730 cl->filter_list = tp->next;
1735 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1737 struct cbq_sched_data *q = qdisc_priv(sch);
1739 BUG_TRAP(!cl->filters);
1741 cbq_destroy_filters(cl);
1742 qdisc_destroy(cl->q);
1743 qdisc_put_rtab(cl->R_tab);
1744 #ifdef CONFIG_NET_ESTIMATOR
1745 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1752 cbq_destroy(struct Qdisc* sch)
1754 struct cbq_sched_data *q = qdisc_priv(sch);
1755 struct cbq_class *cl;
1758 #ifdef CONFIG_NET_CLS_POLICE
1762 * Filters must be destroyed first because we don't destroy the
1763 * classes from root to leaves, which means that filters can still
1764 * be bound to classes which have already been destroyed. --TGR '04
1766 for (h = 0; h < 16; h++)
1767 for (cl = q->classes[h]; cl; cl = cl->next)
1768 cbq_destroy_filters(cl);
1770 for (h = 0; h < 16; h++) {
1771 struct cbq_class *next;
1773 for (cl = q->classes[h]; cl; cl = next) {
1775 cbq_destroy_class(sch, cl);
1780 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1782 struct cbq_class *cl = (struct cbq_class*)arg;
1784 if (--cl->refcnt == 0) {
1785 #ifdef CONFIG_NET_CLS_POLICE
1786 struct cbq_sched_data *q = qdisc_priv(sch);
1788 spin_lock_bh(&sch->dev->queue_lock);
1789 if (q->rx_class == cl)
1791 spin_unlock_bh(&sch->dev->queue_lock);
1794 cbq_destroy_class(sch, cl);
1799 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1803 struct cbq_sched_data *q = qdisc_priv(sch);
1804 struct cbq_class *cl = (struct cbq_class*)*arg;
1805 struct rtattr *opt = tca[TCA_OPTIONS-1];
1806 struct rtattr *tb[TCA_CBQ_MAX];
1807 struct cbq_class *parent;
1808 struct qdisc_rate_table *rtab = NULL;
1810 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1813 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1814 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1817 if (tb[TCA_CBQ_FOPT-1] &&
1818 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1821 if (tb[TCA_CBQ_RATE-1] &&
1822 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1825 if (tb[TCA_CBQ_LSSOPT-1] &&
1826 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1829 if (tb[TCA_CBQ_WRROPT-1] &&
1830 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1833 #ifdef CONFIG_NET_CLS_POLICE
1834 if (tb[TCA_CBQ_POLICE-1] &&
1835 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1842 if (cl->tparent && cl->tparent->classid != parentid)
1844 if (!cl->tparent && parentid != TC_H_ROOT)
1848 if (tb[TCA_CBQ_RATE-1]) {
1849 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1854 /* Change class parameters */
1857 if (cl->next_alive != NULL)
1858 cbq_deactivate_class(cl);
1861 rtab = xchg(&cl->R_tab, rtab);
1862 qdisc_put_rtab(rtab);
1865 if (tb[TCA_CBQ_LSSOPT-1])
1866 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1868 if (tb[TCA_CBQ_WRROPT-1]) {
1870 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1873 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1874 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1876 #ifdef CONFIG_NET_CLS_POLICE
1877 if (tb[TCA_CBQ_POLICE-1])
1878 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1881 if (tb[TCA_CBQ_FOPT-1])
1882 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1885 cbq_activate_class(cl);
1887 sch_tree_unlock(sch);
1889 #ifdef CONFIG_NET_ESTIMATOR
1890 if (tca[TCA_RATE-1])
1891 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1892 cl->stats_lock, tca[TCA_RATE-1]);
1897 if (parentid == TC_H_ROOT)
1900 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1901 tb[TCA_CBQ_LSSOPT-1] == NULL)
1904 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1910 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1914 classid = TC_H_MAKE(sch->handle,0x8000);
1916 for (i=0; i<0x8000; i++) {
1917 if (++q->hgenerator >= 0x8000)
1919 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1925 classid = classid|q->hgenerator;
1930 parent = cbq_class_lookup(q, parentid);
1937 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1943 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1944 cl->q = &noop_qdisc;
1945 cl->classid = classid;
1946 cl->tparent = parent;
1948 cl->allot = parent->allot;
1949 cl->quantum = cl->allot;
1950 cl->weight = cl->R_tab->rate.rate;
1951 cl->stats_lock = &sch->dev->queue_lock;
1955 cl->borrow = cl->tparent;
1956 if (cl->tparent != &q->link)
1957 cl->share = cl->tparent;
1958 cbq_adjust_levels(parent);
1959 cl->minidle = -0x7FFFFFFF;
1960 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1961 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1962 if (cl->ewma_log==0)
1963 cl->ewma_log = q->link.ewma_log;
1965 cl->maxidle = q->link.maxidle;
1967 cl->avpkt = q->link.avpkt;
1968 cl->overlimit = cbq_ovl_classic;
1969 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1970 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1971 #ifdef CONFIG_NET_CLS_POLICE
1972 if (tb[TCA_CBQ_POLICE-1])
1973 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1975 if (tb[TCA_CBQ_FOPT-1])
1976 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1977 sch_tree_unlock(sch);
1979 #ifdef CONFIG_NET_ESTIMATOR
1980 if (tca[TCA_RATE-1])
1981 gen_new_estimator(&cl->bstats, &cl->rate_est,
1982 cl->stats_lock, tca[TCA_RATE-1]);
1985 *arg = (unsigned long)cl;
1989 qdisc_put_rtab(rtab);
1993 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1995 struct cbq_sched_data *q = qdisc_priv(sch);
1996 struct cbq_class *cl = (struct cbq_class*)arg;
1999 if (cl->filters || cl->children || cl == &q->link)
2004 qlen = cl->q->q.qlen;
2006 qdisc_tree_decrease_qlen(cl->q, qlen);
2009 cbq_deactivate_class(cl);
2011 if (q->tx_borrowed == cl)
2012 q->tx_borrowed = q->tx_class;
2013 if (q->tx_class == cl) {
2015 q->tx_borrowed = NULL;
2017 #ifdef CONFIG_NET_CLS_POLICE
2018 if (q->rx_class == cl)
2022 cbq_unlink_class(cl);
2023 cbq_adjust_levels(cl->tparent);
2025 cbq_sync_defmap(cl);
2028 sch_tree_unlock(sch);
2030 if (--cl->refcnt == 0)
2031 cbq_destroy_class(sch, cl);
2036 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2038 struct cbq_sched_data *q = qdisc_priv(sch);
2039 struct cbq_class *cl = (struct cbq_class *)arg;
2044 return &cl->filter_list;
2047 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2050 struct cbq_sched_data *q = qdisc_priv(sch);
2051 struct cbq_class *p = (struct cbq_class*)parent;
2052 struct cbq_class *cl = cbq_class_lookup(q, classid);
2055 if (p && p->level <= cl->level)
2058 return (unsigned long)cl;
2063 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2065 struct cbq_class *cl = (struct cbq_class*)arg;
2070 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2072 struct cbq_sched_data *q = qdisc_priv(sch);
2078 for (h = 0; h < 16; h++) {
2079 struct cbq_class *cl;
2081 for (cl = q->classes[h]; cl; cl = cl->next) {
2082 if (arg->count < arg->skip) {
2086 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2095 static struct Qdisc_class_ops cbq_class_ops = {
2098 .qlen_notify = cbq_qlen_notify,
2101 .change = cbq_change_class,
2102 .delete = cbq_delete,
2104 .tcf_chain = cbq_find_tcf,
2105 .bind_tcf = cbq_bind_filter,
2106 .unbind_tcf = cbq_unbind_filter,
2107 .dump = cbq_dump_class,
2108 .dump_stats = cbq_dump_class_stats,
2111 static struct Qdisc_ops cbq_qdisc_ops = {
2113 .cl_ops = &cbq_class_ops,
2115 .priv_size = sizeof(struct cbq_sched_data),
2116 .enqueue = cbq_enqueue,
2117 .dequeue = cbq_dequeue,
2118 .requeue = cbq_requeue,
2122 .destroy = cbq_destroy,
2125 .dump_stats = cbq_dump_stats,
2126 .owner = THIS_MODULE,
2129 static int __init cbq_module_init(void)
2131 return register_qdisc(&cbq_qdisc_ops);
2133 static void __exit cbq_module_exit(void)
2135 unregister_qdisc(&cbq_qdisc_ops);
2137 module_init(cbq_module_init)
2138 module_exit(cbq_module_exit)
2139 MODULE_LICENSE("GPL");