]> Git Repo - J-linux.git/blob - net/sched/sch_api.c
scsi: zfcp: Trace when request remove fails after qdio send fails
[J-linux.git] / net / sched / sch_api.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_api.c  Packet scheduler API.
4  *
5  * Authors:     Alexey Kuznetsov, <[email protected]>
6  *
7  * Fixes:
8  *
9  * Rani Assaf <[email protected]> :980802: JIFFIES and CPU clock sources are repaired.
10  * Eduardo J. Blanco <[email protected]> :990222: kmod support
11  * Jamal Hadi Salim <[email protected]>: 990601: ingress support
12  */
13
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <linux/init.h>
21 #include <linux/proc_fs.h>
22 #include <linux/seq_file.h>
23 #include <linux/kmod.h>
24 #include <linux/list.h>
25 #include <linux/hrtimer.h>
26 #include <linux/slab.h>
27 #include <linux/hashtable.h>
28
29 #include <net/net_namespace.h>
30 #include <net/sock.h>
31 #include <net/netlink.h>
32 #include <net/pkt_sched.h>
33 #include <net/pkt_cls.h>
34 #include <net/tc_wrapper.h>
35
36 #include <trace/events/qdisc.h>
37
38 /*
39
40    Short review.
41    -------------
42
43    This file consists of two interrelated parts:
44
45    1. queueing disciplines manager frontend.
46    2. traffic classes manager frontend.
47
48    Generally, queueing discipline ("qdisc") is a black box,
49    which is able to enqueue packets and to dequeue them (when
50    device is ready to send something) in order and at times
51    determined by algorithm hidden in it.
52
   qdiscs are divided into two categories:
54    - "queues", which have no internal structure visible from outside.
55    - "schedulers", which split all the packets to "traffic classes",
56      using "packet classifiers" (look at cls_api.c)
57
58    In turn, classes may have child qdiscs (as rule, queues)
59    attached to them etc. etc. etc.
60
   The goal of the routines in this file is to translate
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform some
   sanity checks and the part of the work that is common to all
   qdiscs, and to provide rtnetlink notifications.
66
67    All real intelligent work is done inside qdisc modules.
68
69
70
71    Every discipline has two major routines: enqueue and dequeue.
72
73    ---dequeue
74
75    dequeue usually returns a skb to send. It is allowed to return NULL,
76    but it does not mean that queue is empty, it just means that
77    discipline does not want to send anything this time.
78    Queue is really empty if q->q.qlen == 0.
79    For complicated disciplines with multiple queues q->q is not
80    real packet queue, but however q->q.qlen must be valid.
81
82    ---enqueue
83
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
87    NET_XMIT_DROP        - this packet dropped
88      Expected action: do not backoff, but wait until queue will clear.
89    NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
90      Expected action: backoff or ignore
91
92    Auxiliary routines:
93
94    ---peek
95
96    like dequeue but without removing a packet from the queue
97
98    ---reset
99
100    returns qdisc to initial state: purge all buffers, clear all
101    timers, counters (except for statistics) etc.
102
103    ---init
104
105    initializes newly created qdisc.
106
107    ---destroy
108
109    destroys resources allocated by init and during lifetime of qdisc.
110
111    ---change
112
113    changes qdisc parameters.
114  */
115
/* Pure SMP lock guarding the list of registered TC modules (qdisc_base). */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *      Queueing disciplines manipulation.      *
 ************************************************/


/* Head of the list of all installed queueing disciplines, linked via ->next. */

static struct Qdisc_ops *qdisc_base;
128
129 /* Register/unregister queueing discipline */
130
131 int register_qdisc(struct Qdisc_ops *qops)
132 {
133         struct Qdisc_ops *q, **qp;
134         int rc = -EEXIST;
135
136         write_lock(&qdisc_mod_lock);
137         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
138                 if (!strcmp(qops->id, q->id))
139                         goto out;
140
141         if (qops->enqueue == NULL)
142                 qops->enqueue = noop_qdisc_ops.enqueue;
143         if (qops->peek == NULL) {
144                 if (qops->dequeue == NULL)
145                         qops->peek = noop_qdisc_ops.peek;
146                 else
147                         goto out_einval;
148         }
149         if (qops->dequeue == NULL)
150                 qops->dequeue = noop_qdisc_ops.dequeue;
151
152         if (qops->cl_ops) {
153                 const struct Qdisc_class_ops *cops = qops->cl_ops;
154
155                 if (!(cops->find && cops->walk && cops->leaf))
156                         goto out_einval;
157
158                 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
159                         goto out_einval;
160         }
161
162         qops->next = NULL;
163         *qp = qops;
164         rc = 0;
165 out:
166         write_unlock(&qdisc_mod_lock);
167         return rc;
168
169 out_einval:
170         rc = -EINVAL;
171         goto out;
172 }
173 EXPORT_SYMBOL(register_qdisc);
174
175 void unregister_qdisc(struct Qdisc_ops *qops)
176 {
177         struct Qdisc_ops *q, **qp;
178         int err = -ENOENT;
179
180         write_lock(&qdisc_mod_lock);
181         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
182                 if (q == qops)
183                         break;
184         if (q) {
185                 *qp = q->next;
186                 q->next = NULL;
187                 err = 0;
188         }
189         write_unlock(&qdisc_mod_lock);
190
191         WARN(err, "unregister qdisc(%s) failed\n", qops->id);
192 }
193 EXPORT_SYMBOL(unregister_qdisc);
194
195 /* Get default qdisc if not otherwise specified */
196 void qdisc_get_default(char *name, size_t len)
197 {
198         read_lock(&qdisc_mod_lock);
199         strscpy(name, default_qdisc_ops->id, len);
200         read_unlock(&qdisc_mod_lock);
201 }
202
203 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
204 {
205         struct Qdisc_ops *q = NULL;
206
207         for (q = qdisc_base; q; q = q->next) {
208                 if (!strcmp(name, q->id)) {
209                         if (!try_module_get(q->owner))
210                                 q = NULL;
211                         break;
212                 }
213         }
214
215         return q;
216 }
217
218 /* Set new default qdisc to use */
219 int qdisc_set_default(const char *name)
220 {
221         const struct Qdisc_ops *ops;
222
223         if (!capable(CAP_NET_ADMIN))
224                 return -EPERM;
225
226         write_lock(&qdisc_mod_lock);
227         ops = qdisc_lookup_default(name);
228         if (!ops) {
229                 /* Not found, drop lock and try to load module */
230                 write_unlock(&qdisc_mod_lock);
231                 request_module("sch_%s", name);
232                 write_lock(&qdisc_mod_lock);
233
234                 ops = qdisc_lookup_default(name);
235         }
236
237         if (ops) {
238                 /* Set new default */
239                 module_put(default_qdisc_ops->owner);
240                 default_qdisc_ops = ops;
241         }
242         write_unlock(&qdisc_mod_lock);
243
244         return ops ? 0 : -ENOENT;
245 }
246
#ifdef CONFIG_NET_SCH_DEFAULT
/* Late initcall: apply the default qdisc named by the kernel configuration. */
static int __init sch_default_qdisc(void)
{
	const char *name = CONFIG_DEFAULT_NET_SCH;

	return qdisc_set_default(name);
}
late_initcall(sch_default_qdisc);
#endif
255
256 /* We know handle. Find qdisc among all qdisc's attached to device
257  * (root qdisc, all its children, children of children etc.)
258  * Note: caller either uses rtnl or rcu_read_lock()
259  */
260
261 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
262 {
263         struct Qdisc *q;
264
265         if (!qdisc_dev(root))
266                 return (root->handle == handle ? root : NULL);
267
268         if (!(root->flags & TCQ_F_BUILTIN) &&
269             root->handle == handle)
270                 return root;
271
272         hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
273                                    lockdep_rtnl_is_held()) {
274                 if (q->handle == handle)
275                         return q;
276         }
277         return NULL;
278 }
279
280 void qdisc_hash_add(struct Qdisc *q, bool invisible)
281 {
282         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
283                 ASSERT_RTNL();
284                 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
285                 if (invisible)
286                         q->flags |= TCQ_F_INVISIBLE;
287         }
288 }
289 EXPORT_SYMBOL(qdisc_hash_add);
290
291 void qdisc_hash_del(struct Qdisc *q)
292 {
293         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294                 ASSERT_RTNL();
295                 hash_del_rcu(&q->hash);
296         }
297 }
298 EXPORT_SYMBOL(qdisc_hash_del);
299
300 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
301 {
302         struct Qdisc *q;
303
304         if (!handle)
305                 return NULL;
306         q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
307         if (q)
308                 goto out;
309
310         if (dev_ingress_queue(dev))
311                 q = qdisc_match_from_root(
312                         dev_ingress_queue(dev)->qdisc_sleeping,
313                         handle);
314 out:
315         return q;
316 }
317
318 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
319 {
320         struct netdev_queue *nq;
321         struct Qdisc *q;
322
323         if (!handle)
324                 return NULL;
325         q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
326         if (q)
327                 goto out;
328
329         nq = dev_ingress_queue_rcu(dev);
330         if (nq)
331                 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
332 out:
333         return q;
334 }
335
336 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
337 {
338         unsigned long cl;
339         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
340
341         if (cops == NULL)
342                 return NULL;
343         cl = cops->find(p, classid);
344
345         if (cl == 0)
346                 return NULL;
347         return cops->leaf(p, cl);
348 }
349
350 /* Find queueing discipline by name */
351
352 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
353 {
354         struct Qdisc_ops *q = NULL;
355
356         if (kind) {
357                 read_lock(&qdisc_mod_lock);
358                 for (q = qdisc_base; q; q = q->next) {
359                         if (nla_strcmp(kind, q->id) == 0) {
360                                 if (!try_module_get(q->owner))
361                                         q = NULL;
362                                 break;
363                         }
364                 }
365                 read_unlock(&qdisc_mod_lock);
366         }
367         return q;
368 }
369
370 /* The linklayer setting were not transferred from iproute2, in older
371  * versions, and the rate tables lookup systems have been dropped in
372  * the kernel. To keep backward compatible with older iproute2 tc
373  * utils, we detect the linklayer setting by detecting if the rate
374  * table were modified.
375  *
376  * For linklayer ATM table entries, the rate table will be aligned to
377  * 48 bytes, thus some table entries will contain the same value.  The
378  * mpu (min packet unit) is also encoded into the old rate table, thus
379  * starting from the mpu, we find low and high table entries for
380  * mapping this cell.  If these entries contain the same value, when
381  * the rate tables have been modified for linklayer ATM.
382  *
383  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
384  * and then roundup to the next cell, calc the table entry one below,
385  * and compare.
386  */
387 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
388 {
389         int low       = roundup(r->mpu, 48);
390         int high      = roundup(low+1, 48);
391         int cell_low  = low >> r->cell_log;
392         int cell_high = (high >> r->cell_log) - 1;
393
394         /* rtab is too inaccurate at rates > 100Mbit/s */
395         if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
396                 pr_debug("TC linklayer: Giving up ATM detection\n");
397                 return TC_LINKLAYER_ETHERNET;
398         }
399
400         if ((cell_high > cell_low) && (cell_high < 256)
401             && (rtab[cell_low] == rtab[cell_high])) {
402                 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
403                          cell_low, cell_high, rtab[cell_high]);
404                 return TC_LINKLAYER_ATM;
405         }
406         return TC_LINKLAYER_ETHERNET;
407 }
408
409 static struct qdisc_rate_table *qdisc_rtab_list;
410
411 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
412                                         struct nlattr *tab,
413                                         struct netlink_ext_ack *extack)
414 {
415         struct qdisc_rate_table *rtab;
416
417         if (tab == NULL || r->rate == 0 ||
418             r->cell_log == 0 || r->cell_log >= 32 ||
419             nla_len(tab) != TC_RTAB_SIZE) {
420                 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
421                 return NULL;
422         }
423
424         for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
425                 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
426                     !memcmp(&rtab->data, nla_data(tab), 1024)) {
427                         rtab->refcnt++;
428                         return rtab;
429                 }
430         }
431
432         rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
433         if (rtab) {
434                 rtab->rate = *r;
435                 rtab->refcnt = 1;
436                 memcpy(rtab->data, nla_data(tab), 1024);
437                 if (r->linklayer == TC_LINKLAYER_UNAWARE)
438                         r->linklayer = __detect_linklayer(r, rtab->data);
439                 rtab->next = qdisc_rtab_list;
440                 qdisc_rtab_list = rtab;
441         } else {
442                 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
443         }
444         return rtab;
445 }
446 EXPORT_SYMBOL(qdisc_get_rtab);
447
448 void qdisc_put_rtab(struct qdisc_rate_table *tab)
449 {
450         struct qdisc_rate_table *rtab, **rtabp;
451
452         if (!tab || --tab->refcnt)
453                 return;
454
455         for (rtabp = &qdisc_rtab_list;
456              (rtab = *rtabp) != NULL;
457              rtabp = &rtab->next) {
458                 if (rtab == tab) {
459                         *rtabp = rtab->next;
460                         kfree(rtab);
461                         return;
462                 }
463         }
464 }
465 EXPORT_SYMBOL(qdisc_put_rtab);
466
467 static LIST_HEAD(qdisc_stab_list);
468
469 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
470         [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
471         [TCA_STAB_DATA] = { .type = NLA_BINARY },
472 };
473
474 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
475                                                struct netlink_ext_ack *extack)
476 {
477         struct nlattr *tb[TCA_STAB_MAX + 1];
478         struct qdisc_size_table *stab;
479         struct tc_sizespec *s;
480         unsigned int tsize = 0;
481         u16 *tab = NULL;
482         int err;
483
484         err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
485                                           extack);
486         if (err < 0)
487                 return ERR_PTR(err);
488         if (!tb[TCA_STAB_BASE]) {
489                 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
490                 return ERR_PTR(-EINVAL);
491         }
492
493         s = nla_data(tb[TCA_STAB_BASE]);
494
495         if (s->tsize > 0) {
496                 if (!tb[TCA_STAB_DATA]) {
497                         NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
498                         return ERR_PTR(-EINVAL);
499                 }
500                 tab = nla_data(tb[TCA_STAB_DATA]);
501                 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
502         }
503
504         if (tsize != s->tsize || (!tab && tsize > 0)) {
505                 NL_SET_ERR_MSG(extack, "Invalid size of size table");
506                 return ERR_PTR(-EINVAL);
507         }
508
509         list_for_each_entry(stab, &qdisc_stab_list, list) {
510                 if (memcmp(&stab->szopts, s, sizeof(*s)))
511                         continue;
512                 if (tsize > 0 &&
513                     memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
514                         continue;
515                 stab->refcnt++;
516                 return stab;
517         }
518
519         if (s->size_log > STAB_SIZE_LOG_MAX ||
520             s->cell_log > STAB_SIZE_LOG_MAX) {
521                 NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
522                 return ERR_PTR(-EINVAL);
523         }
524
525         stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
526         if (!stab)
527                 return ERR_PTR(-ENOMEM);
528
529         stab->refcnt = 1;
530         stab->szopts = *s;
531         if (tsize > 0)
532                 memcpy(stab->data, tab, flex_array_size(stab, data, tsize));
533
534         list_add_tail(&stab->list, &qdisc_stab_list);
535
536         return stab;
537 }
538
539 void qdisc_put_stab(struct qdisc_size_table *tab)
540 {
541         if (!tab)
542                 return;
543
544         if (--tab->refcnt == 0) {
545                 list_del(&tab->list);
546                 kfree_rcu(tab, rcu);
547         }
548 }
549 EXPORT_SYMBOL(qdisc_put_stab);
550
551 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
552 {
553         struct nlattr *nest;
554
555         nest = nla_nest_start_noflag(skb, TCA_STAB);
556         if (nest == NULL)
557                 goto nla_put_failure;
558         if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
559                 goto nla_put_failure;
560         nla_nest_end(skb, nest);
561
562         return skb->len;
563
564 nla_put_failure:
565         return -1;
566 }
567
568 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
569                                const struct qdisc_size_table *stab)
570 {
571         int pkt_len, slot;
572
573         pkt_len = skb->len + stab->szopts.overhead;
574         if (unlikely(!stab->szopts.tsize))
575                 goto out;
576
577         slot = pkt_len + stab->szopts.cell_align;
578         if (unlikely(slot < 0))
579                 slot = 0;
580
581         slot >>= stab->szopts.cell_log;
582         if (likely(slot < stab->szopts.tsize))
583                 pkt_len = stab->data[slot];
584         else
585                 pkt_len = stab->data[stab->szopts.tsize - 1] *
586                                 (slot / stab->szopts.tsize) +
587                                 stab->data[slot % stab->szopts.tsize];
588
589         pkt_len <<= stab->szopts.size_log;
590 out:
591         if (unlikely(pkt_len < 1))
592                 pkt_len = 1;
593         qdisc_skb_cb(skb)->pkt_len = pkt_len;
594 }
595 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
596
597 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
598 {
599         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
600                 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
601                         txt, qdisc->ops->id, qdisc->handle >> 16);
602                 qdisc->flags |= TCQ_F_WARN_NONWC;
603         }
604 }
605 EXPORT_SYMBOL(qdisc_warn_nonwc);
606
607 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
608 {
609         struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
610                                                  timer);
611
612         rcu_read_lock();
613         __netif_schedule(qdisc_root(wd->qdisc));
614         rcu_read_unlock();
615
616         return HRTIMER_NORESTART;
617 }
618
619 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
620                                  clockid_t clockid)
621 {
622         hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
623         wd->timer.function = qdisc_watchdog;
624         wd->qdisc = qdisc;
625 }
626 EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
627
628 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
629 {
630         qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
631 }
632 EXPORT_SYMBOL(qdisc_watchdog_init);
633
634 void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
635                                       u64 delta_ns)
636 {
637         if (test_bit(__QDISC_STATE_DEACTIVATED,
638                      &qdisc_root_sleeping(wd->qdisc)->state))
639                 return;
640
641         if (hrtimer_is_queued(&wd->timer)) {
642                 /* If timer is already set in [expires, expires + delta_ns],
643                  * do not reprogram it.
644                  */
645                 if (wd->last_expires - expires <= delta_ns)
646                         return;
647         }
648
649         wd->last_expires = expires;
650         hrtimer_start_range_ns(&wd->timer,
651                                ns_to_ktime(expires),
652                                delta_ns,
653                                HRTIMER_MODE_ABS_PINNED);
654 }
655 EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
656
657 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
658 {
659         hrtimer_cancel(&wd->timer);
660 }
661 EXPORT_SYMBOL(qdisc_watchdog_cancel);
662
663 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
664 {
665         struct hlist_head *h;
666         unsigned int i;
667
668         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
669
670         if (h != NULL) {
671                 for (i = 0; i < n; i++)
672                         INIT_HLIST_HEAD(&h[i]);
673         }
674         return h;
675 }
676
677 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
678 {
679         struct Qdisc_class_common *cl;
680         struct hlist_node *next;
681         struct hlist_head *nhash, *ohash;
682         unsigned int nsize, nmask, osize;
683         unsigned int i, h;
684
685         /* Rehash when load factor exceeds 0.75 */
686         if (clhash->hashelems * 4 <= clhash->hashsize * 3)
687                 return;
688         nsize = clhash->hashsize * 2;
689         nmask = nsize - 1;
690         nhash = qdisc_class_hash_alloc(nsize);
691         if (nhash == NULL)
692                 return;
693
694         ohash = clhash->hash;
695         osize = clhash->hashsize;
696
697         sch_tree_lock(sch);
698         for (i = 0; i < osize; i++) {
699                 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
700                         h = qdisc_class_hash(cl->classid, nmask);
701                         hlist_add_head(&cl->hnode, &nhash[h]);
702                 }
703         }
704         clhash->hash     = nhash;
705         clhash->hashsize = nsize;
706         clhash->hashmask = nmask;
707         sch_tree_unlock(sch);
708
709         kvfree(ohash);
710 }
711 EXPORT_SYMBOL(qdisc_class_hash_grow);
712
713 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
714 {
715         unsigned int size = 4;
716
717         clhash->hash = qdisc_class_hash_alloc(size);
718         if (!clhash->hash)
719                 return -ENOMEM;
720         clhash->hashsize  = size;
721         clhash->hashmask  = size - 1;
722         clhash->hashelems = 0;
723         return 0;
724 }
725 EXPORT_SYMBOL(qdisc_class_hash_init);
726
727 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
728 {
729         kvfree(clhash->hash);
730 }
731 EXPORT_SYMBOL(qdisc_class_hash_destroy);
732
733 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
734                              struct Qdisc_class_common *cl)
735 {
736         unsigned int h;
737
738         INIT_HLIST_NODE(&cl->hnode);
739         h = qdisc_class_hash(cl->classid, clhash->hashmask);
740         hlist_add_head(&cl->hnode, &clhash->hash[h]);
741         clhash->hashelems++;
742 }
743 EXPORT_SYMBOL(qdisc_class_hash_insert);
744
745 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
746                              struct Qdisc_class_common *cl)
747 {
748         hlist_del(&cl->hnode);
749         clhash->hashelems--;
750 }
751 EXPORT_SYMBOL(qdisc_class_hash_remove);
752
753 /* Allocate an unique handle from space managed by kernel
754  * Possible range is [8000-FFFF]:0000 (0x8000 values)
755  */
756 static u32 qdisc_alloc_handle(struct net_device *dev)
757 {
758         int i = 0x8000;
759         static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
760
761         do {
762                 autohandle += TC_H_MAKE(0x10000U, 0);
763                 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
764                         autohandle = TC_H_MAKE(0x80000000U, 0);
765                 if (!qdisc_lookup(dev, autohandle))
766                         return autohandle;
767                 cond_resched();
768         } while (--i > 0);
769
770         return 0;
771 }
772
773 void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
774 {
775         bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
776         const struct Qdisc_class_ops *cops;
777         unsigned long cl;
778         u32 parentid;
779         bool notify;
780         int drops;
781
782         if (n == 0 && len == 0)
783                 return;
784         drops = max_t(int, n, 0);
785         rcu_read_lock();
786         while ((parentid = sch->parent)) {
787                 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
788                         break;
789
790                 if (sch->flags & TCQ_F_NOPARENT)
791                         break;
792                 /* Notify parent qdisc only if child qdisc becomes empty.
793                  *
794                  * If child was empty even before update then backlog
795                  * counter is screwed and we skip notification because
796                  * parent class is already passive.
797                  *
798                  * If the original child was offloaded then it is allowed
799                  * to be seem as empty, so the parent is notified anyway.
800                  */
801                 notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
802                                                        !qdisc_is_offloaded);
803                 /* TODO: perform the search on a per txq basis */
804                 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
805                 if (sch == NULL) {
806                         WARN_ON_ONCE(parentid != TC_H_ROOT);
807                         break;
808                 }
809                 cops = sch->ops->cl_ops;
810                 if (notify && cops->qlen_notify) {
811                         cl = cops->find(sch, parentid);
812                         cops->qlen_notify(sch, cl);
813                 }
814                 sch->q.qlen -= n;
815                 sch->qstats.backlog -= len;
816                 __qdisc_qstats_drop(sch, drops);
817         }
818         rcu_read_unlock();
819 }
820 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
821
822 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
823                               void *type_data)
824 {
825         struct net_device *dev = qdisc_dev(sch);
826         int err;
827
828         sch->flags &= ~TCQ_F_OFFLOADED;
829         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
830                 return 0;
831
832         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
833         if (err == -EOPNOTSUPP)
834                 return 0;
835
836         if (!err)
837                 sch->flags |= TCQ_F_OFFLOADED;
838
839         return err;
840 }
841 EXPORT_SYMBOL(qdisc_offload_dump_helper);
842
843 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
844                                 struct Qdisc *new, struct Qdisc *old,
845                                 enum tc_setup_type type, void *type_data,
846                                 struct netlink_ext_ack *extack)
847 {
848         bool any_qdisc_is_offloaded;
849         int err;
850
851         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
852                 return;
853
854         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
855
856         /* Don't report error if the graft is part of destroy operation. */
857         if (!err || !new || new == &noop_qdisc)
858                 return;
859
860         /* Don't report error if the parent, the old child and the new
861          * one are not offloaded.
862          */
863         any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
864         any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
865         any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
866
867         if (any_qdisc_is_offloaded)
868                 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
869 }
870 EXPORT_SYMBOL(qdisc_offload_graft_helper);
871
872 void qdisc_offload_query_caps(struct net_device *dev,
873                               enum tc_setup_type type,
874                               void *caps, size_t caps_len)
875 {
876         const struct net_device_ops *ops = dev->netdev_ops;
877         struct tc_query_caps_base base = {
878                 .type = type,
879                 .caps = caps,
880         };
881
882         memset(caps, 0, caps_len);
883
884         if (ops->ndo_setup_tc)
885                 ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
886 }
887 EXPORT_SYMBOL(qdisc_offload_query_caps);
888
889 static void qdisc_offload_graft_root(struct net_device *dev,
890                                      struct Qdisc *new, struct Qdisc *old,
891                                      struct netlink_ext_ack *extack)
892 {
893         struct tc_root_qopt_offload graft_offload = {
894                 .command        = TC_ROOT_GRAFT,
895                 .handle         = new ? new->handle : 0,
896                 .ingress        = (new && new->flags & TCQ_F_INGRESS) ||
897                                   (old && old->flags & TCQ_F_INGRESS),
898         };
899
900         qdisc_offload_graft_helper(dev, NULL, new, old,
901                                    TC_SETUP_ROOT_QDISC, &graft_offload, extack);
902 }
903
904 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
905                          u32 portid, u32 seq, u16 flags, int event)
906 {
907         struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
908         struct gnet_stats_queue __percpu *cpu_qstats = NULL;
909         struct tcmsg *tcm;
910         struct nlmsghdr  *nlh;
911         unsigned char *b = skb_tail_pointer(skb);
912         struct gnet_dump d;
913         struct qdisc_size_table *stab;
914         u32 block_index;
915         __u32 qlen;
916
917         cond_resched();
918         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
919         if (!nlh)
920                 goto out_nlmsg_trim;
921         tcm = nlmsg_data(nlh);
922         tcm->tcm_family = AF_UNSPEC;
923         tcm->tcm__pad1 = 0;
924         tcm->tcm__pad2 = 0;
925         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
926         tcm->tcm_parent = clid;
927         tcm->tcm_handle = q->handle;
928         tcm->tcm_info = refcount_read(&q->refcnt);
929         if (nla_put_string(skb, TCA_KIND, q->ops->id))
930                 goto nla_put_failure;
931         if (q->ops->ingress_block_get) {
932                 block_index = q->ops->ingress_block_get(q);
933                 if (block_index &&
934                     nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
935                         goto nla_put_failure;
936         }
937         if (q->ops->egress_block_get) {
938                 block_index = q->ops->egress_block_get(q);
939                 if (block_index &&
940                     nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
941                         goto nla_put_failure;
942         }
943         if (q->ops->dump && q->ops->dump(q, skb) < 0)
944                 goto nla_put_failure;
945         if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
946                 goto nla_put_failure;
947         qlen = qdisc_qlen_sum(q);
948
949         stab = rtnl_dereference(q->stab);
950         if (stab && qdisc_dump_stab(skb, stab) < 0)
951                 goto nla_put_failure;
952
953         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
954                                          NULL, &d, TCA_PAD) < 0)
955                 goto nla_put_failure;
956
957         if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
958                 goto nla_put_failure;
959
960         if (qdisc_is_percpu_stats(q)) {
961                 cpu_bstats = q->cpu_bstats;
962                 cpu_qstats = q->cpu_qstats;
963         }
964
965         if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
966             gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
967             gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
968                 goto nla_put_failure;
969
970         if (gnet_stats_finish_copy(&d) < 0)
971                 goto nla_put_failure;
972
973         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
974         return skb->len;
975
976 out_nlmsg_trim:
977 nla_put_failure:
978         nlmsg_trim(skb, b);
979         return -1;
980 }
981
982 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
983 {
984         if (q->flags & TCQ_F_BUILTIN)
985                 return true;
986         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
987                 return true;
988
989         return false;
990 }
991
992 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
993                         struct nlmsghdr *n, u32 clid,
994                         struct Qdisc *old, struct Qdisc *new)
995 {
996         struct sk_buff *skb;
997         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
998
999         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1000         if (!skb)
1001                 return -ENOBUFS;
1002
1003         if (old && !tc_qdisc_dump_ignore(old, false)) {
1004                 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1005                                   0, RTM_DELQDISC) < 0)
1006                         goto err_out;
1007         }
1008         if (new && !tc_qdisc_dump_ignore(new, false)) {
1009                 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1010                                   old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1011                         goto err_out;
1012         }
1013
1014         if (skb->len)
1015                 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1016                                       n->nlmsg_flags & NLM_F_ECHO);
1017
1018 err_out:
1019         kfree_skb(skb);
1020         return -EINVAL;
1021 }
1022
1023 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
1024                                struct nlmsghdr *n, u32 clid,
1025                                struct Qdisc *old, struct Qdisc *new)
1026 {
1027         if (new || old)
1028                 qdisc_notify(net, skb, n, clid, old, new);
1029
1030         if (old)
1031                 qdisc_put(old);
1032 }
1033
1034 static void qdisc_clear_nolock(struct Qdisc *sch)
1035 {
1036         sch->flags &= ~TCQ_F_NOLOCK;
1037         if (!(sch->flags & TCQ_F_CPUSTATS))
1038                 return;
1039
1040         free_percpu(sch->cpu_bstats);
1041         free_percpu(sch->cpu_qstats);
1042         sch->cpu_bstats = NULL;
1043         sch->cpu_qstats = NULL;
1044         sch->flags &= ~TCQ_F_CPUSTATS;
1045 }
1046
1047 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
1048  * to device "dev".
1049  *
1050  * When appropriate send a netlink notification using 'skb'
1051  * and "n".
1052  *
1053  * On success, destroy old qdisc.
1054  */
1055
1056 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
1057                        struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
1058                        struct Qdisc *new, struct Qdisc *old,
1059                        struct netlink_ext_ack *extack)
1060 {
1061         struct Qdisc *q = old;
1062         struct net *net = dev_net(dev);
1063
1064         if (parent == NULL) {
1065                 unsigned int i, num_q, ingress;
1066
1067                 ingress = 0;
1068                 num_q = dev->num_tx_queues;
1069                 if ((q && q->flags & TCQ_F_INGRESS) ||
1070                     (new && new->flags & TCQ_F_INGRESS)) {
1071                         num_q = 1;
1072                         ingress = 1;
1073                         if (!dev_ingress_queue(dev)) {
1074                                 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
1075                                 return -ENOENT;
1076                         }
1077                 }
1078
1079                 if (dev->flags & IFF_UP)
1080                         dev_deactivate(dev);
1081
1082                 qdisc_offload_graft_root(dev, new, old, extack);
1083
1084                 if (new && new->ops->attach && !ingress)
1085                         goto skip;
1086
1087                 for (i = 0; i < num_q; i++) {
1088                         struct netdev_queue *dev_queue = dev_ingress_queue(dev);
1089
1090                         if (!ingress)
1091                                 dev_queue = netdev_get_tx_queue(dev, i);
1092
1093                         old = dev_graft_qdisc(dev_queue, new);
1094                         if (new && i > 0)
1095                                 qdisc_refcount_inc(new);
1096
1097                         if (!ingress)
1098                                 qdisc_put(old);
1099                 }
1100
1101 skip:
1102                 if (!ingress) {
1103                         old = rtnl_dereference(dev->qdisc);
1104                         if (new && !new->ops->attach)
1105                                 qdisc_refcount_inc(new);
1106                         rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
1107
1108                         notify_and_destroy(net, skb, n, classid, old, new);
1109
1110                         if (new && new->ops->attach)
1111                                 new->ops->attach(new);
1112                 } else {
1113                         notify_and_destroy(net, skb, n, classid, old, new);
1114                 }
1115
1116                 if (dev->flags & IFF_UP)
1117                         dev_activate(dev);
1118         } else {
1119                 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1120                 unsigned long cl;
1121                 int err;
1122
1123                 /* Only support running class lockless if parent is lockless */
1124                 if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
1125                         qdisc_clear_nolock(new);
1126
1127                 if (!cops || !cops->graft)
1128                         return -EOPNOTSUPP;
1129
1130                 cl = cops->find(parent, classid);
1131                 if (!cl) {
1132                         NL_SET_ERR_MSG(extack, "Specified class not found");
1133                         return -ENOENT;
1134                 }
1135
1136                 err = cops->graft(parent, cl, new, &old, extack);
1137                 if (err)
1138                         return err;
1139                 notify_and_destroy(net, skb, n, classid, old, new);
1140         }
1141         return 0;
1142 }
1143
1144 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1145                                    struct netlink_ext_ack *extack)
1146 {
1147         u32 block_index;
1148
1149         if (tca[TCA_INGRESS_BLOCK]) {
1150                 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1151
1152                 if (!block_index) {
1153                         NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1154                         return -EINVAL;
1155                 }
1156                 if (!sch->ops->ingress_block_set) {
1157                         NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1158                         return -EOPNOTSUPP;
1159                 }
1160                 sch->ops->ingress_block_set(sch, block_index);
1161         }
1162         if (tca[TCA_EGRESS_BLOCK]) {
1163                 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1164
1165                 if (!block_index) {
1166                         NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1167                         return -EINVAL;
1168                 }
1169                 if (!sch->ops->egress_block_set) {
1170                         NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1171                         return -EOPNOTSUPP;
1172                 }
1173                 sch->ops->egress_block_set(sch, block_index);
1174         }
1175         return 0;
1176 }
1177
1178 /*
1179    Allocate and initialize new qdisc.
1180
1181    Parameters are passed via the parsed netlink attributes in tca.
1182  */
1183
/*
 * dev/dev_queue: device and queue the new qdisc is allocated for.
 * parent:        stored in sch->parent.
 * handle:        requested handle; TC_H_INGRESS marks the qdisc as ingress,
 *                0 asks for an automatically allocated handle.
 * tca:           attribute table; TCA_KIND, TCA_OPTIONS, TCA_STAB, TCA_RATE
 *                and the block index attributes are consumed here.
 * errp:          receives the negative errno when NULL is returned.
 *
 * Returns the new qdisc, or NULL with *errp set. *errp == -EAGAIN means
 * the RTNL lock was dropped to load a module and the caller has to
 * replay the request.
 */
1184 static struct Qdisc *qdisc_create(struct net_device *dev,
1185                                   struct netdev_queue *dev_queue,
1186                                   u32 parent, u32 handle,
1187                                   struct nlattr **tca, int *errp,
1188                                   struct netlink_ext_ack *extack)
1189 {
1190         int err;
1191         struct nlattr *kind = tca[TCA_KIND];
1192         struct Qdisc *sch;
1193         struct Qdisc_ops *ops;
1194         struct qdisc_size_table *stab;
1195
1196         ops = qdisc_lookup_ops(kind);
1197 #ifdef CONFIG_MODULES
1198         if (ops == NULL && kind != NULL) {
1199                 char name[IFNAMSIZ];
1200                 if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
1201                         /* We dropped the RTNL semaphore in order to
1202                          * perform the module load.  So, even if we
1203                          * succeeded in loading the module we have to
1204                          * tell the caller to replay the request.  We
1205                          * indicate this using -EAGAIN.
1206                          * We replay the request because the device may
1207                          * go away in the mean time.
1208                          */
1209                         rtnl_unlock();
1210                         request_module("sch_%s", name);
1211                         rtnl_lock();
1212                         ops = qdisc_lookup_ops(kind);
1213                         if (ops != NULL) {
1214                                 /* We will try again qdisc_lookup_ops,
1215                                  * so don't keep a reference.
1216                                  */
1217                                 module_put(ops->owner);
1218                                 err = -EAGAIN;
1219                                 goto err_out;
1220                         }
1221                 }
1222         }
1223 #endif
1224
1225         err = -ENOENT;
1226         if (!ops) {
1227                 NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
1228                 goto err_out;
1229         }
1230
1231         sch = qdisc_alloc(dev_queue, ops, extack);
1232         if (IS_ERR(sch)) {
1233                 err = PTR_ERR(sch);
1234                 goto err_out2;
1235         }
1236
1237         sch->parent = parent;
1238
             /* Ingress qdiscs get a fixed handle; others use the requested
              * one or, when it is 0, a freshly allocated one.
              */
1239         if (handle == TC_H_INGRESS) {
1240                 sch->flags |= TCQ_F_INGRESS;
1241                 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1242         } else {
1243                 if (handle == 0) {
1244                         handle = qdisc_alloc_handle(dev);
1245                         if (handle == 0) {
1246                                 NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1247                                 err = -ENOSPC;
1248                                 goto err_out3;
1249                         }
1250                 }
1251                 if (!netif_is_multiqueue(dev))
1252                         sch->flags |= TCQ_F_ONETXQUEUE;
1253         }
1254
1255         sch->handle = handle;
1256
1257         /* This exist to keep backward compatible with a userspace
1258          * loophole, what allowed userspace to get IFF_NO_QUEUE
1259          * facility on older kernels by setting tx_queue_len=0 (prior
1260          * to qdisc init), and then forgot to reinit tx_queue_len
1261          * before again attaching a qdisc.
1262          */
1263         if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1264                 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1265                 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1266         }
1267
1268         err = qdisc_block_indexes_set(sch, tca, extack);
1269         if (err)
1270                 goto err_out3;
1271
1272         if (ops->init) {
1273                 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1274                 if (err != 0)
1275                         goto err_out5;
1276         }
1277
             /* From here on failures unwind through err_out4, which also
              * releases the size table attached below.
              */
1278         if (tca[TCA_STAB]) {
1279                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1280                 if (IS_ERR(stab)) {
1281                         err = PTR_ERR(stab);
1282                         goto err_out4;
1283                 }
1284                 rcu_assign_pointer(sch->stab, stab);
1285         }
1286         if (tca[TCA_RATE]) {
1287                 err = -EOPNOTSUPP;
1288                 if (sch->flags & TCQ_F_MQROOT) {
1289                         NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1290                         goto err_out4;
1291                 }
1292
1293                 err = gen_new_estimator(&sch->bstats,
1294                                         sch->cpu_bstats,
1295                                         &sch->rate_est,
1296                                         NULL,
1297                                         true,
1298                                         tca[TCA_RATE]);
1299                 if (err) {
1300                         NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1301                         goto err_out4;
1302                 }
1303         }
1304
1305         qdisc_hash_add(sch, false);
1306         trace_qdisc_create(ops, dev, parent);
1307
1308         return sch;
1309
     /* Error unwinding: each label falls through to the ones below it. */
1310 err_out5:
1311         /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1312         if (ops->destroy)
1313                 ops->destroy(sch);
1314 err_out3:
             /* Presumably undoes the device reference taken by qdisc_alloc(). */
1315         netdev_put(dev, &sch->dev_tracker);
1316         qdisc_free(sch);
1317 err_out2:
             /* Presumably drops the module reference held since qdisc_lookup_ops(). */
1318         module_put(ops->owner);
1319 err_out:
1320         *errp = err;
1321         return NULL;
1322
     /* Reached only by goto: failures after a successful ops->init(). */
1323 err_out4:
1324         /*
1325          * Any broken qdiscs that would require a ops->reset() here?
1326          * The qdisc was never in action so it shouldn't be necessary.
1327          */
1328         qdisc_put_stab(rtnl_dereference(sch->stab));
1329         if (ops->destroy)
1330                 ops->destroy(sch);
1331         goto err_out3;
1332 }
1333
1334 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1335                         struct netlink_ext_ack *extack)
1336 {
1337         struct qdisc_size_table *ostab, *stab = NULL;
1338         int err = 0;
1339
1340         if (tca[TCA_OPTIONS]) {
1341                 if (!sch->ops->change) {
1342                         NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1343                         return -EINVAL;
1344                 }
1345                 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1346                         NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1347                         return -EOPNOTSUPP;
1348                 }
1349                 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1350                 if (err)
1351                         return err;
1352         }
1353
1354         if (tca[TCA_STAB]) {
1355                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1356                 if (IS_ERR(stab))
1357                         return PTR_ERR(stab);
1358         }
1359
1360         ostab = rtnl_dereference(sch->stab);
1361         rcu_assign_pointer(sch->stab, stab);
1362         qdisc_put_stab(ostab);
1363
1364         if (tca[TCA_RATE]) {
1365                 /* NB: ignores errors from replace_estimator
1366                    because change can't be undone. */
1367                 if (sch->flags & TCQ_F_MQROOT)
1368                         goto out;
1369                 gen_replace_estimator(&sch->bstats,
1370                                       sch->cpu_bstats,
1371                                       &sch->rate_est,
1372                                       NULL,
1373                                       true,
1374                                       tca[TCA_RATE]);
1375         }
1376 out:
1377         return 0;
1378 }
1379
/* Walker state for check_loop(). "w" has to stay the first member:
 * check_loop_fn() casts the struct qdisc_walker pointer it receives
 * back to a struct check_loop_arg pointer.
 */
1380 struct check_loop_arg {
1381         struct qdisc_walker     w;
             /* qdisc that must not show up among the visited leaves */
1382         struct Qdisc            *p;
             /* current nesting level; check_loop_fn() reports a loop when it exceeds 7 */
1383         int                     depth;
1384 };
1385
1386 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1387                          struct qdisc_walker *w);
1388
1389 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1390 {
1391         struct check_loop_arg   arg;
1392
1393         if (q->ops->cl_ops == NULL)
1394                 return 0;
1395
1396         arg.w.stop = arg.w.skip = arg.w.count = 0;
1397         arg.w.fn = check_loop_fn;
1398         arg.depth = depth;
1399         arg.p = p;
1400         q->ops->cl_ops->walk(q, &arg.w);
1401         return arg.w.stop ? -ELOOP : 0;
1402 }
1403
1404 static int
1405 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1406 {
1407         struct Qdisc *leaf;
1408         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1409         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1410
1411         leaf = cops->leaf(q, cl);
1412         if (leaf) {
1413                 if (leaf == arg->p || arg->depth > 7)
1414                         return -ELOOP;
1415                 return check_loop(leaf, arg->p, arg->depth + 1);
1416         }
1417         return 0;
1418 }
1419
/* Attribute validation policy handed to nlmsg_parse_deprecated() by the
 * qdisc request handlers in this file. It is not static, so it is
 * presumably shared with other tc code as well.
 */
1420 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1421         [TCA_KIND]              = { .type = NLA_STRING },
1422         [TCA_RATE]              = { .type = NLA_BINARY,
1423                                     .len = sizeof(struct tc_estimator) },
1424         [TCA_STAB]              = { .type = NLA_NESTED },
1425         [TCA_DUMP_INVISIBLE]    = { .type = NLA_FLAG },
1426         [TCA_CHAIN]             = { .type = NLA_U32 },
1427         [TCA_INGRESS_BLOCK]     = { .type = NLA_U32 },
1428         [TCA_EGRESS_BLOCK]      = { .type = NLA_U32 },
1429 };
1430
1431 /*
1432  * Delete/get qdisc.
1433  */
1434
1435 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1436                         struct netlink_ext_ack *extack)
1437 {
1438         struct net *net = sock_net(skb->sk);
1439         struct tcmsg *tcm = nlmsg_data(n);
1440         struct nlattr *tca[TCA_MAX + 1];
1441         struct net_device *dev;
1442         u32 clid;
1443         struct Qdisc *q = NULL;
1444         struct Qdisc *p = NULL;
1445         int err;
1446
1447         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1448                                      rtm_tca_policy, extack);
1449         if (err < 0)
1450                 return err;
1451
1452         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1453         if (!dev)
1454                 return -ENODEV;
1455
1456         clid = tcm->tcm_parent;
1457         if (clid) {
1458                 if (clid != TC_H_ROOT) {
1459                         if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1460                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1461                                 if (!p) {
1462                                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1463                                         return -ENOENT;
1464                                 }
1465                                 q = qdisc_leaf(p, clid);
1466                         } else if (dev_ingress_queue(dev)) {
1467                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1468                         }
1469                 } else {
1470                         q = rtnl_dereference(dev->qdisc);
1471                 }
1472                 if (!q) {
1473                         NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1474                         return -ENOENT;
1475                 }
1476
1477                 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1478                         NL_SET_ERR_MSG(extack, "Invalid handle");
1479                         return -EINVAL;
1480                 }
1481         } else {
1482                 q = qdisc_lookup(dev, tcm->tcm_handle);
1483                 if (!q) {
1484                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1485                         return -ENOENT;
1486                 }
1487         }
1488
1489         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1490                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1491                 return -EINVAL;
1492         }
1493
1494         if (n->nlmsg_type == RTM_DELQDISC) {
1495                 if (!clid) {
1496                         NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1497                         return -EINVAL;
1498                 }
1499                 if (q->handle == 0) {
1500                         NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1501                         return -ENOENT;
1502                 }
1503                 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1504                 if (err != 0)
1505                         return err;
1506         } else {
1507                 qdisc_notify(net, skb, n, clid, NULL, q);
1508         }
1509         return 0;
1510 }
1511
1512 /*
1513  * Create/change qdisc.
1514  */
1515
/*
 * Depending on the request's handle, parent and NLM_F_* flags this ends
 * in one of three ways:
 *   - the qdisc found is changed in place (qdisc_change()),
 *   - an existing qdisc with the requested handle is grafted under the
 *     given parent (label "graft"),
 *   - a new qdisc is created and grafted (label "create_n_graft").
 *
 * Returns 0 on success or a negative errno.
 */
1516 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1517                            struct netlink_ext_ack *extack)
1518 {
1519         struct net *net = sock_net(skb->sk);
1520         struct tcmsg *tcm;
1521         struct nlattr *tca[TCA_MAX + 1];
1522         struct net_device *dev;
1523         u32 clid;
1524         struct Qdisc *q, *p;
1525         int err;
1526
     /* Re-entered from below when qdisc_create() reports -EAGAIN. */
1527 replay:
1528         /* Reinit, just in case something touches this. */
1529         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1530                                      rtm_tca_policy, extack);
1531         if (err < 0)
1532                 return err;
1533
1534         tcm = nlmsg_data(n);
1535         clid = tcm->tcm_parent;
1536         q = p = NULL;
1537
1538         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1539         if (!dev)
1540                 return -ENODEV;
1541
1542
             /* With a parent given, q becomes the qdisc currently attached
              * there (if any) and p the qdisc owning the class.
              */
1543         if (clid) {
1544                 if (clid != TC_H_ROOT) {
1545                         if (clid != TC_H_INGRESS) {
1546                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1547                                 if (!p) {
1548                                         NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1549                                         return -ENOENT;
1550                                 }
1551                                 q = qdisc_leaf(p, clid);
1552                         } else if (dev_ingress_queue_create(dev)) {
1553                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1554                         }
1555                 } else {
1556                         q = rtnl_dereference(dev->qdisc);
1557                 }
1558
1559                 /* It may be default qdisc, ignore it */
1560                 if (q && q->handle == 0)
1561                         q = NULL;
1562
                     /* Nothing attached, no handle requested, or the handle
                      * differs from what is attached: decide between graft,
                      * create and change.
                      */
1563                 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1564                         if (tcm->tcm_handle) {
1565                                 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1566                                         NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1567                                         return -EEXIST;
1568                                 }
1569                                 if (TC_H_MIN(tcm->tcm_handle)) {
1570                                         NL_SET_ERR_MSG(extack, "Invalid minor handle");
1571                                         return -EINVAL;
1572                                 }
                                     /* Is there already a qdisc with the requested handle? */
1573                                 q = qdisc_lookup(dev, tcm->tcm_handle);
1574                                 if (!q)
1575                                         goto create_n_graft;
1576                                 if (n->nlmsg_flags & NLM_F_EXCL) {
1577                                         NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1578                                         return -EEXIST;
1579                                 }
1580                                 if (tca[TCA_KIND] &&
1581                                     nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1582                                         NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1583                                         return -EINVAL;
1584                                 }
1585                                 if (q == p ||
1586                                     (p && check_loop(q, p, 0))) {
1587                                         NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1588                                         return -ELOOP;
1589                                 }
                                     /* Reference for the graft; dropped again below if it fails. */
1590                                 qdisc_refcount_inc(q);
1591                                 goto graft;
1592                         } else {
1593                                 if (!q)
1594                                         goto create_n_graft;
1595
1596                                 /* This magic test requires explanation.
1597                                  *
1598                                  *   We know, that some child q is already
1599                                  *   attached to this parent and have choice:
1600                                  *   either to change it or to create/graft new one.
1601                                  *
1602                                  *   1. We are allowed to create/graft only
1603                                  *   if CREATE and REPLACE flags are set.
1604                                  *
1605                                  *   2. If EXCL is set, requestor wanted to say,
1606                                  *   that qdisc tcm_handle is not expected
1607                                  *   to exist, so that we choose create/graft too.
1608                                  *
1609                                  *   3. The last case is when no flags are set.
1610                                  *   Alas, it is sort of hole in API, we
1611                                  *   cannot decide what to do unambiguously.
1612                                  *   For now we select create/graft, if
1613                                  *   user gave KIND, which does not match existing.
1614                                  */
1615                                 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1616                                     (n->nlmsg_flags & NLM_F_REPLACE) &&
1617                                     ((n->nlmsg_flags & NLM_F_EXCL) ||
1618                                      (tca[TCA_KIND] &&
1619                                       nla_strcmp(tca[TCA_KIND], q->ops->id))))
1620                                         goto create_n_graft;
1621                         }
1622                 }
1623         } else {
                     /* No parent given: the qdisc is addressed by handle only. */
1624                 if (!tcm->tcm_handle) {
1625                         NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1626                         return -EINVAL;
1627                 }
1628                 q = qdisc_lookup(dev, tcm->tcm_handle);
1629         }
1630
1631         /* Change qdisc parameters */
1632         if (!q) {
1633                 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1634                 return -ENOENT;
1635         }
1636         if (n->nlmsg_flags & NLM_F_EXCL) {
1637                 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1638                 return -EEXIST;
1639         }
1640         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1641                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1642                 return -EINVAL;
1643         }
1644         err = qdisc_change(q, tca, extack);
1645         if (err == 0)
1646                 qdisc_notify(net, skb, n, clid, NULL, q);
1647         return err;
1648
     /* Create a new qdisc, then fall through to graft it. */
1649 create_n_graft:
1650         if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1651                 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1652                 return -ENOENT;
1653         }
1654         if (clid == TC_H_INGRESS) {
1655                 if (dev_ingress_queue(dev)) {
1656                         q = qdisc_create(dev, dev_ingress_queue(dev),
1657                                          tcm->tcm_parent, tcm->tcm_parent,
1658                                          tca, &err, extack);
1659                 } else {
1660                         NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1661                         err = -ENOENT;
1662                 }
1663         } else {
1664                 struct netdev_queue *dev_queue;
1665
                     /* Queue choice: the parent's select_queue() callback if it
                      * has one, else the parent's own queue, else tx queue 0.
                      */
1666                 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1667                         dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1668                 else if (p)
1669                         dev_queue = p->dev_queue;
1670                 else
1671                         dev_queue = netdev_get_tx_queue(dev, 0);
1672
1673                 q = qdisc_create(dev, dev_queue,
1674                                  tcm->tcm_parent, tcm->tcm_handle,
1675                                  tca, &err, extack);
1676         }
1677         if (q == NULL) {
                     /* -EAGAIN: qdisc_create() asks for the request to be replayed. */
1678                 if (err == -EAGAIN)
1679                         goto replay;
1680                 return err;
1681         }
1682
1683 graft:
1684         err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1685         if (err) {
                     /* The graft did not take q over; drop our reference. */
1686                 if (q)
1687                         qdisc_put(q);
1688                 return err;
1689         }
1690
1691         return 0;
1692 }
1693
1694 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1695                               struct netlink_callback *cb,
1696                               int *q_idx_p, int s_q_idx, bool recur,
1697                               bool dump_invisible)
1698 {
1699         int ret = 0, q_idx = *q_idx_p;
1700         struct Qdisc *q;
1701         int b;
1702
1703         if (!root)
1704                 return 0;
1705
1706         q = root;
1707         if (q_idx < s_q_idx) {
1708                 q_idx++;
1709         } else {
1710                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1711                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1712                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1713                                   RTM_NEWQDISC) <= 0)
1714                         goto done;
1715                 q_idx++;
1716         }
1717
1718         /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1719          * itself has already been dumped.
1720          *
1721          * If we've already dumped the top-level (ingress) qdisc above and the global
1722          * qdisc hashtable, we don't want to hit it again
1723          */
1724         if (!qdisc_dev(root) || !recur)
1725                 goto out;
1726
1727         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1728                 if (q_idx < s_q_idx) {
1729                         q_idx++;
1730                         continue;
1731                 }
1732                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1733                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1734                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1735                                   RTM_NEWQDISC) <= 0)
1736                         goto done;
1737                 q_idx++;
1738         }
1739
1740 out:
1741         *q_idx_p = q_idx;
1742         return ret;
1743 done:
1744         ret = -1;
1745         goto out;
1746 }
1747
1748 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1749 {
1750         struct net *net = sock_net(skb->sk);
1751         int idx, q_idx;
1752         int s_idx, s_q_idx;
1753         struct net_device *dev;
1754         const struct nlmsghdr *nlh = cb->nlh;
1755         struct nlattr *tca[TCA_MAX + 1];
1756         int err;
1757
1758         s_idx = cb->args[0];
1759         s_q_idx = q_idx = cb->args[1];
1760
1761         idx = 0;
1762         ASSERT_RTNL();
1763
1764         err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1765                                      rtm_tca_policy, cb->extack);
1766         if (err < 0)
1767                 return err;
1768
1769         for_each_netdev(net, dev) {
1770                 struct netdev_queue *dev_queue;
1771
1772                 if (idx < s_idx)
1773                         goto cont;
1774                 if (idx > s_idx)
1775                         s_q_idx = 0;
1776                 q_idx = 0;
1777
1778                 if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1779                                        skb, cb, &q_idx, s_q_idx,
1780                                        true, tca[TCA_DUMP_INVISIBLE]) < 0)
1781                         goto done;
1782
1783                 dev_queue = dev_ingress_queue(dev);
1784                 if (dev_queue &&
1785                     tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1786                                        &q_idx, s_q_idx, false,
1787                                        tca[TCA_DUMP_INVISIBLE]) < 0)
1788                         goto done;
1789
1790 cont:
1791                 idx++;
1792         }
1793
1794 done:
1795         cb->args[0] = idx;
1796         cb->args[1] = q_idx;
1797
1798         return skb->len;
1799 }
1800
1801
1802
1803 /************************************************
1804  *      Traffic classes manipulation.           *
1805  ************************************************/
1806
1807 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1808                           unsigned long cl,
1809                           u32 portid, u32 seq, u16 flags, int event)
1810 {
1811         struct tcmsg *tcm;
1812         struct nlmsghdr  *nlh;
1813         unsigned char *b = skb_tail_pointer(skb);
1814         struct gnet_dump d;
1815         const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1816
1817         cond_resched();
1818         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1819         if (!nlh)
1820                 goto out_nlmsg_trim;
1821         tcm = nlmsg_data(nlh);
1822         tcm->tcm_family = AF_UNSPEC;
1823         tcm->tcm__pad1 = 0;
1824         tcm->tcm__pad2 = 0;
1825         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1826         tcm->tcm_parent = q->handle;
1827         tcm->tcm_handle = q->handle;
1828         tcm->tcm_info = 0;
1829         if (nla_put_string(skb, TCA_KIND, q->ops->id))
1830                 goto nla_put_failure;
1831         if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1832                 goto nla_put_failure;
1833
1834         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1835                                          NULL, &d, TCA_PAD) < 0)
1836                 goto nla_put_failure;
1837
1838         if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1839                 goto nla_put_failure;
1840
1841         if (gnet_stats_finish_copy(&d) < 0)
1842                 goto nla_put_failure;
1843
1844         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1845         return skb->len;
1846
1847 out_nlmsg_trim:
1848 nla_put_failure:
1849         nlmsg_trim(skb, b);
1850         return -1;
1851 }
1852
1853 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1854                          struct nlmsghdr *n, struct Qdisc *q,
1855                          unsigned long cl, int event)
1856 {
1857         struct sk_buff *skb;
1858         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1859
1860         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1861         if (!skb)
1862                 return -ENOBUFS;
1863
1864         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1865                 kfree_skb(skb);
1866                 return -EINVAL;
1867         }
1868
1869         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1870                               n->nlmsg_flags & NLM_F_ECHO);
1871 }
1872
1873 static int tclass_del_notify(struct net *net,
1874                              const struct Qdisc_class_ops *cops,
1875                              struct sk_buff *oskb, struct nlmsghdr *n,
1876                              struct Qdisc *q, unsigned long cl,
1877                              struct netlink_ext_ack *extack)
1878 {
1879         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1880         struct sk_buff *skb;
1881         int err = 0;
1882
1883         if (!cops->delete)
1884                 return -EOPNOTSUPP;
1885
1886         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1887         if (!skb)
1888                 return -ENOBUFS;
1889
1890         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1891                            RTM_DELTCLASS) < 0) {
1892                 kfree_skb(skb);
1893                 return -EINVAL;
1894         }
1895
1896         err = cops->delete(q, cl, extack);
1897         if (err) {
1898                 kfree_skb(skb);
1899                 return err;
1900         }
1901
1902         err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1903                              n->nlmsg_flags & NLM_F_ECHO);
1904         return err;
1905 }
1906
1907 #ifdef CONFIG_NET_CLS
1908
1909 struct tcf_bind_args {
1910         struct tcf_walker w;
1911         unsigned long base;
1912         unsigned long cl;
1913         u32 classid;
1914 };
1915
1916 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1917 {
1918         struct tcf_bind_args *a = (void *)arg;
1919
1920         if (n && tp->ops->bind_class) {
1921                 struct Qdisc *q = tcf_block_q(tp->chain->block);
1922
1923                 sch_tree_lock(q);
1924                 tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
1925                 sch_tree_unlock(q);
1926         }
1927         return 0;
1928 }
1929
1930 struct tc_bind_class_args {
1931         struct qdisc_walker w;
1932         unsigned long new_cl;
1933         u32 portid;
1934         u32 clid;
1935 };
1936
1937 static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
1938                                 struct qdisc_walker *w)
1939 {
1940         struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
1941         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1942         struct tcf_block *block;
1943         struct tcf_chain *chain;
1944
1945         block = cops->tcf_block(q, cl, NULL);
1946         if (!block)
1947                 return 0;
1948         for (chain = tcf_get_next_chain(block, NULL);
1949              chain;
1950              chain = tcf_get_next_chain(block, chain)) {
1951                 struct tcf_proto *tp;
1952
1953                 for (tp = tcf_get_next_proto(chain, NULL);
1954                      tp; tp = tcf_get_next_proto(chain, tp)) {
1955                         struct tcf_bind_args arg = {};
1956
1957                         arg.w.fn = tcf_node_bind;
1958                         arg.classid = a->clid;
1959                         arg.base = cl;
1960                         arg.cl = a->new_cl;
1961                         tp->ops->walk(tp, &arg.w, true);
1962                 }
1963         }
1964
1965         return 0;
1966 }
1967
1968 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1969                            unsigned long new_cl)
1970 {
1971         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1972         struct tc_bind_class_args args = {};
1973
1974         if (!cops->tcf_block)
1975                 return;
1976         args.portid = portid;
1977         args.clid = clid;
1978         args.new_cl = new_cl;
1979         args.w.fn = tc_bind_class_walker;
1980         q->ops->cl_ops->walk(q, &args.w);
1981 }
1982
1983 #else
1984
1985 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1986                            unsigned long new_cl)
1987 {
1988 }
1989
1990 #endif
1991
1992 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1993                          struct netlink_ext_ack *extack)
1994 {
1995         struct net *net = sock_net(skb->sk);
1996         struct tcmsg *tcm = nlmsg_data(n);
1997         struct nlattr *tca[TCA_MAX + 1];
1998         struct net_device *dev;
1999         struct Qdisc *q = NULL;
2000         const struct Qdisc_class_ops *cops;
2001         unsigned long cl = 0;
2002         unsigned long new_cl;
2003         u32 portid;
2004         u32 clid;
2005         u32 qid;
2006         int err;
2007
2008         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2009                                      rtm_tca_policy, extack);
2010         if (err < 0)
2011                 return err;
2012
2013         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2014         if (!dev)
2015                 return -ENODEV;
2016
2017         /*
2018            parent == TC_H_UNSPEC - unspecified parent.
2019            parent == TC_H_ROOT   - class is root, which has no parent.
2020            parent == X:0         - parent is root class.
2021            parent == X:Y         - parent is a node in hierarchy.
2022            parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
2023
2024            handle == 0:0         - generate handle from kernel pool.
2025            handle == 0:Y         - class is X:Y, where X:0 is qdisc.
2026            handle == X:Y         - clear.
2027            handle == X:0         - root class.
2028          */
2029
2030         /* Step 1. Determine qdisc handle X:0 */
2031
2032         portid = tcm->tcm_parent;
2033         clid = tcm->tcm_handle;
2034         qid = TC_H_MAJ(clid);
2035
2036         if (portid != TC_H_ROOT) {
2037                 u32 qid1 = TC_H_MAJ(portid);
2038
2039                 if (qid && qid1) {
2040                         /* If both majors are known, they must be identical. */
2041                         if (qid != qid1)
2042                                 return -EINVAL;
2043                 } else if (qid1) {
2044                         qid = qid1;
2045                 } else if (qid == 0)
2046                         qid = rtnl_dereference(dev->qdisc)->handle;
2047
2048                 /* Now qid is genuine qdisc handle consistent
2049                  * both with parent and child.
2050                  *
2051                  * TC_H_MAJ(portid) still may be unspecified, complete it now.
2052                  */
2053                 if (portid)
2054                         portid = TC_H_MAKE(qid, portid);
2055         } else {
2056                 if (qid == 0)
2057                         qid = rtnl_dereference(dev->qdisc)->handle;
2058         }
2059
2060         /* OK. Locate qdisc */
2061         q = qdisc_lookup(dev, qid);
2062         if (!q)
2063                 return -ENOENT;
2064
2065         /* An check that it supports classes */
2066         cops = q->ops->cl_ops;
2067         if (cops == NULL)
2068                 return -EINVAL;
2069
2070         /* Now try to get class */
2071         if (clid == 0) {
2072                 if (portid == TC_H_ROOT)
2073                         clid = qid;
2074         } else
2075                 clid = TC_H_MAKE(qid, clid);
2076
2077         if (clid)
2078                 cl = cops->find(q, clid);
2079
2080         if (cl == 0) {
2081                 err = -ENOENT;
2082                 if (n->nlmsg_type != RTM_NEWTCLASS ||
2083                     !(n->nlmsg_flags & NLM_F_CREATE))
2084                         goto out;
2085         } else {
2086                 switch (n->nlmsg_type) {
2087                 case RTM_NEWTCLASS:
2088                         err = -EEXIST;
2089                         if (n->nlmsg_flags & NLM_F_EXCL)
2090                                 goto out;
2091                         break;
2092                 case RTM_DELTCLASS:
2093                         err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
2094                         /* Unbind the class with flilters with 0 */
2095                         tc_bind_tclass(q, portid, clid, 0);
2096                         goto out;
2097                 case RTM_GETTCLASS:
2098                         err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
2099                         goto out;
2100                 default:
2101                         err = -EINVAL;
2102                         goto out;
2103                 }
2104         }
2105
2106         if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2107                 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2108                 return -EOPNOTSUPP;
2109         }
2110
2111         new_cl = cl;
2112         err = -EOPNOTSUPP;
2113         if (cops->change)
2114                 err = cops->change(q, clid, portid, tca, &new_cl, extack);
2115         if (err == 0) {
2116                 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
2117                 /* We just create a new class, need to do reverse binding. */
2118                 if (cl != new_cl)
2119                         tc_bind_tclass(q, portid, clid, new_cl);
2120         }
2121 out:
2122         return err;
2123 }
2124
2125 struct qdisc_dump_args {
2126         struct qdisc_walker     w;
2127         struct sk_buff          *skb;
2128         struct netlink_callback *cb;
2129 };
2130
2131 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2132                             struct qdisc_walker *arg)
2133 {
2134         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2135
2136         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2137                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2138                               RTM_NEWTCLASS);
2139 }
2140
2141 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2142                                 struct tcmsg *tcm, struct netlink_callback *cb,
2143                                 int *t_p, int s_t)
2144 {
2145         struct qdisc_dump_args arg;
2146
2147         if (tc_qdisc_dump_ignore(q, false) ||
2148             *t_p < s_t || !q->ops->cl_ops ||
2149             (tcm->tcm_parent &&
2150              TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2151                 (*t_p)++;
2152                 return 0;
2153         }
2154         if (*t_p > s_t)
2155                 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2156         arg.w.fn = qdisc_class_dump;
2157         arg.skb = skb;
2158         arg.cb = cb;
2159         arg.w.stop  = 0;
2160         arg.w.skip = cb->args[1];
2161         arg.w.count = 0;
2162         q->ops->cl_ops->walk(q, &arg.w);
2163         cb->args[1] = arg.w.count;
2164         if (arg.w.stop)
2165                 return -1;
2166         (*t_p)++;
2167         return 0;
2168 }
2169
2170 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2171                                struct tcmsg *tcm, struct netlink_callback *cb,
2172                                int *t_p, int s_t, bool recur)
2173 {
2174         struct Qdisc *q;
2175         int b;
2176
2177         if (!root)
2178                 return 0;
2179
2180         if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2181                 return -1;
2182
2183         if (!qdisc_dev(root) || !recur)
2184                 return 0;
2185
2186         if (tcm->tcm_parent) {
2187                 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2188                 if (q && q != root &&
2189                     tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2190                         return -1;
2191                 return 0;
2192         }
2193         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2194                 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2195                         return -1;
2196         }
2197
2198         return 0;
2199 }
2200
2201 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2202 {
2203         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2204         struct net *net = sock_net(skb->sk);
2205         struct netdev_queue *dev_queue;
2206         struct net_device *dev;
2207         int t, s_t;
2208
2209         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2210                 return 0;
2211         dev = dev_get_by_index(net, tcm->tcm_ifindex);
2212         if (!dev)
2213                 return 0;
2214
2215         s_t = cb->args[0];
2216         t = 0;
2217
2218         if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2219                                 skb, tcm, cb, &t, s_t, true) < 0)
2220                 goto done;
2221
2222         dev_queue = dev_ingress_queue(dev);
2223         if (dev_queue &&
2224             tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2225                                 &t, s_t, false) < 0)
2226                 goto done;
2227
2228 done:
2229         cb->args[0] = t;
2230
2231         dev_put(dev);
2232         return skb->len;
2233 }
2234
2235 #ifdef CONFIG_PROC_FS
2236 static int psched_show(struct seq_file *seq, void *v)
2237 {
2238         seq_printf(seq, "%08x %08x %08x %08x\n",
2239                    (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2240                    1000000,
2241                    (u32)NSEC_PER_SEC / hrtimer_resolution);
2242
2243         return 0;
2244 }
2245
2246 static int __net_init psched_net_init(struct net *net)
2247 {
2248         struct proc_dir_entry *e;
2249
2250         e = proc_create_single("psched", 0, net->proc_net, psched_show);
2251         if (e == NULL)
2252                 return -ENOMEM;
2253
2254         return 0;
2255 }
2256
2257 static void __net_exit psched_net_exit(struct net *net)
2258 {
2259         remove_proc_entry("psched", net->proc_net);
2260 }
2261 #else
2262 static int __net_init psched_net_init(struct net *net)
2263 {
2264         return 0;
2265 }
2266
2267 static void __net_exit psched_net_exit(struct net *net)
2268 {
2269 }
2270 #endif
2271
2272 static struct pernet_operations psched_net_ops = {
2273         .init = psched_net_init,
2274         .exit = psched_net_exit,
2275 };
2276
2277 DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
2278
2279 static int __init pktsched_init(void)
2280 {
2281         int err;
2282
2283         err = register_pernet_subsys(&psched_net_ops);
2284         if (err) {
2285                 pr_err("pktsched_init: "
2286                        "cannot initialize per netns operations\n");
2287                 return err;
2288         }
2289
2290         register_qdisc(&pfifo_fast_ops);
2291         register_qdisc(&pfifo_qdisc_ops);
2292         register_qdisc(&bfifo_qdisc_ops);
2293         register_qdisc(&pfifo_head_drop_qdisc_ops);
2294         register_qdisc(&mq_qdisc_ops);
2295         register_qdisc(&noqueue_qdisc_ops);
2296
2297         rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2298         rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2299         rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2300                       0);
2301         rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2302         rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2303         rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2304                       0);
2305
2306         tc_wrapper_init();
2307
2308         return 0;
2309 }
2310
2311 subsys_initcall(pktsched_init);
This page took 0.159268 seconds and 4 git commands to generate.