// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2013 Cisco Systems, Inc, 2013.
 *
 * Author: Vijay Subramanian <[email protected]>
 * Author: Mythili Prabhu <[email protected]>
 *
 * ECN support is added by Naeem Khademi <[email protected]>
 * University of Oslo, Norway.
 *
 * References:
 * RFC 8033: https://tools.ietf.org/html/rfc8033
 */
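
/* Illustrative userspace configuration (a sketch using standard iproute2
 * syntax; the parameter values are arbitrary examples, not defaults taken
 * from this file):
 *
 *   tc qdisc add dev eth0 root pie limit 1000 target 15ms tupdate 15ms ecn
 *
 * Each keyword corresponds to one TCA_PIE_* netlink attribute parsed in
 * pie_change() below.
 */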

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/pie.h>

/* private data for the Qdisc */
struct pie_sched_data {
	struct pie_params params;
	struct pie_vars vars;
	struct pie_stats stats;
	struct timer_list adapt_timer;
	struct Qdisc *sch;
};

static bool drop_early(struct Qdisc *sch, u32 packet_size)
{
	struct pie_sched_data *q = qdisc_priv(sch);
	u64 rnd;
	u64 local_prob = q->vars.prob;
	u32 mtu = psched_mtu(qdisc_dev(sch));

	/* If there is still burst allowance left, skip random early drop */
	if (q->vars.burst_time > 0)
		return false;

	/* If current delay is less than half of target, and
	 * if drop prob is low already, disable early_drop
	 */
	if ((q->vars.qdelay < q->params.target / 2) &&
	    (q->vars.prob < MAX_PROB / 5))
		return false;

	/* If we have fewer than 2 mtu-sized packets, disable drop_early,
	 * similar to min_th in RED
	 */
	if (sch->qstats.backlog < 2 * mtu)
		return false;

	/* If bytemode is turned on, use packet size to compute new
	 * probability. Smaller packets will have lower drop prob in this case
	 */
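	/* Note: local_prob is divided by mtu before the multiply, so the
	 * intermediate result cannot overflow u64 even when local_prob is
	 * close to MAX_PROB (the branch below only runs for
	 * packet_size <= mtu).
	 */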
	if (q->params.bytemode && packet_size <= mtu)
		local_prob = (u64)packet_size * div_u64(local_prob, mtu);
	else
		local_prob = q->vars.prob;

	if (local_prob == 0) {
		q->vars.accu_prob = 0;
		q->vars.accu_prob_overflows = 0;
	}

	if (local_prob > MAX_PROB - q->vars.accu_prob)
		q->vars.accu_prob_overflows++;

	q->vars.accu_prob += local_prob;

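	/* Derandomization bounds in the spirit of RFC 8033, section 5.1:
	 * never drop while the accumulated probability is below ~0.85, and
	 * always drop once it reaches ~8.5. accu_prob counts in units of
	 * MAX_PROB, so 8.5 is represented as 8 overflows plus MAX_PROB / 2.
	 */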
	if (q->vars.accu_prob_overflows == 0 &&
	    q->vars.accu_prob < (MAX_PROB / 100) * 85)
		return false;
	if (q->vars.accu_prob_overflows == 8 &&
	    q->vars.accu_prob >= MAX_PROB / 2)
		return true;

	prandom_bytes(&rnd, 8);
	if (rnd < local_prob) {
		q->vars.accu_prob = 0;
		q->vars.accu_prob_overflows = 0;
		return true;
	}

	return false;
}

static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			     struct sk_buff **to_free)
{
	struct pie_sched_data *q = qdisc_priv(sch);
	bool enqueue = false;

	if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
		q->stats.overlimit++;
		goto out;
	}

	if (!drop_early(sch, skb->len)) {
		enqueue = true;
	} else if (q->params.ecn && (q->vars.prob <= MAX_PROB / 10) &&
		   INET_ECN_set_ce(skb)) {
		/* If packet is ecn capable, mark it if drop probability
		 * is lower than 10%, else drop it.
		 */
		q->stats.ecn_mark++;
		enqueue = true;
	}

	/* we can enqueue the packet */
	if (enqueue) {
		/* Set enqueue time only when dq_rate_estimator is disabled. */
		if (!q->params.dq_rate_estimator)
			pie_set_enqueue_time(skb);

		q->stats.packets_in++;
		if (qdisc_qlen(sch) > q->stats.maxq)
			q->stats.maxq = qdisc_qlen(sch);

		return qdisc_enqueue_tail(skb, sch);
	}

out:
	q->stats.dropped++;
	q->vars.accu_prob = 0;
	q->vars.accu_prob_overflows = 0;
	return qdisc_drop(skb, sch, to_free);
}

static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
	[TCA_PIE_TARGET] = {.type = NLA_U32},
	[TCA_PIE_LIMIT] = {.type = NLA_U32},
	[TCA_PIE_TUPDATE] = {.type = NLA_U32},
	[TCA_PIE_ALPHA] = {.type = NLA_U32},
	[TCA_PIE_BETA] = {.type = NLA_U32},
	[TCA_PIE_ECN] = {.type = NLA_U32},
	[TCA_PIE_BYTEMODE] = {.type = NLA_U32},
	[TCA_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32},
};

static int pie_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct pie_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_PIE_MAX + 1];
	unsigned int qlen, dropped = 0;
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy,
					  NULL);
	if (err < 0)
		return err;

	sch_tree_lock(sch);

	/* convert from microseconds to pschedtime */
	if (tb[TCA_PIE_TARGET]) {
		/* target is in us */
		u32 target = nla_get_u32(tb[TCA_PIE_TARGET]);

		/* convert to pschedtime */
		q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
	}

	/* tupdate is in jiffies */
	if (tb[TCA_PIE_TUPDATE])
		q->params.tupdate =
			usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));

	if (tb[TCA_PIE_LIMIT]) {
		u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);

		q->params.limit = limit;
		sch->limit = limit;
	}

	if (tb[TCA_PIE_ALPHA])
		q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]);

	if (tb[TCA_PIE_BETA])
		q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]);

	if (tb[TCA_PIE_ECN])
		q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]);

	if (tb[TCA_PIE_BYTEMODE])
		q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);

	if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
		q->params.dq_rate_estimator =
			nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);

	/* Drop excess packets if new limit is lower */
	qlen = sch->q.qlen;
	while (sch->q.qlen > sch->limit) {
		struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);

		dropped += qdisc_pkt_len(skb);
		qdisc_qstats_backlog_dec(sch, skb);
		rtnl_qdisc_drop(skb, sch);
	}
	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);

	sch_tree_unlock(sch);
	return 0;
}

static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
{
	struct pie_sched_data *q = qdisc_priv(sch);
	int qlen = sch->qstats.backlog;	/* current queue size in bytes */
	psched_time_t now = psched_get_time();
	u32 dtime = 0;

	/* If dq_rate_estimator is disabled, calculate qdelay using the
	 * packet timestamp.
	 */
	if (!q->params.dq_rate_estimator) {
		q->vars.qdelay = now - pie_get_enqueue_time(skb);

		if (q->vars.dq_tstamp != DTIME_INVALID)
			dtime = now - q->vars.dq_tstamp;

		q->vars.dq_tstamp = now;

		if (qlen == 0)
			q->vars.qdelay = 0;

		if (dtime == 0)
			return;

		goto burst_allowance_reduction;
	}

	/* If current queue is about 10 packets or more and dq_count is unset
	 * we have enough packets to calculate the drain rate. Save
	 * current time as dq_tstamp and start measurement cycle.
	 */
	if (qlen >= QUEUE_THRESHOLD && q->vars.dq_count == DQCOUNT_INVALID) {
		q->vars.dq_tstamp = psched_get_time();
		q->vars.dq_count = 0;
	}

	/* Calculate the average drain rate from this value. If queue length
	 * has receded to a small value, viz. <= QUEUE_THRESHOLD bytes, reset
	 * the dq_count to -1 as we don't have enough packets to calculate the
	 * drain rate anymore. The following if block is entered only when we
	 * have a substantial queue built up (QUEUE_THRESHOLD bytes or more)
	 * and we calculate the drain rate for the threshold here. dq_count is
	 * in bytes, time difference in psched_time, hence rate is in
	 * bytes/psched_time.
	 */
	if (q->vars.dq_count != DQCOUNT_INVALID) {
		q->vars.dq_count += skb->len;

		if (q->vars.dq_count >= QUEUE_THRESHOLD) {
			u32 count = q->vars.dq_count << PIE_SCALE;

			dtime = now - q->vars.dq_tstamp;

			if (dtime == 0)
				return;

			count = count / dtime;

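			/* avg_dq_rate is an EWMA of the measured drain rate,
			 * smoothing gain 1/8: avg = avg - avg/8 + count/8
			 */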
			if (q->vars.avg_dq_rate == 0)
				q->vars.avg_dq_rate = count;
			else
				q->vars.avg_dq_rate =
					(q->vars.avg_dq_rate -
					(q->vars.avg_dq_rate >> 3)) + (count >> 3);

			/* If the queue has receded below the threshold, we hold
			 * on to the last drain rate calculated, else we reset
			 * dq_count to 0 to re-enter the if block when the next
			 * packet is dequeued
			 */
			if (qlen < QUEUE_THRESHOLD) {
				q->vars.dq_count = DQCOUNT_INVALID;
			} else {
				q->vars.dq_count = 0;
				q->vars.dq_tstamp = psched_get_time();
			}

			goto burst_allowance_reduction;
		}
	}

	return;

burst_allowance_reduction:
	if (q->vars.burst_time > 0) {
		if (q->vars.burst_time > dtime)
			q->vars.burst_time -= dtime;
		else
			q->vars.burst_time = 0;
	}
}

static void calculate_probability(struct Qdisc *sch)
{
	struct pie_sched_data *q = qdisc_priv(sch);
	u32 qlen = sch->qstats.backlog;	/* queue size in bytes */
	psched_time_t qdelay = 0;	/* in pschedtime */
	psched_time_t qdelay_old = 0;	/* in pschedtime */
	s64 delta = 0;		/* determines the change in probability */
	u64 oldprob;
	u64 alpha, beta;
	u32 power;
	bool update_prob = true;

	if (q->params.dq_rate_estimator) {
		qdelay_old = q->vars.qdelay;
		q->vars.qdelay_old = q->vars.qdelay;

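		/* Little's law: queue delay = backlog / drain rate. The
		 * PIE_SCALE fixed-point factor carried in avg_dq_rate is
		 * cancelled by the shift below.
		 */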
		if (q->vars.avg_dq_rate > 0)
			qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
		else
			qdelay = 0;
	} else {
		qdelay = q->vars.qdelay;
		qdelay_old = q->vars.qdelay_old;
	}

	/* If qdelay is zero and qlen is not, it means qlen is very small,
	 * less than dequeue_rate, so we do not update probability in this
	 * round.
	 */
	if (qdelay == 0 && qlen != 0)
		update_prob = false;

	/* In the algorithm, alpha and beta are between 0 and 2 with typical
	 * value for alpha as 0.125. In this implementation, we use values 0-32
	 * passed from user space to represent this. Also, alpha and beta have
	 * unit of HZ and need to be scaled before they can be used to update
	 * probability. alpha/beta are updated locally below by scaling down
	 * by 16 to come to 0-2 range.
	 */
	alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
	beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;

	/* We scale alpha and beta differently depending on how heavy the
	 * congestion is. Please see RFC 8033 for details.
	 */
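	/* Net effect of the block below (cf. the decay factors in RFC 8033,
	 * section 5.2): prob < 1/10 scales alpha/beta by 1/2, prob < 1/100
	 * by 1/8, prob < 1/1000 by 1/32, and so on, an extra factor of 1/4
	 * per decade.
	 */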
	if (q->vars.prob < MAX_PROB / 10) {
		alpha >>= 1;
		beta >>= 1;

		power = 100;
		while (q->vars.prob < div_u64(MAX_PROB, power) &&
		       power <= 1000000) {
			alpha >>= 2;
			beta >>= 2;
			power *= 10;
		}
	}

	/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
	delta += alpha * (u64)(qdelay - q->params.target);
	delta += beta * (u64)(qdelay - qdelay_old);

	oldprob = q->vars.prob;

	/* to ensure we increase probability in steps of no more than 2% */
	if (delta > (s64)(MAX_PROB / (100 / 2)) &&
	    q->vars.prob >= MAX_PROB / 10)
		delta = (MAX_PROB / 100) * 2;

	/* Non-linear drop:
	 * Tune drop probability to increase quickly for high delays (>= 250ms)
	 * 250ms is derived through experiments and provides error protection
	 */

	if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC)))
		delta += MAX_PROB / (100 / 2);

	q->vars.prob += delta;

	if (delta > 0) {
		/* prevent overflow */
		if (q->vars.prob < oldprob) {
			q->vars.prob = MAX_PROB;
			/* Prevent normalization error. If probability is at
			 * maximum value already, we normalize it here, and
			 * skip the check to do a non-linear drop in the next
			 * section.
			 */
			update_prob = false;
		}
	} else {
		/* prevent underflow */
		if (q->vars.prob > oldprob)
			q->vars.prob = 0;
	}

	/* Non-linear drop in probability: Reduce drop probability quickly if
	 * delay is 0 for 2 consecutive Tupdate periods.
	 */

	if (qdelay == 0 && qdelay_old == 0 && update_prob)
		/* Reduce drop probability to 98.4% of its current value */
		q->vars.prob -= q->vars.prob / 64u;

	q->vars.qdelay = qdelay;
	q->vars.qlen_old = qlen;

	/* We restart the measurement cycle if the following conditions are met
	 * 1. If the delay has been low for 2 consecutive Tupdate periods
	 * 2. Calculated drop probability is zero
	 * 3. If dq_rate_estimator is enabled, we have at least one estimate
	 *    for the avg_dq_rate, i.e., it is a non-zero value
	 */
	if ((q->vars.qdelay < q->params.target / 2) &&
	    (q->vars.qdelay_old < q->params.target / 2) &&
	    q->vars.prob == 0 &&
	    (!q->params.dq_rate_estimator || q->vars.avg_dq_rate > 0)) {
		pie_vars_init(&q->vars);
	}

	if (!q->params.dq_rate_estimator)
		q->vars.qdelay_old = qdelay;
}

static void pie_timer(struct timer_list *t)
{
	struct pie_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	calculate_probability(sch);

	/* reset the timer to fire after 'tupdate'. tupdate is in jiffies. */
	if (q->params.tupdate)
		mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
	spin_unlock(root_lock);
}

static int pie_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct pie_sched_data *q = qdisc_priv(sch);

	pie_params_init(&q->params);
	pie_vars_init(&q->vars);
	sch->limit = q->params.limit;

	q->sch = sch;
	timer_setup(&q->adapt_timer, pie_timer, 0);

	if (opt) {
		int err = pie_change(sch, opt, extack);

		if (err)
			return err;
	}

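	/* Kick off the first adaptation round ~500 ms from now; after that,
	 * pie_timer() re-arms itself every tupdate.
	 */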
	mod_timer(&q->adapt_timer, jiffies + HZ / 2);
	return 0;
}

static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct pie_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!opts)
		goto nla_put_failure;

	/* convert target from pschedtime to us */
	if (nla_put_u32(skb, TCA_PIE_TARGET,
			((u32)PSCHED_TICKS2NS(q->params.target)) /
			NSEC_PER_USEC) ||
	    nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
	    nla_put_u32(skb, TCA_PIE_TUPDATE,
			jiffies_to_usecs(q->params.tupdate)) ||
	    nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
	    nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
	    nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
	    nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
	    nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
			q->params.dq_rate_estimator))
		goto nla_put_failure;

	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -1;
}

static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct pie_sched_data *q = qdisc_priv(sch);
	struct tc_pie_xstats st = {
		.prob		= q->vars.prob,
		.delay		= ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
				   NSEC_PER_USEC,
		.packets_in	= q->stats.packets_in,
		.overlimit	= q->stats.overlimit,
		.maxq		= q->stats.maxq,
		.dropped	= q->stats.dropped,
		.ecn_mark	= q->stats.ecn_mark,
	};

	/* avg_dq_rate is only valid if dq_rate_estimator is enabled */
	st.dq_rate_estimating = q->params.dq_rate_estimator;

	/* unscale and return dq_rate in bytes per sec */
	if (q->params.dq_rate_estimator)
		st.avg_dq_rate = q->vars.avg_dq_rate *
				 (PSCHED_TICKS_PER_SEC) >> PIE_SCALE;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb = qdisc_dequeue_head(sch);

	if (!skb)
		return NULL;

	pie_process_dequeue(sch, skb);
	return skb;
}

static void pie_reset(struct Qdisc *sch)
{
	struct pie_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	pie_vars_init(&q->vars);
}

static void pie_destroy(struct Qdisc *sch)
{
	struct pie_sched_data *q = qdisc_priv(sch);

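	/* Clearing tupdate keeps pie_timer() from re-arming itself while
	 * del_timer_sync() waits for a running timer to finish.
	 */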
	q->params.tupdate = 0;
	del_timer_sync(&q->adapt_timer);
}

static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
	.id		= "pie",
	.priv_size	= sizeof(struct pie_sched_data),
	.enqueue	= pie_qdisc_enqueue,
	.dequeue	= pie_qdisc_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= pie_init,
	.destroy	= pie_destroy,
	.reset		= pie_reset,
	.change		= pie_change,
	.dump		= pie_dump,
	.dump_stats	= pie_dump_stats,
	.owner		= THIS_MODULE,
};

static int __init pie_module_init(void)
{
	return register_qdisc(&pie_qdisc_ops);
}

static void __exit pie_module_exit(void)
{
	unregister_qdisc(&pie_qdisc_ops);
}

module_init(pie_module_init);
module_exit(pie_module_exit);

MODULE_DESCRIPTION("Proportional Integral controller Enhanced (PIE) scheduler");
MODULE_AUTHOR("Vijay Subramanian");
MODULE_AUTHOR("Mythili Prabhu");
MODULE_LICENSE("GPL");