// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2013 Cisco Systems, Inc, 2013.
 *
 * Author: Vijay Subramanian <[email protected]>
 * Author: Mythili Prabhu <[email protected]>
 *
 * ECN support is added by Naeem Khademi <[email protected]>
 * University of Oslo, Norway.
 *
 * References:
 * RFC 8033: https://tools.ietf.org/html/rfc8033
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/pie.h>
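
/* struct pie_vars, struct pie_params and struct pie_stats are defined in
 * include/net/pie.h. The pie_* helpers below are exported with
 * EXPORT_SYMBOL_GPL so the core algorithm can be reused by other PIE-based
 * qdiscs (e.g. FQ-PIE).
 */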

/* private data for the Qdisc */
struct pie_sched_data {
        struct pie_vars vars;
        struct pie_params params;
        struct pie_stats stats;
        struct timer_list adapt_timer;
        struct Qdisc *sch;
};

bool pie_drop_early(struct Qdisc *sch, struct pie_params *params,
                    struct pie_vars *vars, u32 backlog, u32 packet_size)
{
        u64 rnd;
        u64 local_prob = vars->prob;
        u32 mtu = psched_mtu(qdisc_dev(sch));

        /* If there is still burst allowance left skip random early drop */
        if (vars->burst_time > 0)
                return false;

        /* If current delay is less than half of target, and
         * if drop prob is low already, disable early_drop
         */
        if ((vars->qdelay < params->target / 2) &&
            (vars->prob < MAX_PROB / 5))
                return false;

        /* If we have fewer than 2 mtu-sized packets, disable pie_drop_early,
         * similar to min_th in RED
         */
        if (backlog < 2 * mtu)
                return false;

        /* If bytemode is turned on, use packet size to compute new
         * probability. Smaller packets will have lower drop prob in this case
         */
        if (params->bytemode && packet_size <= mtu)
                local_prob = (u64)packet_size * div_u64(local_prob, mtu);
        else
                local_prob = vars->prob;

        if (local_prob == 0)
                vars->accu_prob = 0;
        else
                vars->accu_prob += local_prob;

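        /* De-randomization, as described in RFC 8033: avoid dropping while
         * the accumulated drop probability is below 0.85, and force a drop
         * once it reaches 8.5. accu_prob sums probabilities across packets,
         * so it can legitimately exceed MAX_PROB.
         */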
        if (vars->accu_prob < (MAX_PROB / 100) * 85)
                return false;
        if (vars->accu_prob >= (MAX_PROB / 2) * 17)
                return true;

        prandom_bytes(&rnd, 8);
        if ((rnd >> BITS_PER_BYTE) < local_prob) {
                vars->accu_prob = 0;
                return true;
        }

        return false;
}
EXPORT_SYMBOL_GPL(pie_drop_early);

static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                             struct sk_buff **to_free)
{
        struct pie_sched_data *q = qdisc_priv(sch);
        bool enqueue = false;

        if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
                q->stats.overlimit++;
                goto out;
        }

        if (!pie_drop_early(sch, &q->params, &q->vars, sch->qstats.backlog,
                            skb->len)) {
                enqueue = true;
        } else if (q->params.ecn && (q->vars.prob <= MAX_PROB / 10) &&
                   INET_ECN_set_ce(skb)) {
                /* If packet is ecn capable, mark it if drop probability
                 * is lower than 10%, else drop it.
                 */
                q->stats.ecn_mark++;
                enqueue = true;
        }

        /* we can enqueue the packet */
        if (enqueue) {
                /* Set enqueue time only when dq_rate_estimator is disabled. */
                if (!q->params.dq_rate_estimator)
                        pie_set_enqueue_time(skb);

                q->stats.packets_in++;
                if (qdisc_qlen(sch) > q->stats.maxq)
                        q->stats.maxq = qdisc_qlen(sch);

                return qdisc_enqueue_tail(skb, sch);
        }

out:
        q->stats.dropped++;
        q->vars.accu_prob = 0;
        return qdisc_drop(skb, sch, to_free);
}

static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
        [TCA_PIE_TARGET] = {.type = NLA_U32},
        [TCA_PIE_LIMIT] = {.type = NLA_U32},
        [TCA_PIE_TUPDATE] = {.type = NLA_U32},
        [TCA_PIE_ALPHA] = {.type = NLA_U32},
        [TCA_PIE_BETA] = {.type = NLA_U32},
        [TCA_PIE_ECN] = {.type = NLA_U32},
        [TCA_PIE_BYTEMODE] = {.type = NLA_U32},
        [TCA_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32},
};

static int pie_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        struct pie_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_PIE_MAX + 1];
        unsigned int qlen, dropped = 0;
        int err;

        if (!opt)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy,
                                          NULL);
        if (err < 0)
                return err;

        sch_tree_lock(sch);

        /* convert from microseconds to pschedtime */
        if (tb[TCA_PIE_TARGET]) {
                /* target is in us */
                u32 target = nla_get_u32(tb[TCA_PIE_TARGET]);

                /* convert to pschedtime */
                q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
        }

        /* tupdate is in jiffies */
        if (tb[TCA_PIE_TUPDATE])
                q->params.tupdate =
                        usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));

        if (tb[TCA_PIE_LIMIT]) {
                u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);

                q->params.limit = limit;
                sch->limit = limit;
        }

        if (tb[TCA_PIE_ALPHA])
                q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]);

        if (tb[TCA_PIE_BETA])
                q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]);

        if (tb[TCA_PIE_ECN])
                q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]);

        if (tb[TCA_PIE_BYTEMODE])
                q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);

        if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
                q->params.dq_rate_estimator =
                        nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);

        /* Drop excess packets if new limit is lower */
        qlen = sch->q.qlen;
        while (sch->q.qlen > sch->limit) {
                struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);

                dropped += qdisc_pkt_len(skb);
                qdisc_qstats_backlog_dec(sch, skb);
                rtnl_qdisc_drop(skb, sch);
        }
        qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);

        sch_tree_unlock(sch);
        return 0;
}

void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params,
                         struct pie_vars *vars, u32 backlog)
{
        psched_time_t now = psched_get_time();
        u32 dtime = 0;

        /* If dq_rate_estimator is disabled, calculate qdelay using the
         * packet timestamp.
         */
        if (!params->dq_rate_estimator) {
                vars->qdelay = now - pie_get_enqueue_time(skb);

                if (vars->dq_tstamp != DTIME_INVALID)
                        dtime = now - vars->dq_tstamp;

                vars->dq_tstamp = now;

                if (backlog == 0)
                        vars->qdelay = 0;

                if (dtime == 0)
                        return;

                goto burst_allowance_reduction;
        }

        /* If current queue is about 10 packets or more and dq_count is unset
         * we have enough packets to calculate the drain rate. Save
         * current time as dq_tstamp and start measurement cycle.
         */
        if (backlog >= QUEUE_THRESHOLD && vars->dq_count == DQCOUNT_INVALID) {
                vars->dq_tstamp = psched_get_time();
                vars->dq_count = 0;
        }

        /* Calculate the average drain rate from this value. If queue length
         * has receded to a small value viz., <= QUEUE_THRESHOLD bytes, reset
         * the dq_count to -1 as we don't have enough packets to calculate the
         * drain rate anymore. The following if block is entered only when we
         * have a substantial queue built up (QUEUE_THRESHOLD bytes or more)
         * and we calculate the drain rate for the threshold here. dq_count is
         * in bytes, time difference in psched_time, hence rate is in
         * bytes/psched_time.
         */
        if (vars->dq_count != DQCOUNT_INVALID) {
                vars->dq_count += skb->len;

                if (vars->dq_count >= QUEUE_THRESHOLD) {
                        u32 count = vars->dq_count << PIE_SCALE;

                        dtime = now - vars->dq_tstamp;

                        if (dtime == 0)
                                return;

                        count = count / dtime;

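                        /* Update the drain-rate estimate with an
                         * exponentially weighted moving average (weight 1/8):
                         * avg_dq_rate = 7/8 * avg_dq_rate + 1/8 * count.
                         */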
                        if (vars->avg_dq_rate == 0)
                                vars->avg_dq_rate = count;
                        else
                                vars->avg_dq_rate =
                                    (vars->avg_dq_rate -
                                     (vars->avg_dq_rate >> 3)) + (count >> 3);

                        /* If the queue has receded below the threshold, we hold
                         * on to the last drain rate calculated, else we reset
                         * dq_count to 0 to re-enter the if block when the next
                         * packet is dequeued
                         */
                        if (backlog < QUEUE_THRESHOLD) {
                                vars->dq_count = DQCOUNT_INVALID;
                        } else {
                                vars->dq_count = 0;
                                vars->dq_tstamp = psched_get_time();
                        }

                        goto burst_allowance_reduction;
                }
        }

        return;

burst_allowance_reduction:
        if (vars->burst_time > 0) {
                if (vars->burst_time > dtime)
                        vars->burst_time -= dtime;
                else
                        vars->burst_time = 0;
        }
}
EXPORT_SYMBOL_GPL(pie_process_dequeue);

void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
                               u32 backlog)
{
        psched_time_t qdelay = 0;       /* in pschedtime */
        psched_time_t qdelay_old = 0;   /* in pschedtime */
        s64 delta = 0;          /* determines the change in probability */
        u64 oldprob;
        u64 alpha, beta;
        u32 power;
        bool update_prob = true;

        if (params->dq_rate_estimator) {
                qdelay_old = vars->qdelay;
                vars->qdelay_old = vars->qdelay;

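                /* Estimate the current queue delay via Little's law:
                 * delay = backlog / drain rate. The PIE_SCALE shift undoes
                 * the scaling applied when avg_dq_rate was computed.
                 */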
                if (vars->avg_dq_rate > 0)
                        qdelay = (backlog << PIE_SCALE) / vars->avg_dq_rate;
                else
                        qdelay = 0;
        } else {
                qdelay = vars->qdelay;
                qdelay_old = vars->qdelay_old;
        }

        /* If qdelay is zero and backlog is not, it means backlog is very
         * small, so we do not update probability in this round.
         */
        if (qdelay == 0 && backlog != 0)
                update_prob = false;

        /* In the algorithm, alpha and beta are between 0 and 2 with typical
         * value for alpha as 0.125. In this implementation, we use values 0-32
         * passed from user space to represent this. Also, alpha and beta have
         * unit of HZ and need to be scaled before they can be used to update
         * probability. alpha/beta are updated locally below by scaling down
         * by 16 to come to 0-2 range.
         */
        alpha = ((u64)params->alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
        beta = ((u64)params->beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
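        /* For example, a user-space alpha of 2 (the typical 0.125 of the
         * scheme) becomes 2/16 = 0.125 after the >> 4 scaling above.
         */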

        /* We scale alpha and beta differently depending on how heavy the
         * congestion is. Please see RFC 8033 for details.
         */
        if (vars->prob < MAX_PROB / 10) {
                alpha >>= 1;
                beta >>= 1;

                power = 100;
                while (vars->prob < div_u64(MAX_PROB, power) &&
                       power <= 1000000) {
                        alpha >>= 2;
                        beta >>= 2;
                        power *= 10;
                }
        }

        /* alpha and beta should be between 0 and 32, in multiples of 1/16 */
        delta += alpha * (qdelay - params->target);
        delta += beta * (qdelay - qdelay_old);

        oldprob = vars->prob;

        /* to ensure we increase probability in steps of no more than 2% */
        if (delta > (s64)(MAX_PROB / (100 / 2)) &&
            vars->prob >= MAX_PROB / 10)
                delta = (MAX_PROB / 100) * 2;

        /* Non-linear drop:
         * Tune drop probability to increase quickly for high delays (>= 250ms)
         * 250ms is derived through experiments and provides error protection
         */

        if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC)))
                delta += MAX_PROB / (100 / 2);

        vars->prob += delta;

        if (delta > 0) {
                /* prevent overflow */
                if (vars->prob < oldprob) {
                        vars->prob = MAX_PROB;
                        /* Prevent normalization error. If probability is at
                         * maximum value already, we normalize it here, and
                         * skip the check to do a non-linear drop in the next
                         * section.
                         */
                        update_prob = false;
                }
        } else {
                /* prevent underflow */
                if (vars->prob > oldprob)
                        vars->prob = 0;
        }

        /* Non-linear drop in probability: Reduce drop probability quickly if
         * delay is 0 for 2 consecutive Tupdate periods.
         */

        if (qdelay == 0 && qdelay_old == 0 && update_prob)
                /* Reduce drop probability to 98.4% of its current value */
                vars->prob -= vars->prob / 64;

        vars->qdelay = qdelay;
        vars->backlog_old = backlog;

        /* We restart the measurement cycle if the following conditions are met
         * 1. If the delay has been low for 2 consecutive Tupdate periods
         * 2. Calculated drop probability is zero
         * 3. If dq_rate_estimator is enabled, we have at least one estimate
         *    for avg_dq_rate, i.e., it is a non-zero value
         */
        if ((vars->qdelay < params->target / 2) &&
            (vars->qdelay_old < params->target / 2) &&
            vars->prob == 0 &&
            (!params->dq_rate_estimator || vars->avg_dq_rate > 0)) {
                pie_vars_init(vars);
        }

        if (!params->dq_rate_estimator)
                vars->qdelay_old = qdelay;
}
EXPORT_SYMBOL_GPL(pie_calculate_probability);

static void pie_timer(struct timer_list *t)
{
        struct pie_sched_data *q = from_timer(q, t, adapt_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

        spin_lock(root_lock);
        pie_calculate_probability(&q->params, &q->vars, sch->qstats.backlog);

        /* reset the timer to fire after 'tupdate'. tupdate is in jiffies. */
        if (q->params.tupdate)
                mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
        spin_unlock(root_lock);
}

static int pie_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct pie_sched_data *q = qdisc_priv(sch);

        pie_params_init(&q->params);
        pie_vars_init(&q->vars);
        sch->limit = q->params.limit;

        q->sch = sch;
        timer_setup(&q->adapt_timer, pie_timer, 0);

        if (opt) {
                int err = pie_change(sch, opt, extack);

                if (err)
                        return err;
        }

        mod_timer(&q->adapt_timer, jiffies + HZ / 2);
        return 0;
}

static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct pie_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts;

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (!opts)
                goto nla_put_failure;

        /* convert target from pschedtime to us */
        if (nla_put_u32(skb, TCA_PIE_TARGET,
                        ((u32)PSCHED_TICKS2NS(q->params.target)) /
                        NSEC_PER_USEC) ||
            nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
            nla_put_u32(skb, TCA_PIE_TUPDATE,
                        jiffies_to_usecs(q->params.tupdate)) ||
            nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
            nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
            nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
            nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
            nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
                        q->params.dq_rate_estimator))
                goto nla_put_failure;

        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -1;
}

static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct pie_sched_data *q = qdisc_priv(sch);
        struct tc_pie_xstats st = {
                .prob = q->vars.prob << BITS_PER_BYTE,
                .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
                          NSEC_PER_USEC,
                .packets_in = q->stats.packets_in,
                .overlimit = q->stats.overlimit,
                .maxq = q->stats.maxq,
                .dropped = q->stats.dropped,
                .ecn_mark = q->stats.ecn_mark,
        };

        /* avg_dq_rate is only valid if dq_rate_estimator is enabled */
        st.dq_rate_estimating = q->params.dq_rate_estimator;

        /* unscale and return dq_rate in bytes per sec */
        if (q->params.dq_rate_estimator)
                st.avg_dq_rate = q->vars.avg_dq_rate *
                                 (PSCHED_TICKS_PER_SEC) >> PIE_SCALE;

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
{
        struct pie_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb = qdisc_dequeue_head(sch);

        if (!skb)
                return NULL;

        pie_process_dequeue(skb, &q->params, &q->vars, sch->qstats.backlog);
        return skb;
}

static void pie_reset(struct Qdisc *sch)
{
        struct pie_sched_data *q = qdisc_priv(sch);

        qdisc_reset_queue(sch);
        pie_vars_init(&q->vars);
}

static void pie_destroy(struct Qdisc *sch)
{
        struct pie_sched_data *q = qdisc_priv(sch);

        q->params.tupdate = 0;
        del_timer_sync(&q->adapt_timer);
}

static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
        .id = "pie",
        .priv_size = sizeof(struct pie_sched_data),
        .enqueue = pie_qdisc_enqueue,
        .dequeue = pie_qdisc_dequeue,
        .peek = qdisc_peek_dequeued,
        .init = pie_init,
        .destroy = pie_destroy,
        .reset = pie_reset,
        .change = pie_change,
        .dump = pie_dump,
        .dump_stats = pie_dump_stats,
        .owner = THIS_MODULE,
};

static int __init pie_module_init(void)
{
        return register_qdisc(&pie_qdisc_ops);
}

static void __exit pie_module_exit(void)
{
        unregister_qdisc(&pie_qdisc_ops);
}

module_init(pie_module_init);
module_exit(pie_module_exit);

MODULE_DESCRIPTION("Proportional Integral controller Enhanced (PIE) scheduler");
MODULE_AUTHOR("Vijay Subramanian");
MODULE_AUTHOR("Mythili Prabhu");
MODULE_LICENSE("GPL");
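
/* Example user-space configuration (an illustrative sketch, not part of this
 * file): attach PIE as the root qdisc of a hypothetical device eth0, spelling
 * out values that match the documented defaults in tc-pie(8):
 *
 *   tc qdisc add dev eth0 root pie limit 1000 target 15ms tupdate 15ms ecn
 */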