]>
Commit | Line | Data |
---|---|---|
5ddfffbd BC |
/*
 * QEMU throttling infrastructure
 *
 * Copyright (C) Nodalink, SARL. 2013
 *
 * Author:
 *   Benoît Canet <[email protected]>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 or
 * (at your option) version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
22 | ||
23 | #include "qemu/throttle.h" | |
24 | #include "qemu/timer.h" | |
25 | ||
26 | /* This function make a bucket leak | |
27 | * | |
28 | * @bkt: the bucket to make leak | |
29 | * @delta_ns: the time delta | |
30 | */ | |
31 | void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns) | |
32 | { | |
33 | double leak; | |
34 | ||
35 | /* compute how much to leak */ | |
36 | leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND; | |
37 | ||
38 | /* make the bucket leak */ | |
39 | bkt->level = MAX(bkt->level - leak, 0); | |
40 | } | |
41 | ||
42 | /* Calculate the time delta since last leak and make proportionals leaks | |
43 | * | |
44 | * @now: the current timestamp in ns | |
45 | */ | |
46 | static void throttle_do_leak(ThrottleState *ts, int64_t now) | |
47 | { | |
48 | /* compute the time elapsed since the last leak */ | |
49 | int64_t delta_ns = now - ts->previous_leak; | |
50 | int i; | |
51 | ||
52 | ts->previous_leak = now; | |
53 | ||
54 | if (delta_ns <= 0) { | |
55 | return; | |
56 | } | |
57 | ||
58 | /* make each bucket leak */ | |
59 | for (i = 0; i < BUCKETS_COUNT; i++) { | |
60 | throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns); | |
61 | } | |
62 | } | |
63 | ||
64 | /* do the real job of computing the time to wait | |
65 | * | |
66 | * @limit: the throttling limit | |
67 | * @extra: the number of operation to delay | |
68 | * @ret: the time to wait in ns | |
69 | */ | |
70 | static int64_t throttle_do_compute_wait(double limit, double extra) | |
71 | { | |
72 | double wait = extra * NANOSECONDS_PER_SECOND; | |
73 | wait /= limit; | |
74 | return wait; | |
75 | } | |
76 | ||
77 | /* This function compute the wait time in ns that a leaky bucket should trigger | |
78 | * | |
79 | * @bkt: the leaky bucket we operate on | |
80 | * @ret: the resulting wait time in ns or 0 if the operation can go through | |
81 | */ | |
82 | int64_t throttle_compute_wait(LeakyBucket *bkt) | |
83 | { | |
84 | double extra; /* the number of extra units blocking the io */ | |
85 | ||
86 | if (!bkt->avg) { | |
87 | return 0; | |
88 | } | |
89 | ||
90 | extra = bkt->level - bkt->max; | |
91 | ||
92 | if (extra <= 0) { | |
93 | return 0; | |
94 | } | |
95 | ||
96 | return throttle_do_compute_wait(bkt->avg, extra); | |
97 | } | |
98 | ||
99 | /* This function compute the time that must be waited while this IO | |
100 | * | |
101 | * @is_write: true if the current IO is a write, false if it's a read | |
102 | * @ret: time to wait | |
103 | */ | |
104 | static int64_t throttle_compute_wait_for(ThrottleState *ts, | |
105 | bool is_write) | |
106 | { | |
107 | BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL, | |
108 | THROTTLE_OPS_TOTAL, | |
109 | THROTTLE_BPS_READ, | |
110 | THROTTLE_OPS_READ}, | |
111 | {THROTTLE_BPS_TOTAL, | |
112 | THROTTLE_OPS_TOTAL, | |
113 | THROTTLE_BPS_WRITE, | |
114 | THROTTLE_OPS_WRITE}, }; | |
115 | int64_t wait, max_wait = 0; | |
116 | int i; | |
117 | ||
118 | for (i = 0; i < 4; i++) { | |
119 | BucketType index = to_check[is_write][i]; | |
120 | wait = throttle_compute_wait(&ts->cfg.buckets[index]); | |
121 | if (wait > max_wait) { | |
122 | max_wait = wait; | |
123 | } | |
124 | } | |
125 | ||
126 | return max_wait; | |
127 | } | |
128 | ||
129 | /* compute the timer for this type of operation | |
130 | * | |
131 | * @is_write: the type of operation | |
132 | * @now: the current clock timestamp | |
133 | * @next_timestamp: the resulting timer | |
134 | * @ret: true if a timer must be set | |
135 | */ | |
136 | bool throttle_compute_timer(ThrottleState *ts, | |
137 | bool is_write, | |
138 | int64_t now, | |
139 | int64_t *next_timestamp) | |
140 | { | |
141 | int64_t wait; | |
142 | ||
143 | /* leak proportionally to the time elapsed */ | |
144 | throttle_do_leak(ts, now); | |
145 | ||
146 | /* compute the wait time if any */ | |
147 | wait = throttle_compute_wait_for(ts, is_write); | |
148 | ||
149 | /* if the code must wait compute when the next timer should fire */ | |
150 | if (wait) { | |
151 | *next_timestamp = now + wait; | |
152 | return true; | |
153 | } | |
154 | ||
155 | /* else no need to wait at all */ | |
156 | *next_timestamp = now; | |
157 | return false; | |
158 | } | |
159 | ||
160 | /* To be called first on the ThrottleState */ | |
161 | void throttle_init(ThrottleState *ts, | |
162 | QEMUClockType clock_type, | |
163 | QEMUTimerCB *read_timer_cb, | |
164 | QEMUTimerCB *write_timer_cb, | |
165 | void *timer_opaque) | |
166 | { | |
167 | memset(ts, 0, sizeof(ThrottleState)); | |
168 | ||
169 | ts->clock_type = clock_type; | |
170 | ts->timers[0] = timer_new_ns(clock_type, read_timer_cb, timer_opaque); | |
171 | ts->timers[1] = timer_new_ns(clock_type, write_timer_cb, timer_opaque); | |
172 | } | |
173 | ||
174 | /* destroy a timer */ | |
175 | static void throttle_timer_destroy(QEMUTimer **timer) | |
176 | { | |
177 | assert(*timer != NULL); | |
178 | ||
179 | timer_del(*timer); | |
180 | timer_free(*timer); | |
181 | *timer = NULL; | |
182 | } | |
183 | ||
184 | /* To be called last on the ThrottleState */ | |
185 | void throttle_destroy(ThrottleState *ts) | |
186 | { | |
187 | int i; | |
188 | ||
189 | for (i = 0; i < 2; i++) { | |
190 | throttle_timer_destroy(&ts->timers[i]); | |
191 | } | |
192 | } | |
193 | ||
194 | /* is any throttling timer configured */ | |
195 | bool throttle_have_timer(ThrottleState *ts) | |
196 | { | |
197 | if (ts->timers[0]) { | |
198 | return true; | |
199 | } | |
200 | ||
201 | return false; | |
202 | } | |
203 | ||
204 | /* Does any throttling must be done | |
205 | * | |
206 | * @cfg: the throttling configuration to inspect | |
207 | * @ret: true if throttling must be done else false | |
208 | */ | |
209 | bool throttle_enabled(ThrottleConfig *cfg) | |
210 | { | |
211 | int i; | |
212 | ||
213 | for (i = 0; i < BUCKETS_COUNT; i++) { | |
214 | if (cfg->buckets[i].avg > 0) { | |
215 | return true; | |
216 | } | |
217 | } | |
218 | ||
219 | return false; | |
220 | } | |
221 | ||
222 | /* return true if any two throttling parameters conflicts | |
223 | * | |
224 | * @cfg: the throttling configuration to inspect | |
225 | * @ret: true if any conflict detected else false | |
226 | */ | |
227 | bool throttle_conflicting(ThrottleConfig *cfg) | |
228 | { | |
229 | bool bps_flag, ops_flag; | |
230 | bool bps_max_flag, ops_max_flag; | |
231 | ||
232 | bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg && | |
233 | (cfg->buckets[THROTTLE_BPS_READ].avg || | |
234 | cfg->buckets[THROTTLE_BPS_WRITE].avg); | |
235 | ||
236 | ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg && | |
237 | (cfg->buckets[THROTTLE_OPS_READ].avg || | |
238 | cfg->buckets[THROTTLE_OPS_WRITE].avg); | |
239 | ||
240 | bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max && | |
241 | (cfg->buckets[THROTTLE_BPS_READ].max || | |
242 | cfg->buckets[THROTTLE_BPS_WRITE].max); | |
243 | ||
244 | ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max && | |
245 | (cfg->buckets[THROTTLE_OPS_READ].max || | |
246 | cfg->buckets[THROTTLE_OPS_WRITE].max); | |
247 | ||
248 | return bps_flag || ops_flag || bps_max_flag || ops_max_flag; | |
249 | } | |
250 | ||
251 | /* check if a throttling configuration is valid | |
252 | * @cfg: the throttling configuration to inspect | |
253 | * @ret: true if valid else false | |
254 | */ | |
255 | bool throttle_is_valid(ThrottleConfig *cfg) | |
256 | { | |
257 | bool invalid = false; | |
258 | int i; | |
259 | ||
260 | for (i = 0; i < BUCKETS_COUNT; i++) { | |
261 | if (cfg->buckets[i].avg < 0) { | |
262 | invalid = true; | |
263 | } | |
264 | } | |
265 | ||
266 | for (i = 0; i < BUCKETS_COUNT; i++) { | |
267 | if (cfg->buckets[i].max < 0) { | |
268 | invalid = true; | |
269 | } | |
270 | } | |
271 | ||
272 | return !invalid; | |
273 | } | |
274 | ||
275 | /* fix bucket parameters */ | |
276 | static void throttle_fix_bucket(LeakyBucket *bkt) | |
277 | { | |
278 | double min; | |
279 | ||
280 | /* zero bucket level */ | |
281 | bkt->level = 0; | |
282 | ||
283 | /* The following is done to cope with the Linux CFQ block scheduler | |
284 | * which regroup reads and writes by block of 100ms in the guest. | |
285 | * When they are two process one making reads and one making writes cfq | |
286 | * make a pattern looking like the following: | |
287 | * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR | |
288 | * Having a max burst value of 100ms of the average will help smooth the | |
289 | * throttling | |
290 | */ | |
291 | min = bkt->avg / 10; | |
292 | if (bkt->avg && !bkt->max) { | |
293 | bkt->max = min; | |
294 | } | |
295 | } | |
296 | ||
297 | /* take care of canceling a timer */ | |
298 | static void throttle_cancel_timer(QEMUTimer *timer) | |
299 | { | |
300 | assert(timer != NULL); | |
301 | ||
302 | timer_del(timer); | |
303 | } | |
304 | ||
305 | /* Used to configure the throttle | |
306 | * | |
307 | * @ts: the throttle state we are working on | |
308 | * @cfg: the config to set | |
309 | */ | |
310 | void throttle_config(ThrottleState *ts, ThrottleConfig *cfg) | |
311 | { | |
312 | int i; | |
313 | ||
314 | ts->cfg = *cfg; | |
315 | ||
316 | for (i = 0; i < BUCKETS_COUNT; i++) { | |
317 | throttle_fix_bucket(&ts->cfg.buckets[i]); | |
318 | } | |
319 | ||
320 | ts->previous_leak = qemu_clock_get_ns(ts->clock_type); | |
321 | ||
322 | for (i = 0; i < 2; i++) { | |
323 | throttle_cancel_timer(ts->timers[i]); | |
324 | } | |
325 | } | |
326 | ||
327 | /* used to get config | |
328 | * | |
329 | * @ts: the throttle state we are working on | |
330 | * @cfg: the config to write | |
331 | */ | |
332 | void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) | |
333 | { | |
334 | *cfg = ts->cfg; | |
335 | } | |
336 | ||
337 | ||
338 | /* Schedule the read or write timer if needed | |
339 | * | |
340 | * NOTE: this function is not unit tested due to it's usage of timer_mod | |
341 | * | |
342 | * @is_write: the type of operation (read/write) | |
343 | * @ret: true if the timer has been scheduled else false | |
344 | */ | |
345 | bool throttle_schedule_timer(ThrottleState *ts, bool is_write) | |
346 | { | |
347 | int64_t now = qemu_clock_get_ns(ts->clock_type); | |
348 | int64_t next_timestamp; | |
349 | bool must_wait; | |
350 | ||
351 | must_wait = throttle_compute_timer(ts, | |
352 | is_write, | |
353 | now, | |
354 | &next_timestamp); | |
355 | ||
356 | /* request not throttled */ | |
357 | if (!must_wait) { | |
358 | return false; | |
359 | } | |
360 | ||
361 | /* request throttled and timer pending -> do nothing */ | |
362 | if (timer_pending(ts->timers[is_write])) { | |
363 | return true; | |
364 | } | |
365 | ||
366 | /* request throttled and timer not pending -> arm timer */ | |
367 | timer_mod(ts->timers[is_write], next_timestamp); | |
368 | return true; | |
369 | } | |
370 | ||
371 | /* do the accounting for this operation | |
372 | * | |
373 | * @is_write: the type of operation (read/write) | |
374 | * @size: the size of the operation | |
375 | */ | |
376 | void throttle_account(ThrottleState *ts, bool is_write, uint64_t size) | |
377 | { | |
378 | double units = 1.0; | |
379 | ||
380 | /* if cfg.op_size is defined and smaller than size we compute unit count */ | |
381 | if (ts->cfg.op_size && size > ts->cfg.op_size) { | |
382 | units = (double) size / ts->cfg.op_size; | |
383 | } | |
384 | ||
385 | ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size; | |
386 | ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units; | |
387 | ||
388 | if (is_write) { | |
389 | ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size; | |
390 | ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units; | |
391 | } else { | |
392 | ts->cfg.buckets[THROTTLE_BPS_READ].level += size; | |
393 | ts->cfg.buckets[THROTTLE_OPS_READ].level += units; | |
394 | } | |
395 | } | |
396 |