]>
Commit | Line | Data |
---|---|---|
5ddfffbd BC |
1 | /* |
2 | * QEMU throttling infrastructure | |
3 | * | |
a291d5d9 AG |
4 | * Copyright (C) Nodalink, EURL. 2013-2014 |
5 | * Copyright (C) Igalia, S.L. 2015 | |
5ddfffbd | 6 | * |
a291d5d9 AG |
7 | * Authors: |
8 | * Benoît Canet <[email protected]> | |
9 | * Alberto Garcia <[email protected]> | |
5ddfffbd BC |
10 | * |
11 | * This program is free software; you can redistribute it and/or | |
12 | * modify it under the terms of the GNU General Public License as | |
13 | * published by the Free Software Foundation; either version 2 or | |
14 | * (at your option) version 3 of the License. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | |
23 | */ | |
24 | ||
25 | #include "qemu/throttle.h" | |
26 | #include "qemu/timer.h" | |
13af91eb | 27 | #include "block/aio.h" |
5ddfffbd BC |
28 | |
29 | /* This function make a bucket leak | |
30 | * | |
31 | * @bkt: the bucket to make leak | |
32 | * @delta_ns: the time delta | |
33 | */ | |
34 | void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns) | |
35 | { | |
36 | double leak; | |
37 | ||
38 | /* compute how much to leak */ | |
13566fe3 | 39 | leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND; |
5ddfffbd BC |
40 | |
41 | /* make the bucket leak */ | |
42 | bkt->level = MAX(bkt->level - leak, 0); | |
43 | } | |
44 | ||
45 | /* Calculate the time delta since last leak and make proportionals leaks | |
46 | * | |
47 | * @now: the current timestamp in ns | |
48 | */ | |
49 | static void throttle_do_leak(ThrottleState *ts, int64_t now) | |
50 | { | |
51 | /* compute the time elapsed since the last leak */ | |
52 | int64_t delta_ns = now - ts->previous_leak; | |
53 | int i; | |
54 | ||
55 | ts->previous_leak = now; | |
56 | ||
57 | if (delta_ns <= 0) { | |
58 | return; | |
59 | } | |
60 | ||
61 | /* make each bucket leak */ | |
62 | for (i = 0; i < BUCKETS_COUNT; i++) { | |
63 | throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns); | |
64 | } | |
65 | } | |
66 | ||
67 | /* do the real job of computing the time to wait | |
68 | * | |
69 | * @limit: the throttling limit | |
70 | * @extra: the number of operation to delay | |
71 | * @ret: the time to wait in ns | |
72 | */ | |
73 | static int64_t throttle_do_compute_wait(double limit, double extra) | |
74 | { | |
13566fe3 | 75 | double wait = extra * NANOSECONDS_PER_SECOND; |
5ddfffbd BC |
76 | wait /= limit; |
77 | return wait; | |
78 | } | |
79 | ||
80 | /* This function compute the wait time in ns that a leaky bucket should trigger | |
81 | * | |
82 | * @bkt: the leaky bucket we operate on | |
83 | * @ret: the resulting wait time in ns or 0 if the operation can go through | |
84 | */ | |
85 | int64_t throttle_compute_wait(LeakyBucket *bkt) | |
86 | { | |
87 | double extra; /* the number of extra units blocking the io */ | |
88 | ||
89 | if (!bkt->avg) { | |
90 | return 0; | |
91 | } | |
92 | ||
93 | extra = bkt->level - bkt->max; | |
94 | ||
95 | if (extra <= 0) { | |
96 | return 0; | |
97 | } | |
98 | ||
99 | return throttle_do_compute_wait(bkt->avg, extra); | |
100 | } | |
101 | ||
102 | /* This function compute the time that must be waited while this IO | |
103 | * | |
104 | * @is_write: true if the current IO is a write, false if it's a read | |
105 | * @ret: time to wait | |
106 | */ | |
107 | static int64_t throttle_compute_wait_for(ThrottleState *ts, | |
108 | bool is_write) | |
109 | { | |
110 | BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL, | |
111 | THROTTLE_OPS_TOTAL, | |
112 | THROTTLE_BPS_READ, | |
113 | THROTTLE_OPS_READ}, | |
114 | {THROTTLE_BPS_TOTAL, | |
115 | THROTTLE_OPS_TOTAL, | |
116 | THROTTLE_BPS_WRITE, | |
117 | THROTTLE_OPS_WRITE}, }; | |
118 | int64_t wait, max_wait = 0; | |
119 | int i; | |
120 | ||
121 | for (i = 0; i < 4; i++) { | |
122 | BucketType index = to_check[is_write][i]; | |
123 | wait = throttle_compute_wait(&ts->cfg.buckets[index]); | |
124 | if (wait > max_wait) { | |
125 | max_wait = wait; | |
126 | } | |
127 | } | |
128 | ||
129 | return max_wait; | |
130 | } | |
131 | ||
132 | /* compute the timer for this type of operation | |
133 | * | |
134 | * @is_write: the type of operation | |
135 | * @now: the current clock timestamp | |
136 | * @next_timestamp: the resulting timer | |
137 | * @ret: true if a timer must be set | |
138 | */ | |
139 | bool throttle_compute_timer(ThrottleState *ts, | |
140 | bool is_write, | |
141 | int64_t now, | |
142 | int64_t *next_timestamp) | |
143 | { | |
144 | int64_t wait; | |
145 | ||
146 | /* leak proportionally to the time elapsed */ | |
147 | throttle_do_leak(ts, now); | |
148 | ||
149 | /* compute the wait time if any */ | |
150 | wait = throttle_compute_wait_for(ts, is_write); | |
151 | ||
152 | /* if the code must wait compute when the next timer should fire */ | |
153 | if (wait) { | |
154 | *next_timestamp = now + wait; | |
155 | return true; | |
156 | } | |
157 | ||
158 | /* else no need to wait at all */ | |
159 | *next_timestamp = now; | |
160 | return false; | |
161 | } | |
162 | ||
13af91eb | 163 | /* Add timers to event loop */ |
0e5b0a2d BC |
164 | void throttle_timers_attach_aio_context(ThrottleTimers *tt, |
165 | AioContext *new_context) | |
13af91eb | 166 | { |
0e5b0a2d BC |
167 | tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, |
168 | tt->read_timer_cb, tt->timer_opaque); | |
169 | tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, | |
170 | tt->write_timer_cb, tt->timer_opaque); | |
13af91eb SH |
171 | } |
172 | ||
5ddfffbd | 173 | /* To be called first on the ThrottleState */ |
0e5b0a2d | 174 | void throttle_init(ThrottleState *ts) |
5ddfffbd BC |
175 | { |
176 | memset(ts, 0, sizeof(ThrottleState)); | |
0e5b0a2d BC |
177 | } |
178 | ||
179 | /* To be called first on the ThrottleTimers */ | |
180 | void throttle_timers_init(ThrottleTimers *tt, | |
181 | AioContext *aio_context, | |
182 | QEMUClockType clock_type, | |
183 | QEMUTimerCB *read_timer_cb, | |
184 | QEMUTimerCB *write_timer_cb, | |
185 | void *timer_opaque) | |
186 | { | |
187 | memset(tt, 0, sizeof(ThrottleTimers)); | |
5ddfffbd | 188 | |
0e5b0a2d BC |
189 | tt->clock_type = clock_type; |
190 | tt->read_timer_cb = read_timer_cb; | |
191 | tt->write_timer_cb = write_timer_cb; | |
192 | tt->timer_opaque = timer_opaque; | |
193 | throttle_timers_attach_aio_context(tt, aio_context); | |
5ddfffbd BC |
194 | } |
195 | ||
196 | /* destroy a timer */ | |
197 | static void throttle_timer_destroy(QEMUTimer **timer) | |
198 | { | |
199 | assert(*timer != NULL); | |
200 | ||
201 | timer_del(*timer); | |
202 | timer_free(*timer); | |
203 | *timer = NULL; | |
204 | } | |
205 | ||
13af91eb | 206 | /* Remove timers from event loop */ |
0e5b0a2d | 207 | void throttle_timers_detach_aio_context(ThrottleTimers *tt) |
5ddfffbd BC |
208 | { |
209 | int i; | |
210 | ||
211 | for (i = 0; i < 2; i++) { | |
0e5b0a2d | 212 | throttle_timer_destroy(&tt->timers[i]); |
5ddfffbd BC |
213 | } |
214 | } | |
215 | ||
/* To be called last on the ThrottleTimers */
void throttle_timers_destroy(ThrottleTimers *tt)
{
    /* detaching deletes and frees both timers, so nothing else to do */
    throttle_timers_detach_aio_context(tt);
}
221 | ||
5ddfffbd | 222 | /* is any throttling timer configured */ |
0e5b0a2d | 223 | bool throttle_timers_are_initialized(ThrottleTimers *tt) |
5ddfffbd | 224 | { |
0e5b0a2d | 225 | if (tt->timers[0]) { |
5ddfffbd BC |
226 | return true; |
227 | } | |
228 | ||
229 | return false; | |
230 | } | |
231 | ||
232 | /* Does any throttling must be done | |
233 | * | |
234 | * @cfg: the throttling configuration to inspect | |
235 | * @ret: true if throttling must be done else false | |
236 | */ | |
237 | bool throttle_enabled(ThrottleConfig *cfg) | |
238 | { | |
239 | int i; | |
240 | ||
241 | for (i = 0; i < BUCKETS_COUNT; i++) { | |
242 | if (cfg->buckets[i].avg > 0) { | |
243 | return true; | |
244 | } | |
245 | } | |
246 | ||
247 | return false; | |
248 | } | |
249 | ||
250 | /* return true if any two throttling parameters conflicts | |
251 | * | |
252 | * @cfg: the throttling configuration to inspect | |
253 | * @ret: true if any conflict detected else false | |
254 | */ | |
255 | bool throttle_conflicting(ThrottleConfig *cfg) | |
256 | { | |
257 | bool bps_flag, ops_flag; | |
258 | bool bps_max_flag, ops_max_flag; | |
259 | ||
260 | bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg && | |
261 | (cfg->buckets[THROTTLE_BPS_READ].avg || | |
262 | cfg->buckets[THROTTLE_BPS_WRITE].avg); | |
263 | ||
264 | ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg && | |
265 | (cfg->buckets[THROTTLE_OPS_READ].avg || | |
266 | cfg->buckets[THROTTLE_OPS_WRITE].avg); | |
267 | ||
268 | bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max && | |
269 | (cfg->buckets[THROTTLE_BPS_READ].max || | |
270 | cfg->buckets[THROTTLE_BPS_WRITE].max); | |
271 | ||
272 | ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max && | |
273 | (cfg->buckets[THROTTLE_OPS_READ].max || | |
274 | cfg->buckets[THROTTLE_OPS_WRITE].max); | |
275 | ||
276 | return bps_flag || ops_flag || bps_max_flag || ops_max_flag; | |
277 | } | |
278 | ||
279 | /* check if a throttling configuration is valid | |
280 | * @cfg: the throttling configuration to inspect | |
281 | * @ret: true if valid else false | |
282 | */ | |
283 | bool throttle_is_valid(ThrottleConfig *cfg) | |
284 | { | |
5ddfffbd BC |
285 | int i; |
286 | ||
287 | for (i = 0; i < BUCKETS_COUNT; i++) { | |
972606c4 FZ |
288 | if (cfg->buckets[i].avg < 0 || |
289 | cfg->buckets[i].max < 0 || | |
290 | cfg->buckets[i].avg > THROTTLE_VALUE_MAX || | |
291 | cfg->buckets[i].max > THROTTLE_VALUE_MAX) { | |
292 | return false; | |
5ddfffbd BC |
293 | } |
294 | } | |
295 | ||
972606c4 | 296 | return true; |
5ddfffbd BC |
297 | } |
298 | ||
ee2bdc33 SH |
299 | /* check if bps_max/iops_max is used without bps/iops |
300 | * @cfg: the throttling configuration to inspect | |
301 | */ | |
302 | bool throttle_max_is_missing_limit(ThrottleConfig *cfg) | |
303 | { | |
304 | int i; | |
305 | ||
306 | for (i = 0; i < BUCKETS_COUNT; i++) { | |
307 | if (cfg->buckets[i].max && !cfg->buckets[i].avg) { | |
308 | return true; | |
309 | } | |
310 | } | |
311 | return false; | |
312 | } | |
313 | ||
5ddfffbd BC |
314 | /* fix bucket parameters */ |
315 | static void throttle_fix_bucket(LeakyBucket *bkt) | |
316 | { | |
317 | double min; | |
318 | ||
319 | /* zero bucket level */ | |
320 | bkt->level = 0; | |
321 | ||
322 | /* The following is done to cope with the Linux CFQ block scheduler | |
323 | * which regroup reads and writes by block of 100ms in the guest. | |
324 | * When they are two process one making reads and one making writes cfq | |
325 | * make a pattern looking like the following: | |
326 | * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR | |
327 | * Having a max burst value of 100ms of the average will help smooth the | |
328 | * throttling | |
329 | */ | |
330 | min = bkt->avg / 10; | |
331 | if (bkt->avg && !bkt->max) { | |
332 | bkt->max = min; | |
333 | } | |
334 | } | |
335 | ||
/* Take care of canceling a timer
 *
 * @timer: the timer to cancel; must not be NULL
 */
static void throttle_cancel_timer(QEMUTimer *timer)
{
    assert(timer != NULL);

    /* only remove the pending callback; the timer itself stays usable */
    timer_del(timer);
}
343 | ||
344 | /* Used to configure the throttle | |
345 | * | |
346 | * @ts: the throttle state we are working on | |
0e5b0a2d | 347 | * @tt: the throttle timers we use in this aio context |
5ddfffbd BC |
348 | * @cfg: the config to set |
349 | */ | |
0e5b0a2d BC |
350 | void throttle_config(ThrottleState *ts, |
351 | ThrottleTimers *tt, | |
352 | ThrottleConfig *cfg) | |
5ddfffbd BC |
353 | { |
354 | int i; | |
355 | ||
356 | ts->cfg = *cfg; | |
357 | ||
358 | for (i = 0; i < BUCKETS_COUNT; i++) { | |
359 | throttle_fix_bucket(&ts->cfg.buckets[i]); | |
360 | } | |
361 | ||
0e5b0a2d | 362 | ts->previous_leak = qemu_clock_get_ns(tt->clock_type); |
5ddfffbd BC |
363 | |
364 | for (i = 0; i < 2; i++) { | |
0e5b0a2d | 365 | throttle_cancel_timer(tt->timers[i]); |
5ddfffbd BC |
366 | } |
367 | } | |
368 | ||
/* Used to get the current configuration
 *
 * @ts: the throttle state we are working on
 * @cfg: the config to write; receives a full copy of ts->cfg
 */
void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
{
    *cfg = ts->cfg;
}
378 | ||
379 | ||
380 | /* Schedule the read or write timer if needed | |
381 | * | |
382 | * NOTE: this function is not unit tested due to it's usage of timer_mod | |
383 | * | |
0e5b0a2d | 384 | * @tt: the timers structure |
5ddfffbd BC |
385 | * @is_write: the type of operation (read/write) |
386 | * @ret: true if the timer has been scheduled else false | |
387 | */ | |
0e5b0a2d BC |
388 | bool throttle_schedule_timer(ThrottleState *ts, |
389 | ThrottleTimers *tt, | |
390 | bool is_write) | |
5ddfffbd | 391 | { |
0e5b0a2d | 392 | int64_t now = qemu_clock_get_ns(tt->clock_type); |
5ddfffbd BC |
393 | int64_t next_timestamp; |
394 | bool must_wait; | |
395 | ||
396 | must_wait = throttle_compute_timer(ts, | |
397 | is_write, | |
398 | now, | |
399 | &next_timestamp); | |
400 | ||
401 | /* request not throttled */ | |
402 | if (!must_wait) { | |
403 | return false; | |
404 | } | |
405 | ||
406 | /* request throttled and timer pending -> do nothing */ | |
0e5b0a2d | 407 | if (timer_pending(tt->timers[is_write])) { |
5ddfffbd BC |
408 | return true; |
409 | } | |
410 | ||
411 | /* request throttled and timer not pending -> arm timer */ | |
0e5b0a2d | 412 | timer_mod(tt->timers[is_write], next_timestamp); |
5ddfffbd BC |
413 | return true; |
414 | } | |
415 | ||
416 | /* do the accounting for this operation | |
417 | * | |
418 | * @is_write: the type of operation (read/write) | |
419 | * @size: the size of the operation | |
420 | */ | |
421 | void throttle_account(ThrottleState *ts, bool is_write, uint64_t size) | |
422 | { | |
423 | double units = 1.0; | |
424 | ||
425 | /* if cfg.op_size is defined and smaller than size we compute unit count */ | |
426 | if (ts->cfg.op_size && size > ts->cfg.op_size) { | |
427 | units = (double) size / ts->cfg.op_size; | |
428 | } | |
429 | ||
430 | ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size; | |
431 | ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units; | |
432 | ||
433 | if (is_write) { | |
434 | ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size; | |
435 | ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units; | |
436 | } else { | |
437 | ts->cfg.buckets[THROTTLE_BPS_READ].level += size; | |
438 | ts->cfg.buckets[THROTTLE_OPS_READ].level += units; | |
439 | } | |
440 | } | |
441 |