[qemu.git] / util / throttle.c
/*
 * QEMU throttling infrastructure
 *
 * Copyright (C) Nodalink, SARL. 2013
 *
 * Author:
 *   Benoît Canet <[email protected]>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 or
 * (at your option) version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/throttle.h"
#include "qemu/timer.h"
#include "block/aio.h"

/* This function makes a bucket leak
 *
 * @bkt: the bucket to leak
 * @delta_ns: the time delta
 */
void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
{
    double leak;

    /* compute how much to leak */
    leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;

    /* make the bucket leak */
    bkt->level = MAX(bkt->level - leak, 0);
}

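/* Worked example (illustrative figures, not taken from the QEMU source):
 * with bkt->avg = 100 bytes/s and delta_ns = 500000000 (0.5 s), the bucket
 * leaks 100 * 5e8 / 1e9 = 50 units, so a level of 80 drains to 30.
 */
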
/* Calculate the time delta since the last leak and make proportional leaks
 *
 * @now: the current timestamp in ns
 */
static void throttle_do_leak(ThrottleState *ts, int64_t now)
{
    /* compute the time elapsed since the last leak */
    int64_t delta_ns = now - ts->previous_leak;
    int i;

    ts->previous_leak = now;

    if (delta_ns <= 0) {
        return;
    }

    /* make each bucket leak */
    for (i = 0; i < BUCKETS_COUNT; i++) {
        throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns);
    }
}

/* do the real job of computing the time to wait
 *
 * @limit: the throttling limit
 * @extra: the number of operations to delay
 * @ret: the time to wait in ns
 */
static int64_t throttle_do_compute_wait(double limit, double extra)
{
    double wait = extra * NANOSECONDS_PER_SECOND;
    wait /= limit;
    return wait;
}

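/* Worked example (illustrative): with limit = 100 ops/s and extra = 5
 * pending operations, wait = 5 * 1e9 / 100 = 5e7 ns, i.e. 50 ms.
 */
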
/* This function computes the wait time in ns that a leaky bucket should
 * trigger
 *
 * @bkt: the leaky bucket we operate on
 * @ret: the resulting wait time in ns or 0 if the operation can go through
 */
int64_t throttle_compute_wait(LeakyBucket *bkt)
{
    double extra; /* the number of extra units blocking the io */

    if (!bkt->avg) {
        return 0;
    }

    extra = bkt->level - bkt->max;

    if (extra <= 0) {
        return 0;
    }

    return throttle_do_compute_wait(bkt->avg, extra);
}

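/* Worked example (illustrative): with bkt->avg = 100, bkt->max = 150 and
 * bkt->level = 200, extra = 200 - 150 = 50, so the wait is
 * 50 * 1e9 / 100 = 5e8 ns, i.e. 0.5 s.
 */
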
/* This function computes the time that must be waited for this IO
 *
 * @is_write: true if the current IO is a write, false if it's a read
 * @ret: time to wait
 */
static int64_t throttle_compute_wait_for(ThrottleState *ts,
                                         bool is_write)
{
    BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
                                   THROTTLE_OPS_TOTAL,
                                   THROTTLE_BPS_READ,
                                   THROTTLE_OPS_READ},
                                  {THROTTLE_BPS_TOTAL,
                                   THROTTLE_OPS_TOTAL,
                                   THROTTLE_BPS_WRITE,
                                   THROTTLE_OPS_WRITE}, };
    int64_t wait, max_wait = 0;
    int i;

    for (i = 0; i < 4; i++) {
        BucketType index = to_check[is_write][i];
        wait = throttle_compute_wait(&ts->cfg.buckets[index]);
        if (wait > max_wait) {
            max_wait = wait;
        }
    }

    return max_wait;
}

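/* For a given request the wait is the maximum over all applicable buckets:
 * e.g. a write with both bps_total and bps_write configured waits for the
 * slower (larger) of the two computed times.
 */
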
/* compute the timer for this type of operation
 *
 * @is_write: the type of operation
 * @now: the current clock timestamp
 * @next_timestamp: the resulting timer deadline
 * @ret: true if a timer must be set
 */
bool throttle_compute_timer(ThrottleState *ts,
                            bool is_write,
                            int64_t now,
                            int64_t *next_timestamp)
{
    int64_t wait;

    /* leak proportionally to the time elapsed */
    throttle_do_leak(ts, now);

    /* compute the wait time if any */
    wait = throttle_compute_wait_for(ts, is_write);

    /* if the code must wait, compute when the next timer should fire */
    if (wait) {
        *next_timestamp = now + wait;
        return true;
    }

    /* else no need to wait at all */
    *next_timestamp = now;
    return false;
}

/* Add timers to event loop */
void throttle_timers_attach_aio_context(ThrottleTimers *tt,
                                        AioContext *new_context)
{
    tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
                                  tt->read_timer_cb, tt->timer_opaque);
    tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
                                  tt->write_timer_cb, tt->timer_opaque);
}

/* To be called first on the ThrottleState */
void throttle_init(ThrottleState *ts)
{
    memset(ts, 0, sizeof(ThrottleState));
}

/* To be called first on the ThrottleTimers */
void throttle_timers_init(ThrottleTimers *tt,
                          AioContext *aio_context,
                          QEMUClockType clock_type,
                          QEMUTimerCB *read_timer_cb,
                          QEMUTimerCB *write_timer_cb,
                          void *timer_opaque)
{
    memset(tt, 0, sizeof(ThrottleTimers));

    tt->clock_type = clock_type;
    tt->read_timer_cb = read_timer_cb;
    tt->write_timer_cb = write_timer_cb;
    tt->timer_opaque = timer_opaque;
    throttle_timers_attach_aio_context(tt, aio_context);
}

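/* Minimal setup/teardown sketch (illustrative; the callback and opaque
 * names are hypothetical):
 *
 *   ThrottleState ts;
 *   ThrottleTimers tt;
 *
 *   throttle_init(&ts);
 *   throttle_timers_init(&tt, aio_context, QEMU_CLOCK_VIRTUAL,
 *                        my_read_cb, my_write_cb, my_opaque);
 *   ...
 *   throttle_timers_destroy(&tt);
 */
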
/* destroy a timer */
static void throttle_timer_destroy(QEMUTimer **timer)
{
    assert(*timer != NULL);

    timer_del(*timer);
    timer_free(*timer);
    *timer = NULL;
}

/* Remove timers from event loop */
void throttle_timers_detach_aio_context(ThrottleTimers *tt)
{
    int i;

    for (i = 0; i < 2; i++) {
        throttle_timer_destroy(&tt->timers[i]);
    }
}

/* To be called last on the ThrottleTimers */
void throttle_timers_destroy(ThrottleTimers *tt)
{
    throttle_timers_detach_aio_context(tt);
}

/* is any throttling timer configured */
bool throttle_timers_are_initialized(ThrottleTimers *tt)
{
    if (tt->timers[0]) {
        return true;
    }

    return false;
}

/* Does any throttling need to be done?
 *
 * @cfg: the throttling configuration to inspect
 * @ret: true if throttling must be done else false
 */
bool throttle_enabled(ThrottleConfig *cfg)
{
    int i;

    for (i = 0; i < BUCKETS_COUNT; i++) {
        if (cfg->buckets[i].avg > 0) {
            return true;
        }
    }

    return false;
}

/* return true if any two throttling parameters conflict
 *
 * @cfg: the throttling configuration to inspect
 * @ret: true if any conflict detected else false
 */
bool throttle_conflicting(ThrottleConfig *cfg)
{
    bool bps_flag, ops_flag;
    bool bps_max_flag, ops_max_flag;

    bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg &&
               (cfg->buckets[THROTTLE_BPS_READ].avg ||
                cfg->buckets[THROTTLE_BPS_WRITE].avg);

    ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
               (cfg->buckets[THROTTLE_OPS_READ].avg ||
                cfg->buckets[THROTTLE_OPS_WRITE].avg);

    bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max &&
                   (cfg->buckets[THROTTLE_BPS_READ].max ||
                    cfg->buckets[THROTTLE_BPS_WRITE].max);

    ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max &&
                   (cfg->buckets[THROTTLE_OPS_READ].max ||
                    cfg->buckets[THROTTLE_OPS_WRITE].max);

    return bps_flag || ops_flag || bps_max_flag || ops_max_flag;
}

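/* Example (illustrative values): a config with
 * buckets[THROTTLE_BPS_TOTAL].avg = 1000000 and
 * buckets[THROTTLE_BPS_READ].avg = 500000 conflicts, because a total limit
 * and a read or write limit of the same kind are mutually exclusive.
 */
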
/* check if a throttling configuration is valid
 * @cfg: the throttling configuration to inspect
 * @ret: true if valid else false
 */
bool throttle_is_valid(ThrottleConfig *cfg)
{
    bool invalid = false;
    int i;

    for (i = 0; i < BUCKETS_COUNT; i++) {
        if (cfg->buckets[i].avg < 0) {
            invalid = true;
        }
    }

    for (i = 0; i < BUCKETS_COUNT; i++) {
        if (cfg->buckets[i].max < 0) {
            invalid = true;
        }
    }

    return !invalid;
}

/* fix bucket parameters */
static void throttle_fix_bucket(LeakyBucket *bkt)
{
    double min;

    /* zero bucket level */
    bkt->level = 0;

    /* The following is done to cope with the Linux CFQ block scheduler
     * which groups reads and writes into 100 ms blocks in the guest.
     * When there are two processes, one doing reads and the other doing
     * writes, CFQ produces a pattern looking like the following:
     * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR
     * Having a max burst value of 100 ms of the average will help smooth
     * the throttling.
     */
    min = bkt->avg / 10;
    if (bkt->avg && !bkt->max) {
        bkt->max = min;
    }
}

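/* Worked example (illustrative): with bkt->avg = 1000 bytes/s and no max
 * configured, max is set to 1000 / 10 = 100, i.e. the amount the bucket
 * would pass in 100 ms at the average rate.
 */
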
/* take care of canceling a timer */
static void throttle_cancel_timer(QEMUTimer *timer)
{
    assert(timer != NULL);

    timer_del(timer);
}

/* Used to configure the throttle
 *
 * @ts: the throttle state we are working on
 * @tt: the throttle timers we use in this aio context
 * @cfg: the config to set
 */
void throttle_config(ThrottleState *ts,
                     ThrottleTimers *tt,
                     ThrottleConfig *cfg)
{
    int i;

    ts->cfg = *cfg;

    for (i = 0; i < BUCKETS_COUNT; i++) {
        throttle_fix_bucket(&ts->cfg.buckets[i]);
    }

    ts->previous_leak = qemu_clock_get_ns(tt->clock_type);

    for (i = 0; i < 2; i++) {
        throttle_cancel_timer(tt->timers[i]);
    }
}

/* used to get config
 *
 * @ts: the throttle state we are working on
 * @cfg: the config to write
 */
void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
{
    *cfg = ts->cfg;
}


/* Schedule the read or write timer if needed
 *
 * NOTE: this function is not unit tested due to its use of timer_mod
 *
 * @tt: the timers structure
 * @is_write: the type of operation (read/write)
 * @ret: true if the timer has been scheduled else false
 */
bool throttle_schedule_timer(ThrottleState *ts,
                             ThrottleTimers *tt,
                             bool is_write)
{
    int64_t now = qemu_clock_get_ns(tt->clock_type);
    int64_t next_timestamp;
    bool must_wait;

    must_wait = throttle_compute_timer(ts,
                                       is_write,
                                       now,
                                       &next_timestamp);

    /* request not throttled */
    if (!must_wait) {
        return false;
    }

    /* request throttled and timer pending -> do nothing */
    if (timer_pending(tt->timers[is_write])) {
        return true;
    }

    /* request throttled and timer not pending -> arm timer */
    timer_mod(tt->timers[is_write], next_timestamp);
    return true;
}

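/* Sketch of a caller's request path (illustrative, not the actual QEMU
 * block layer code):
 *
 *   if (throttle_schedule_timer(ts, tt, is_write)) {
 *       // throttled: queue the request until the timer callback fires
 *   } else {
 *       throttle_account(ts, is_write, bytes);
 *       // issue the request now
 *   }
 */
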
/* do the accounting for this operation
 *
 * @is_write: the type of operation (read/write)
 * @size: the size of the operation
 */
void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
{
    double units = 1.0;

    /* if cfg.op_size is defined and smaller than size we compute unit count */
    if (ts->cfg.op_size && size > ts->cfg.op_size) {
        units = (double) size / ts->cfg.op_size;
    }

    ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
    ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;

    if (is_write) {
        ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size;
        ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units;
    } else {
        ts->cfg.buckets[THROTTLE_BPS_READ].level += size;
        ts->cfg.buckets[THROTTLE_OPS_READ].level += units;
    }
}

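/* Worked example (illustrative): with cfg.op_size = 4096 and a 65536-byte
 * write, units = 65536 / 4096 = 16, so the write adds 65536 to the
 * bps_total/bps_write buckets and 16 to the ops_total/ops_write buckets.
 */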