]>
Commit | Line | Data |
---|---|---|
2ff1f2e3 AG |
1 | /* |
2 | * QEMU block throttling group infrastructure | |
3 | * | |
4 | * Copyright (C) Nodalink, EURL. 2014 | |
5 | * Copyright (C) Igalia, S.L. 2015 | |
6 | * | |
7 | * Authors: | |
8 | * Benoît Canet <[email protected]> | |
9 | * Alberto Garcia <[email protected]> | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or | |
12 | * modify it under the terms of the GNU General Public License as | |
13 | * published by the Free Software Foundation; either version 2 or | |
14 | * (at your option) version 3 of the License. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | |
23 | */ | |
24 | ||
80c71a24 | 25 | #include "qemu/osdep.h" |
31dce3cc | 26 | #include "sysemu/block-backend.h" |
2ff1f2e3 | 27 | #include "block/throttle-groups.h" |
432d889e | 28 | #include "qemu/throttle-options.h" |
db725815 | 29 | #include "qemu/main-loop.h" |
76f4afb4 AG |
30 | #include "qemu/queue.h" |
31 | #include "qemu/thread.h" | |
32 | #include "sysemu/qtest.h" | |
432d889e | 33 | #include "qapi/error.h" |
9af23989 | 34 | #include "qapi/qapi-visit-block-core.h" |
432d889e MP |
35 | #include "qom/object.h" |
36 | #include "qom/object_interfaces.h" | |
37 | ||
38 | static void throttle_group_obj_init(Object *obj); | |
39 | static void throttle_group_obj_complete(UserCreatable *obj, Error **errp); | |
25b8e4db | 40 | static void timer_cb(ThrottleGroupMember *tgm, bool is_write); |
2ff1f2e3 AG |
41 | |
42 | /* The ThrottleGroup structure (with its ThrottleState) is shared | |
022cdc9f | 43 | * among different ThrottleGroupMembers and it's independent from |
2ff1f2e3 AG |
44 | * AioContext, so in order to use it from different threads it needs |
45 | * its own locking. | |
46 | * | |
47 | * This locking is however handled internally in this file, so it's | |
d87d01e1 | 48 | * transparent to outside users. |
2ff1f2e3 AG |
49 | * |
50 | * The whole ThrottleGroup structure is private and invisible to | |
51 | * outside users, that only use it through its ThrottleState. | |
52 | * | |
022cdc9f | 53 | * In addition to the ThrottleGroup structure, ThrottleGroupMember has |
2ff1f2e3 | 54 | * fields that need to be accessed by other members of the group and |
27ccdd52 | 55 | * therefore also need to be protected by this lock. Once a |
022cdc9f | 56 | * ThrottleGroupMember is registered in a group those fields can be accessed |
27ccdd52 | 57 | * by other threads any time. |
2ff1f2e3 AG |
58 | * |
59 | * Again, all this is handled internally and is mostly transparent to | |
60 | * the outside. The 'throttle_timers' field however has an additional | |
61 | * constraint because it may be temporarily invalid (see for example | |
0d2fac8e | 62 | * blk_set_aio_context()). Therefore in this file a thread will |
022cdc9f MP |
63 | * access some other ThrottleGroupMember's timers only after verifying that |
64 | * that ThrottleGroupMember has throttled requests in the queue. | |
2ff1f2e3 AG |
65 | */ |
66 | typedef struct ThrottleGroup { | |
432d889e MP |
67 | Object parent_obj; |
68 | ||
69 | /* refuse individual property change if initialization is complete */ | |
70 | bool is_initialized; | |
2ff1f2e3 AG |
71 | char *name; /* This is constant during the lifetime of the group */ |
72 | ||
73 | QemuMutex lock; /* This lock protects the following four fields */ | |
74 | ThrottleState ts; | |
022cdc9f MP |
75 | QLIST_HEAD(, ThrottleGroupMember) head; |
76 | ThrottleGroupMember *tokens[2]; | |
2ff1f2e3 | 77 | bool any_timer_armed[2]; |
dbe824cc | 78 | QEMUClockType clock_type; |
2ff1f2e3 | 79 | |
432d889e | 80 | /* This field is protected by the global QEMU mutex */ |
2ff1f2e3 AG |
81 | QTAILQ_ENTRY(ThrottleGroup) list; |
82 | } ThrottleGroup; | |
83 | ||
432d889e | 84 | /* This is protected by the global QEMU mutex */ |
2ff1f2e3 AG |
85 | static QTAILQ_HEAD(, ThrottleGroup) throttle_groups = |
86 | QTAILQ_HEAD_INITIALIZER(throttle_groups); | |
87 | ||
432d889e MP |
88 | |
89 | /* This function reads throttle_groups and must be called under the global | |
90 | * mutex. | |
91 | */ | |
92 | static ThrottleGroup *throttle_group_by_name(const char *name) | |
93 | { | |
94 | ThrottleGroup *iter; | |
95 | ||
96 | /* Look for an existing group with that name */ | |
97 | QTAILQ_FOREACH(iter, &throttle_groups, list) { | |
98 | if (!g_strcmp0(name, iter->name)) { | |
99 | return iter; | |
100 | } | |
101 | } | |
102 | ||
103 | return NULL; | |
104 | } | |
105 | ||
d8e7d87e MP |
106 | /* This function reads throttle_groups and must be called under the global |
107 | * mutex. | |
108 | */ | |
109 | bool throttle_group_exists(const char *name) | |
110 | { | |
111 | return throttle_group_by_name(name) != NULL; | |
112 | } | |
113 | ||
2ff1f2e3 AG |
114 | /* Increments the reference count of a ThrottleGroup given its name. |
115 | * | |
116 | * If no ThrottleGroup is found with the given name a new one is | |
117 | * created. | |
118 | * | |
432d889e MP |
119 | * This function edits throttle_groups and must be called under the global |
120 | * mutex. | |
121 | * | |
2ff1f2e3 | 122 | * @name: the name of the ThrottleGroup |
973f2ddf | 123 | * @ret: the ThrottleState member of the ThrottleGroup |
2ff1f2e3 | 124 | */ |
973f2ddf | 125 | ThrottleState *throttle_group_incref(const char *name) |
2ff1f2e3 AG |
126 | { |
127 | ThrottleGroup *tg = NULL; | |
2ff1f2e3 AG |
128 | |
129 | /* Look for an existing group with that name */ | |
432d889e MP |
130 | tg = throttle_group_by_name(name); |
131 | ||
132 | if (tg) { | |
133 | object_ref(OBJECT(tg)); | |
134 | } else { | |
135 | /* Create a new one if not found */ | |
136 | /* new ThrottleGroup obj will have a refcnt = 1 */ | |
137 | tg = THROTTLE_GROUP(object_new(TYPE_THROTTLE_GROUP)); | |
2ff1f2e3 | 138 | tg->name = g_strdup(name); |
432d889e | 139 | throttle_group_obj_complete(USER_CREATABLE(tg), &error_abort); |
2ff1f2e3 AG |
140 | } |
141 | ||
973f2ddf | 142 | return &tg->ts; |
2ff1f2e3 AG |
143 | } |
144 | ||
145 | /* Decrease the reference count of a ThrottleGroup. | |
146 | * | |
147 | * When the reference count reaches zero the ThrottleGroup is | |
148 | * destroyed. | |
149 | * | |
432d889e MP |
150 | * This function edits throttle_groups and must be called under the global |
151 | * mutex. | |
152 | * | |
973f2ddf | 153 | * @ts: The ThrottleGroup to unref, given by its ThrottleState member |
2ff1f2e3 | 154 | */ |
973f2ddf | 155 | void throttle_group_unref(ThrottleState *ts) |
2ff1f2e3 | 156 | { |
973f2ddf | 157 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
432d889e | 158 | object_unref(OBJECT(tg)); |
2ff1f2e3 AG |
159 | } |
160 | ||
022cdc9f | 161 | /* Get the name from a ThrottleGroupMember's group. The name (and the pointer) |
49d2165d | 162 | * is guaranteed to remain constant during the lifetime of the group. |
2ff1f2e3 | 163 | * |
022cdc9f | 164 | * @tgm: a ThrottleGroupMember |
2ff1f2e3 AG |
165 | * @ret: the name of the group. |
166 | */ | |
022cdc9f | 167 | const char *throttle_group_get_name(ThrottleGroupMember *tgm) |
2ff1f2e3 | 168 | { |
022cdc9f | 169 | ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); |
2ff1f2e3 AG |
170 | return tg->name; |
171 | } | |
172 | ||
022cdc9f MP |
173 | /* Return the next ThrottleGroupMember in the round-robin sequence, simulating |
174 | * a circular list. | |
2ff1f2e3 AG |
175 | * |
176 | * This assumes that tg->lock is held. | |
177 | * | |
022cdc9f MP |
178 | * @tgm: the current ThrottleGroupMember |
179 | * @ret: the next ThrottleGroupMember in the sequence | |
2ff1f2e3 | 180 | */ |
022cdc9f | 181 | static ThrottleGroupMember *throttle_group_next_tgm(ThrottleGroupMember *tgm) |
2ff1f2e3 | 182 | { |
022cdc9f | 183 | ThrottleState *ts = tgm->throttle_state; |
2ff1f2e3 | 184 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
022cdc9f | 185 | ThrottleGroupMember *next = QLIST_NEXT(tgm, round_robin); |
2ff1f2e3 AG |
186 | |
187 | if (!next) { | |
31dce3cc | 188 | next = QLIST_FIRST(&tg->head); |
2ff1f2e3 AG |
189 | } |
190 | ||
022cdc9f | 191 | return next; |
2ff1f2e3 AG |
192 | } |
193 | ||
6bf77e1c | 194 | /* |
022cdc9f | 195 | * Return whether a ThrottleGroupMember has pending requests. |
6bf77e1c AG |
196 | * |
197 | * This assumes that tg->lock is held. | |
198 | * | |
022cdc9f MP |
199 | * @tgm: the ThrottleGroupMember |
200 | * @is_write: the type of operation (read/write) | |
201 | * @ret: whether the ThrottleGroupMember has pending requests. | |
6bf77e1c | 202 | */ |
022cdc9f | 203 | static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm, |
6bf77e1c AG |
204 | bool is_write) |
205 | { | |
022cdc9f | 206 | return tgm->pending_reqs[is_write]; |
6bf77e1c AG |
207 | } |
208 | ||
022cdc9f MP |
209 | /* Return the next ThrottleGroupMember in the round-robin sequence with pending |
210 | * I/O requests. | |
76f4afb4 AG |
211 | * |
212 | * This assumes that tg->lock is held. | |
213 | * | |
022cdc9f | 214 | * @tgm: the current ThrottleGroupMember |
76f4afb4 | 215 | * @is_write: the type of operation (read/write) |
022cdc9f MP |
216 | * @ret: the next ThrottleGroupMember with pending requests, or tgm if |
217 | * there is none. | |
76f4afb4 | 218 | */ |
022cdc9f MP |
219 | static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm, |
220 | bool is_write) | |
76f4afb4 | 221 | { |
022cdc9f MP |
222 | ThrottleState *ts = tgm->throttle_state; |
223 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); | |
224 | ThrottleGroupMember *token, *start; | |
76f4afb4 | 225 | |
5d8e4ca0 AG |
226 | /* If this member has its I/O limits disabled then it means that |
227 | * it's being drained. Skip the round-robin search and return tgm | |
228 | * immediately if it has pending requests. Otherwise we could be | |
229 | * forcing it to wait for other member's throttled requests. */ | |
230 | if (tgm_has_pending_reqs(tgm, is_write) && | |
231 | atomic_read(&tgm->io_limits_disabled)) { | |
232 | return tgm; | |
233 | } | |
234 | ||
76f4afb4 AG |
235 | start = token = tg->tokens[is_write]; |
236 | ||
237 | /* get next bs round in round robin style */ | |
022cdc9f MP |
238 | token = throttle_group_next_tgm(token); |
239 | while (token != start && !tgm_has_pending_reqs(token, is_write)) { | |
240 | token = throttle_group_next_tgm(token); | |
76f4afb4 AG |
241 | } |
242 | ||
243 | /* If no IO are queued for scheduling on the next round robin token | |
022cdc9f MP |
244 | * then decide the token is the current tgm because chances are |
245 | * the current tgm got the current request queued. | |
76f4afb4 | 246 | */ |
022cdc9f MP |
247 | if (token == start && !tgm_has_pending_reqs(token, is_write)) { |
248 | token = tgm; | |
76f4afb4 AG |
249 | } |
250 | ||
022cdc9f MP |
251 | /* Either we return the original TGM, or one with pending requests */ |
252 | assert(token == tgm || tgm_has_pending_reqs(token, is_write)); | |
6bf77e1c | 253 | |
76f4afb4 AG |
254 | return token; |
255 | } | |
256 | ||
022cdc9f MP |
257 | /* Check if the next I/O request for a ThrottleGroupMember needs to be |
258 | * throttled or not. If there's no timer set in this group, set one and update | |
259 | * the token accordingly. | |
76f4afb4 AG |
260 | * |
261 | * This assumes that tg->lock is held. | |
262 | * | |
022cdc9f | 263 | * @tgm: the current ThrottleGroupMember |
76f4afb4 AG |
264 | * @is_write: the type of operation (read/write) |
265 | * @ret: whether the I/O request needs to be throttled or not | |
266 | */ | |
022cdc9f MP |
267 | static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm, |
268 | bool is_write) | |
76f4afb4 | 269 | { |
022cdc9f | 270 | ThrottleState *ts = tgm->throttle_state; |
76f4afb4 | 271 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
022cdc9f | 272 | ThrottleTimers *tt = &tgm->throttle_timers; |
76f4afb4 AG |
273 | bool must_wait; |
274 | ||
022cdc9f | 275 | if (atomic_read(&tgm->io_limits_disabled)) { |
ce0f1412 PB |
276 | return false; |
277 | } | |
278 | ||
76f4afb4 AG |
279 | /* Check if any of the timers in this group is already armed */ |
280 | if (tg->any_timer_armed[is_write]) { | |
281 | return true; | |
282 | } | |
283 | ||
284 | must_wait = throttle_schedule_timer(ts, tt, is_write); | |
285 | ||
022cdc9f | 286 | /* If a timer just got armed, set tgm as the current token */ |
76f4afb4 | 287 | if (must_wait) { |
022cdc9f | 288 | tg->tokens[is_write] = tgm; |
76f4afb4 AG |
289 | tg->any_timer_armed[is_write] = true; |
290 | } | |
291 | ||
292 | return must_wait; | |
293 | } | |
294 | ||
022cdc9f | 295 | /* Start the next pending I/O request for a ThrottleGroupMember. Return whether |
3b170dc8 PB |
296 | * any request was actually pending. |
297 | * | |
022cdc9f | 298 | * @tgm: the current ThrottleGroupMember |
3b170dc8 PB |
299 | * @is_write: the type of operation (read/write) |
300 | */ | |
022cdc9f | 301 | static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tgm, |
3b170dc8 PB |
302 | bool is_write) |
303 | { | |
93001e9d | 304 | bool ret; |
3b170dc8 | 305 | |
022cdc9f MP |
306 | qemu_co_mutex_lock(&tgm->throttled_reqs_lock); |
307 | ret = qemu_co_queue_next(&tgm->throttled_reqs[is_write]); | |
308 | qemu_co_mutex_unlock(&tgm->throttled_reqs_lock); | |
93001e9d PB |
309 | |
310 | return ret; | |
3b170dc8 PB |
311 | } |
312 | ||
76f4afb4 AG |
313 | /* Look for the next pending I/O request and schedule it. |
314 | * | |
315 | * This assumes that tg->lock is held. | |
316 | * | |
022cdc9f | 317 | * @tgm: the current ThrottleGroupMember |
76f4afb4 AG |
318 | * @is_write: the type of operation (read/write) |
319 | */ | |
022cdc9f | 320 | static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write) |
76f4afb4 | 321 | { |
022cdc9f MP |
322 | ThrottleState *ts = tgm->throttle_state; |
323 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); | |
76f4afb4 | 324 | bool must_wait; |
022cdc9f | 325 | ThrottleGroupMember *token; |
76f4afb4 AG |
326 | |
327 | /* Check if there's any pending request to schedule next */ | |
022cdc9f MP |
328 | token = next_throttle_token(tgm, is_write); |
329 | if (!tgm_has_pending_reqs(token, is_write)) { | |
76f4afb4 AG |
330 | return; |
331 | } | |
332 | ||
333 | /* Set a timer for the request if it needs to be throttled */ | |
334 | must_wait = throttle_group_schedule_timer(token, is_write); | |
335 | ||
336 | /* If it doesn't have to wait, queue it for immediate execution */ | |
337 | if (!must_wait) { | |
022cdc9f | 338 | /* Give preference to requests from the current tgm */ |
76f4afb4 | 339 | if (qemu_in_coroutine() && |
022cdc9f MP |
340 | throttle_group_co_restart_queue(tgm, is_write)) { |
341 | token = tgm; | |
76f4afb4 | 342 | } else { |
022cdc9f | 343 | ThrottleTimers *tt = &token->throttle_timers; |
dbe824cc | 344 | int64_t now = qemu_clock_get_ns(tg->clock_type); |
7258ed93 | 345 | timer_mod(tt->timers[is_write], now); |
76f4afb4 AG |
346 | tg->any_timer_armed[is_write] = true; |
347 | } | |
348 | tg->tokens[is_write] = token; | |
349 | } | |
350 | } | |
351 | ||
352 | /* Check if an I/O request needs to be throttled, wait and set a timer | |
353 | * if necessary, and schedule the next request using a round robin | |
354 | * algorithm. | |
355 | * | |
022cdc9f | 356 | * @tgm: the current ThrottleGroupMember |
76f4afb4 AG |
357 | * @bytes: the number of bytes for this I/O |
358 | * @is_write: the type of operation (read/write) | |
359 | */ | |
022cdc9f | 360 | void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm, |
76f4afb4 AG |
361 | unsigned int bytes, |
362 | bool is_write) | |
363 | { | |
364 | bool must_wait; | |
022cdc9f MP |
365 | ThrottleGroupMember *token; |
366 | ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); | |
76f4afb4 AG |
367 | qemu_mutex_lock(&tg->lock); |
368 | ||
369 | /* First we check if this I/O has to be throttled. */ | |
022cdc9f | 370 | token = next_throttle_token(tgm, is_write); |
76f4afb4 AG |
371 | must_wait = throttle_group_schedule_timer(token, is_write); |
372 | ||
373 | /* Wait if there's a timer set or queued requests of this type */ | |
022cdc9f MP |
374 | if (must_wait || tgm->pending_reqs[is_write]) { |
375 | tgm->pending_reqs[is_write]++; | |
76f4afb4 | 376 | qemu_mutex_unlock(&tg->lock); |
022cdc9f MP |
377 | qemu_co_mutex_lock(&tgm->throttled_reqs_lock); |
378 | qemu_co_queue_wait(&tgm->throttled_reqs[is_write], | |
379 | &tgm->throttled_reqs_lock); | |
380 | qemu_co_mutex_unlock(&tgm->throttled_reqs_lock); | |
76f4afb4 | 381 | qemu_mutex_lock(&tg->lock); |
022cdc9f | 382 | tgm->pending_reqs[is_write]--; |
76f4afb4 AG |
383 | } |
384 | ||
385 | /* The I/O will be executed, so do the accounting */ | |
022cdc9f | 386 | throttle_account(tgm->throttle_state, is_write, bytes); |
76f4afb4 AG |
387 | |
388 | /* Schedule the next request */ | |
022cdc9f | 389 | schedule_next_request(tgm, is_write); |
76f4afb4 AG |
390 | |
391 | qemu_mutex_unlock(&tg->lock); | |
392 | } | |
393 | ||
3b170dc8 | 394 | typedef struct { |
022cdc9f | 395 | ThrottleGroupMember *tgm; |
3b170dc8 PB |
396 | bool is_write; |
397 | } RestartData; | |
398 | ||
399 | static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) | |
7258ed93 | 400 | { |
3b170dc8 | 401 | RestartData *data = opaque; |
022cdc9f MP |
402 | ThrottleGroupMember *tgm = data->tgm; |
403 | ThrottleState *ts = tgm->throttle_state; | |
404 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); | |
3b170dc8 | 405 | bool is_write = data->is_write; |
7258ed93 PB |
406 | bool empty_queue; |
407 | ||
022cdc9f | 408 | empty_queue = !throttle_group_co_restart_queue(tgm, is_write); |
7258ed93 PB |
409 | |
410 | /* If the request queue was empty then we have to take care of | |
411 | * scheduling the next one */ | |
412 | if (empty_queue) { | |
413 | qemu_mutex_lock(&tg->lock); | |
022cdc9f | 414 | schedule_next_request(tgm, is_write); |
7258ed93 PB |
415 | qemu_mutex_unlock(&tg->lock); |
416 | } | |
43a5dc02 MP |
417 | |
418 | g_free(data); | |
bc19a0a6 SH |
419 | |
420 | atomic_dec(&tgm->restart_pending); | |
421 | aio_wait_kick(); | |
7258ed93 PB |
422 | } |
423 | ||
022cdc9f | 424 | static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) |
3b170dc8 PB |
425 | { |
426 | Coroutine *co; | |
43a5dc02 MP |
427 | RestartData *rd = g_new0(RestartData, 1); |
428 | ||
429 | rd->tgm = tgm; | |
430 | rd->is_write = is_write; | |
3b170dc8 | 431 | |
25b8e4db AG |
432 | /* This function is called when a timer is fired or when |
433 | * throttle_group_restart_tgm() is called. Either way, there can | |
434 | * be no timer pending on this tgm at this point */ | |
435 | assert(!timer_pending(tgm->throttle_timers.timers[is_write])); | |
436 | ||
bc19a0a6 SH |
437 | atomic_inc(&tgm->restart_pending); |
438 | ||
43a5dc02 | 439 | co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd); |
c61791fc | 440 | aio_co_enter(tgm->aio_context, co); |
3b170dc8 PB |
441 | } |
442 | ||
022cdc9f | 443 | void throttle_group_restart_tgm(ThrottleGroupMember *tgm) |
a72f6414 | 444 | { |
25b8e4db AG |
445 | int i; |
446 | ||
022cdc9f | 447 | if (tgm->throttle_state) { |
25b8e4db AG |
448 | for (i = 0; i < 2; i++) { |
449 | QEMUTimer *t = tgm->throttle_timers.timers[i]; | |
450 | if (timer_pending(t)) { | |
451 | /* If there's a pending timer on this tgm, fire it now */ | |
452 | timer_del(t); | |
453 | timer_cb(tgm, i); | |
454 | } else { | |
455 | /* Else run the next request from the queue manually */ | |
456 | throttle_group_restart_queue(tgm, i); | |
457 | } | |
458 | } | |
a72f6414 PB |
459 | } |
460 | } | |
461 | ||
2ff1f2e3 AG |
462 | /* Update the throttle configuration for a particular group. Similar |
463 | * to throttle_config(), but guarantees atomicity within the | |
464 | * throttling group. | |
465 | * | |
022cdc9f | 466 | * @tgm: a ThrottleGroupMember that is a member of the group |
2ff1f2e3 AG |
467 | * @cfg: the configuration to set |
468 | */ | |
022cdc9f | 469 | void throttle_group_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg) |
2ff1f2e3 | 470 | { |
022cdc9f | 471 | ThrottleState *ts = tgm->throttle_state; |
2ff1f2e3 AG |
472 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
473 | qemu_mutex_lock(&tg->lock); | |
27e4cf13 | 474 | throttle_config(ts, tg->clock_type, cfg); |
2ff1f2e3 | 475 | qemu_mutex_unlock(&tg->lock); |
a72f6414 | 476 | |
022cdc9f | 477 | throttle_group_restart_tgm(tgm); |
2ff1f2e3 AG |
478 | } |
479 | ||
480 | /* Get the throttle configuration from a particular group. Similar to | |
481 | * throttle_get_config(), but guarantees atomicity within the | |
482 | * throttling group. | |
483 | * | |
022cdc9f | 484 | * @tgm: a ThrottleGroupMember that is a member of the group |
2ff1f2e3 AG |
485 | * @cfg: the configuration will be written here |
486 | */ | |
022cdc9f | 487 | void throttle_group_get_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg) |
2ff1f2e3 | 488 | { |
022cdc9f | 489 | ThrottleState *ts = tgm->throttle_state; |
2ff1f2e3 AG |
490 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
491 | qemu_mutex_lock(&tg->lock); | |
492 | throttle_get_config(ts, cfg); | |
493 | qemu_mutex_unlock(&tg->lock); | |
494 | } | |
495 | ||
76f4afb4 AG |
496 | /* ThrottleTimers callback. This wakes up a request that was waiting |
497 | * because it had been throttled. | |
498 | * | |
c61791fc | 499 | * @tgm: the ThrottleGroupMember whose request had been throttled |
76f4afb4 AG |
500 | * @is_write: the type of operation (read/write) |
501 | */ | |
c61791fc | 502 | static void timer_cb(ThrottleGroupMember *tgm, bool is_write) |
76f4afb4 | 503 | { |
022cdc9f | 504 | ThrottleState *ts = tgm->throttle_state; |
76f4afb4 | 505 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
76f4afb4 AG |
506 | |
507 | /* The timer has just been fired, so we can update the flag */ | |
508 | qemu_mutex_lock(&tg->lock); | |
509 | tg->any_timer_armed[is_write] = false; | |
510 | qemu_mutex_unlock(&tg->lock); | |
511 | ||
512 | /* Run the request that was waiting for this timer */ | |
022cdc9f | 513 | throttle_group_restart_queue(tgm, is_write); |
76f4afb4 AG |
514 | } |
515 | ||
516 | static void read_timer_cb(void *opaque) | |
517 | { | |
518 | timer_cb(opaque, false); | |
519 | } | |
520 | ||
521 | static void write_timer_cb(void *opaque) | |
522 | { | |
523 | timer_cb(opaque, true); | |
524 | } | |
525 | ||
022cdc9f MP |
526 | /* Register a ThrottleGroupMember from the throttling group, also initializing |
527 | * its timers and updating its throttle_state pointer to point to it. If a | |
31dce3cc | 528 | * throttling group with that name does not exist yet, it will be created. |
2ff1f2e3 | 529 | * |
432d889e MP |
530 | * This function edits throttle_groups and must be called under the global |
531 | * mutex. | |
532 | * | |
022cdc9f | 533 | * @tgm: the ThrottleGroupMember to insert |
2ff1f2e3 | 534 | * @groupname: the name of the group |
c61791fc | 535 | * @ctx: the AioContext to use |
2ff1f2e3 | 536 | */ |
022cdc9f | 537 | void throttle_group_register_tgm(ThrottleGroupMember *tgm, |
c61791fc MP |
538 | const char *groupname, |
539 | AioContext *ctx) | |
2ff1f2e3 AG |
540 | { |
541 | int i; | |
973f2ddf HR |
542 | ThrottleState *ts = throttle_group_incref(groupname); |
543 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); | |
022cdc9f MP |
544 | |
545 | tgm->throttle_state = ts; | |
c61791fc | 546 | tgm->aio_context = ctx; |
bc19a0a6 | 547 | atomic_set(&tgm->restart_pending, 0); |
2ff1f2e3 AG |
548 | |
549 | qemu_mutex_lock(&tg->lock); | |
022cdc9f | 550 | /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */ |
2ff1f2e3 AG |
551 | for (i = 0; i < 2; i++) { |
552 | if (!tg->tokens[i]) { | |
022cdc9f | 553 | tg->tokens[i] = tgm; |
2ff1f2e3 AG |
554 | } |
555 | } | |
556 | ||
022cdc9f | 557 | QLIST_INSERT_HEAD(&tg->head, tgm, round_robin); |
76f4afb4 | 558 | |
022cdc9f | 559 | throttle_timers_init(&tgm->throttle_timers, |
c61791fc | 560 | tgm->aio_context, |
dbe824cc | 561 | tg->clock_type, |
76f4afb4 AG |
562 | read_timer_cb, |
563 | write_timer_cb, | |
c61791fc | 564 | tgm); |
f738cfc8 MP |
565 | qemu_co_mutex_init(&tgm->throttled_reqs_lock); |
566 | qemu_co_queue_init(&tgm->throttled_reqs[0]); | |
567 | qemu_co_queue_init(&tgm->throttled_reqs[1]); | |
76f4afb4 | 568 | |
2ff1f2e3 AG |
569 | qemu_mutex_unlock(&tg->lock); |
570 | } | |
571 | ||
022cdc9f | 572 | /* Unregister a ThrottleGroupMember from its group, removing it from the list, |
31dce3cc | 573 | * destroying the timers and setting the throttle_state pointer to NULL. |
2ff1f2e3 | 574 | * |
022cdc9f MP |
575 | * The ThrottleGroupMember must not have pending throttled requests, so the |
576 | * caller has to drain them first. | |
5ac72418 | 577 | * |
2ff1f2e3 AG |
578 | * The group will be destroyed if it's empty after this operation. |
579 | * | |
022cdc9f | 580 | * @tgm the ThrottleGroupMember to remove |
2ff1f2e3 | 581 | */ |
022cdc9f | 582 | void throttle_group_unregister_tgm(ThrottleGroupMember *tgm) |
2ff1f2e3 | 583 | { |
022cdc9f MP |
584 | ThrottleState *ts = tgm->throttle_state; |
585 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); | |
586 | ThrottleGroupMember *token; | |
2ff1f2e3 AG |
587 | int i; |
588 | ||
d8e7d87e MP |
589 | if (!ts) { |
590 | /* Discard already unregistered tgm */ | |
591 | return; | |
592 | } | |
593 | ||
bc19a0a6 SH |
594 | /* Wait for throttle_group_restart_queue_entry() coroutines to finish */ |
595 | AIO_WAIT_WHILE(tgm->aio_context, atomic_read(&tgm->restart_pending) > 0); | |
596 | ||
2ff1f2e3 AG |
597 | qemu_mutex_lock(&tg->lock); |
598 | for (i = 0; i < 2; i++) { | |
25b8e4db AG |
599 | assert(tgm->pending_reqs[i] == 0); |
600 | assert(qemu_co_queue_empty(&tgm->throttled_reqs[i])); | |
601 | assert(!timer_pending(tgm->throttle_timers.timers[i])); | |
022cdc9f MP |
602 | if (tg->tokens[i] == tgm) { |
603 | token = throttle_group_next_tgm(tgm); | |
604 | /* Take care of the case where this is the last tgm in the group */ | |
605 | if (token == tgm) { | |
2ff1f2e3 AG |
606 | token = NULL; |
607 | } | |
608 | tg->tokens[i] = token; | |
609 | } | |
610 | } | |
611 | ||
022cdc9f MP |
612 | /* remove the current tgm from the list */ |
613 | QLIST_REMOVE(tgm, round_robin); | |
614 | throttle_timers_destroy(&tgm->throttle_timers); | |
2ff1f2e3 AG |
615 | qemu_mutex_unlock(&tg->lock); |
616 | ||
973f2ddf | 617 | throttle_group_unref(&tg->ts); |
022cdc9f | 618 | tgm->throttle_state = NULL; |
2ff1f2e3 AG |
619 | } |
620 | ||
c61791fc MP |
621 | void throttle_group_attach_aio_context(ThrottleGroupMember *tgm, |
622 | AioContext *new_context) | |
623 | { | |
624 | ThrottleTimers *tt = &tgm->throttle_timers; | |
625 | throttle_timers_attach_aio_context(tt, new_context); | |
626 | tgm->aio_context = new_context; | |
627 | } | |
628 | ||
629 | void throttle_group_detach_aio_context(ThrottleGroupMember *tgm) | |
630 | { | |
341e0b56 | 631 | ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); |
c61791fc | 632 | ThrottleTimers *tt = &tgm->throttle_timers; |
341e0b56 | 633 | int i; |
dc868fb0 SH |
634 | |
635 | /* Requests must have been drained */ | |
636 | assert(tgm->pending_reqs[0] == 0 && tgm->pending_reqs[1] == 0); | |
637 | assert(qemu_co_queue_empty(&tgm->throttled_reqs[0])); | |
638 | assert(qemu_co_queue_empty(&tgm->throttled_reqs[1])); | |
639 | ||
341e0b56 SH |
640 | /* Kick off next ThrottleGroupMember, if necessary */ |
641 | qemu_mutex_lock(&tg->lock); | |
642 | for (i = 0; i < 2; i++) { | |
643 | if (timer_pending(tt->timers[i])) { | |
644 | tg->any_timer_armed[i] = false; | |
645 | schedule_next_request(tgm, i); | |
646 | } | |
647 | } | |
648 | qemu_mutex_unlock(&tg->lock); | |
649 | ||
c61791fc MP |
650 | throttle_timers_detach_aio_context(tt); |
651 | tgm->aio_context = NULL; | |
652 | } | |
653 | ||
432d889e MP |
654 | #undef THROTTLE_OPT_PREFIX |
655 | #define THROTTLE_OPT_PREFIX "x-" | |
656 | ||
657 | /* Helper struct and array for QOM property setter/getter */ | |
658 | typedef struct { | |
659 | const char *name; | |
660 | BucketType type; | |
661 | enum { | |
662 | AVG, | |
663 | MAX, | |
664 | BURST_LENGTH, | |
665 | IOPS_SIZE, | |
666 | } category; | |
667 | } ThrottleParamInfo; | |
668 | ||
669 | static ThrottleParamInfo properties[] = { | |
670 | { | |
671 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL, | |
672 | THROTTLE_OPS_TOTAL, AVG, | |
673 | }, | |
674 | { | |
675 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX, | |
676 | THROTTLE_OPS_TOTAL, MAX, | |
677 | }, | |
678 | { | |
679 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX_LENGTH, | |
680 | THROTTLE_OPS_TOTAL, BURST_LENGTH, | |
681 | }, | |
682 | { | |
683 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ, | |
684 | THROTTLE_OPS_READ, AVG, | |
685 | }, | |
686 | { | |
687 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX, | |
688 | THROTTLE_OPS_READ, MAX, | |
689 | }, | |
690 | { | |
691 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX_LENGTH, | |
692 | THROTTLE_OPS_READ, BURST_LENGTH, | |
693 | }, | |
694 | { | |
695 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE, | |
696 | THROTTLE_OPS_WRITE, AVG, | |
697 | }, | |
698 | { | |
699 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX, | |
700 | THROTTLE_OPS_WRITE, MAX, | |
701 | }, | |
702 | { | |
703 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX_LENGTH, | |
704 | THROTTLE_OPS_WRITE, BURST_LENGTH, | |
705 | }, | |
706 | { | |
707 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL, | |
708 | THROTTLE_BPS_TOTAL, AVG, | |
709 | }, | |
710 | { | |
711 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX, | |
712 | THROTTLE_BPS_TOTAL, MAX, | |
713 | }, | |
714 | { | |
715 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX_LENGTH, | |
716 | THROTTLE_BPS_TOTAL, BURST_LENGTH, | |
717 | }, | |
718 | { | |
719 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ, | |
720 | THROTTLE_BPS_READ, AVG, | |
721 | }, | |
722 | { | |
723 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX, | |
724 | THROTTLE_BPS_READ, MAX, | |
725 | }, | |
726 | { | |
727 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX_LENGTH, | |
728 | THROTTLE_BPS_READ, BURST_LENGTH, | |
729 | }, | |
730 | { | |
731 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE, | |
732 | THROTTLE_BPS_WRITE, AVG, | |
733 | }, | |
734 | { | |
735 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX, | |
736 | THROTTLE_BPS_WRITE, MAX, | |
737 | }, | |
738 | { | |
739 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX_LENGTH, | |
740 | THROTTLE_BPS_WRITE, BURST_LENGTH, | |
741 | }, | |
742 | { | |
743 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_SIZE, | |
744 | 0, IOPS_SIZE, | |
745 | } | |
746 | }; | |
747 | ||
748 | /* This function edits throttle_groups and must be called under the global | |
749 | * mutex */ | |
750 | static void throttle_group_obj_init(Object *obj) | |
751 | { | |
752 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
753 | ||
754 | tg->clock_type = QEMU_CLOCK_REALTIME; | |
755 | if (qtest_enabled()) { | |
756 | /* For testing block IO throttling only */ | |
757 | tg->clock_type = QEMU_CLOCK_VIRTUAL; | |
758 | } | |
759 | tg->is_initialized = false; | |
760 | qemu_mutex_init(&tg->lock); | |
761 | throttle_init(&tg->ts); | |
762 | QLIST_INIT(&tg->head); | |
763 | } | |
764 | ||
765 | /* This function edits throttle_groups and must be called under the global | |
766 | * mutex */ | |
767 | static void throttle_group_obj_complete(UserCreatable *obj, Error **errp) | |
768 | { | |
769 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
770 | ThrottleConfig cfg; | |
771 | ||
772 | /* set group name to object id if it exists */ | |
773 | if (!tg->name && tg->parent_obj.parent) { | |
774 | tg->name = object_get_canonical_path_component(OBJECT(obj)); | |
775 | } | |
776 | /* We must have a group name at this point */ | |
777 | assert(tg->name); | |
778 | ||
779 | /* error if name is duplicate */ | |
d8e7d87e | 780 | if (throttle_group_exists(tg->name)) { |
432d889e MP |
781 | error_setg(errp, "A group with this name already exists"); |
782 | return; | |
783 | } | |
784 | ||
785 | /* check validity */ | |
786 | throttle_get_config(&tg->ts, &cfg); | |
787 | if (!throttle_is_valid(&cfg, errp)) { | |
788 | return; | |
789 | } | |
790 | throttle_config(&tg->ts, tg->clock_type, &cfg); | |
791 | QTAILQ_INSERT_TAIL(&throttle_groups, tg, list); | |
792 | tg->is_initialized = true; | |
793 | } | |
794 | ||
795 | /* This function edits throttle_groups and must be called under the global | |
796 | * mutex */ | |
797 | static void throttle_group_obj_finalize(Object *obj) | |
798 | { | |
799 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
800 | if (tg->is_initialized) { | |
801 | QTAILQ_REMOVE(&throttle_groups, tg, list); | |
802 | } | |
803 | qemu_mutex_destroy(&tg->lock); | |
804 | g_free(tg->name); | |
805 | } | |
806 | ||
807 | static void throttle_group_set(Object *obj, Visitor *v, const char * name, | |
808 | void *opaque, Error **errp) | |
809 | ||
810 | { | |
811 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
812 | ThrottleConfig *cfg; | |
813 | ThrottleParamInfo *info = opaque; | |
814 | Error *local_err = NULL; | |
815 | int64_t value; | |
816 | ||
817 | /* If we have finished initialization, don't accept individual property | |
818 | * changes through QOM. Throttle configuration limits must be set in one | |
819 | * transaction, as certain combinations are invalid. | |
820 | */ | |
821 | if (tg->is_initialized) { | |
822 | error_setg(&local_err, "Property cannot be set after initialization"); | |
823 | goto ret; | |
824 | } | |
825 | ||
826 | visit_type_int64(v, name, &value, &local_err); | |
827 | if (local_err) { | |
828 | goto ret; | |
829 | } | |
830 | if (value < 0) { | |
831 | error_setg(&local_err, "Property values cannot be negative"); | |
832 | goto ret; | |
833 | } | |
834 | ||
835 | cfg = &tg->ts.cfg; | |
836 | switch (info->category) { | |
837 | case AVG: | |
838 | cfg->buckets[info->type].avg = value; | |
839 | break; | |
840 | case MAX: | |
841 | cfg->buckets[info->type].max = value; | |
842 | break; | |
843 | case BURST_LENGTH: | |
844 | if (value > UINT_MAX) { | |
845 | error_setg(&local_err, "%s value must be in the" | |
846 | "range [0, %u]", info->name, UINT_MAX); | |
847 | goto ret; | |
848 | } | |
849 | cfg->buckets[info->type].burst_length = value; | |
850 | break; | |
851 | case IOPS_SIZE: | |
852 | cfg->op_size = value; | |
853 | break; | |
854 | } | |
855 | ||
856 | ret: | |
857 | error_propagate(errp, local_err); | |
858 | return; | |
859 | ||
860 | } | |
861 | ||
862 | static void throttle_group_get(Object *obj, Visitor *v, const char *name, | |
863 | void *opaque, Error **errp) | |
864 | { | |
865 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
866 | ThrottleConfig cfg; | |
867 | ThrottleParamInfo *info = opaque; | |
868 | int64_t value; | |
869 | ||
870 | throttle_get_config(&tg->ts, &cfg); | |
871 | switch (info->category) { | |
872 | case AVG: | |
873 | value = cfg.buckets[info->type].avg; | |
874 | break; | |
875 | case MAX: | |
876 | value = cfg.buckets[info->type].max; | |
877 | break; | |
878 | case BURST_LENGTH: | |
879 | value = cfg.buckets[info->type].burst_length; | |
880 | break; | |
881 | case IOPS_SIZE: | |
882 | value = cfg.op_size; | |
883 | break; | |
884 | } | |
885 | ||
886 | visit_type_int64(v, name, &value, errp); | |
887 | } | |
888 | ||
889 | static void throttle_group_set_limits(Object *obj, Visitor *v, | |
890 | const char *name, void *opaque, | |
891 | Error **errp) | |
892 | ||
893 | { | |
894 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
895 | ThrottleConfig cfg; | |
896 | ThrottleLimits arg = { 0 }; | |
897 | ThrottleLimits *argp = &arg; | |
898 | Error *local_err = NULL; | |
899 | ||
900 | visit_type_ThrottleLimits(v, name, &argp, &local_err); | |
901 | if (local_err) { | |
902 | goto ret; | |
903 | } | |
904 | qemu_mutex_lock(&tg->lock); | |
905 | throttle_get_config(&tg->ts, &cfg); | |
906 | throttle_limits_to_config(argp, &cfg, &local_err); | |
907 | if (local_err) { | |
908 | goto unlock; | |
909 | } | |
910 | throttle_config(&tg->ts, tg->clock_type, &cfg); | |
911 | ||
912 | unlock: | |
913 | qemu_mutex_unlock(&tg->lock); | |
914 | ret: | |
915 | error_propagate(errp, local_err); | |
916 | return; | |
917 | } | |
918 | ||
919 | static void throttle_group_get_limits(Object *obj, Visitor *v, | |
920 | const char *name, void *opaque, | |
921 | Error **errp) | |
922 | { | |
923 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
924 | ThrottleConfig cfg; | |
925 | ThrottleLimits arg = { 0 }; | |
926 | ThrottleLimits *argp = &arg; | |
927 | ||
928 | qemu_mutex_lock(&tg->lock); | |
929 | throttle_get_config(&tg->ts, &cfg); | |
930 | qemu_mutex_unlock(&tg->lock); | |
931 | ||
932 | throttle_config_to_limits(&cfg, argp); | |
933 | ||
934 | visit_type_ThrottleLimits(v, name, &argp, errp); | |
935 | } | |
936 | ||
937 | static bool throttle_group_can_be_deleted(UserCreatable *uc) | |
938 | { | |
939 | return OBJECT(uc)->ref == 1; | |
940 | } | |
941 | ||
942 | static void throttle_group_obj_class_init(ObjectClass *klass, void *class_data) | |
943 | { | |
944 | size_t i = 0; | |
945 | UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); | |
946 | ||
947 | ucc->complete = throttle_group_obj_complete; | |
948 | ucc->can_be_deleted = throttle_group_can_be_deleted; | |
949 | ||
950 | /* individual properties */ | |
951 | for (i = 0; i < sizeof(properties) / sizeof(ThrottleParamInfo); i++) { | |
952 | object_class_property_add(klass, | |
953 | properties[i].name, | |
954 | "int", | |
955 | throttle_group_get, | |
956 | throttle_group_set, | |
957 | NULL, &properties[i], | |
958 | &error_abort); | |
959 | } | |
960 | ||
961 | /* ThrottleLimits */ | |
962 | object_class_property_add(klass, | |
963 | "limits", "ThrottleLimits", | |
964 | throttle_group_get_limits, | |
965 | throttle_group_set_limits, | |
966 | NULL, NULL, | |
967 | &error_abort); | |
968 | } | |
969 | ||
970 | static const TypeInfo throttle_group_info = { | |
971 | .name = TYPE_THROTTLE_GROUP, | |
972 | .parent = TYPE_OBJECT, | |
973 | .class_init = throttle_group_obj_class_init, | |
974 | .instance_size = sizeof(ThrottleGroup), | |
975 | .instance_init = throttle_group_obj_init, | |
976 | .instance_finalize = throttle_group_obj_finalize, | |
977 | .interfaces = (InterfaceInfo[]) { | |
978 | { TYPE_USER_CREATABLE }, | |
979 | { } | |
980 | }, | |
981 | }; | |
982 | ||
/* Registers the throttle group QOM type described by throttle_group_info */
static void throttle_groups_init(void)
{
    type_register_static(&throttle_group_info);
}

/* NOTE(review): type_init() presumably arranges for throttle_groups_init()
 * to run during QEMU's type registration at startup -- confirm against the
 * macro's definition. */
type_init(throttle_groups_init);