]>
Commit | Line | Data |
---|---|---|
2ff1f2e3 AG |
1 | /* |
2 | * QEMU block throttling group infrastructure | |
3 | * | |
4 | * Copyright (C) Nodalink, EURL. 2014 | |
5 | * Copyright (C) Igalia, S.L. 2015 | |
6 | * | |
7 | * Authors: | |
8 | * Benoît Canet <[email protected]> | |
9 | * Alberto Garcia <[email protected]> | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or | |
12 | * modify it under the terms of the GNU General Public License as | |
13 | * published by the Free Software Foundation; either version 2 or | |
14 | * (at your option) version 3 of the License. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | |
23 | */ | |
24 | ||
80c71a24 | 25 | #include "qemu/osdep.h" |
31dce3cc | 26 | #include "sysemu/block-backend.h" |
2ff1f2e3 | 27 | #include "block/throttle-groups.h" |
432d889e | 28 | #include "qemu/throttle-options.h" |
76f4afb4 AG |
29 | #include "qemu/queue.h" |
30 | #include "qemu/thread.h" | |
31 | #include "sysemu/qtest.h" | |
432d889e | 32 | #include "qapi/error.h" |
9af23989 | 33 | #include "qapi/qapi-visit-block-core.h" |
432d889e MP |
34 | #include "qom/object.h" |
35 | #include "qom/object_interfaces.h" | |
36 | ||
37 | static void throttle_group_obj_init(Object *obj); | |
38 | static void throttle_group_obj_complete(UserCreatable *obj, Error **errp); | |
25b8e4db | 39 | static void timer_cb(ThrottleGroupMember *tgm, bool is_write); |
2ff1f2e3 AG |
40 | |
41 | /* The ThrottleGroup structure (with its ThrottleState) is shared | |
022cdc9f | 42 | * among different ThrottleGroupMembers and it's independent from |
2ff1f2e3 AG |
43 | * AioContext, so in order to use it from different threads it needs |
44 | * its own locking. | |
45 | * | |
46 | * This locking is however handled internally in this file, so it's | |
d87d01e1 | 47 | * transparent to outside users. |
2ff1f2e3 AG |
48 | * |
49 | * The whole ThrottleGroup structure is private and invisible to | |
50 | * outside users, that only use it through its ThrottleState. | |
51 | * | |
022cdc9f | 52 | * In addition to the ThrottleGroup structure, ThrottleGroupMember has |
2ff1f2e3 | 53 | * fields that need to be accessed by other members of the group and |
27ccdd52 | 54 | * therefore also need to be protected by this lock. Once a |
022cdc9f | 55 | * ThrottleGroupMember is registered in a group those fields can be accessed |
27ccdd52 | 56 | * by other threads any time. |
2ff1f2e3 AG |
57 | * |
58 | * Again, all this is handled internally and is mostly transparent to | |
59 | * the outside. The 'throttle_timers' field however has an additional | |
60 | * constraint because it may be temporarily invalid (see for example | |
0d2fac8e | 61 | * blk_set_aio_context()). Therefore in this file a thread will |
022cdc9f MP |
62 | * access some other ThrottleGroupMember's timers only after verifying that |
63 | * that ThrottleGroupMember has throttled requests in the queue. | |
2ff1f2e3 AG |
64 | */ |
/* A group of ThrottleGroupMembers sharing a single ThrottleState.
 * The two-element arrays below are indexed by is_write
 * (0 = read, 1 = write).
 */
typedef struct ThrottleGroup {
    Object parent_obj;

    /* refuse individual property change if initialization is complete */
    bool is_initialized;
    char *name; /* This is constant during the lifetime of the group */

    /* This lock protects the following four fields
     * (ts, head, tokens and any_timer_armed) */
    QemuMutex lock;
    ThrottleState ts;
    /* Members of the group, linked through their round_robin entry */
    QLIST_HEAD(, ThrottleGroupMember) head;
    /* Current round-robin token for each operation type */
    ThrottleGroupMember *tokens[2];
    /* Whether some member of the group has a timer armed, per type */
    bool any_timer_armed[2];
    /* Clock used for the timers; assigned in throttle_group_obj_init() */
    QEMUClockType clock_type;

    /* This field is protected by the global QEMU mutex */
    QTAILQ_ENTRY(ThrottleGroup) list;
} ThrottleGroup;
82 | ||
/* List of the existing throttle groups. Groups are appended by
 * throttle_group_obj_complete() and removed by
 * throttle_group_obj_finalize().
 *
 * This is protected by the global QEMU mutex */
static QTAILQ_HEAD(, ThrottleGroup) throttle_groups =
    QTAILQ_HEAD_INITIALIZER(throttle_groups);
86 | ||
432d889e MP |
87 | |
88 | /* This function reads throttle_groups and must be called under the global | |
89 | * mutex. | |
90 | */ | |
91 | static ThrottleGroup *throttle_group_by_name(const char *name) | |
92 | { | |
93 | ThrottleGroup *iter; | |
94 | ||
95 | /* Look for an existing group with that name */ | |
96 | QTAILQ_FOREACH(iter, &throttle_groups, list) { | |
97 | if (!g_strcmp0(name, iter->name)) { | |
98 | return iter; | |
99 | } | |
100 | } | |
101 | ||
102 | return NULL; | |
103 | } | |
104 | ||
d8e7d87e MP |
105 | /* This function reads throttle_groups and must be called under the global |
106 | * mutex. | |
107 | */ | |
108 | bool throttle_group_exists(const char *name) | |
109 | { | |
110 | return throttle_group_by_name(name) != NULL; | |
111 | } | |
112 | ||
2ff1f2e3 AG |
113 | /* Increments the reference count of a ThrottleGroup given its name. |
114 | * | |
115 | * If no ThrottleGroup is found with the given name a new one is | |
116 | * created. | |
117 | * | |
432d889e MP |
118 | * This function edits throttle_groups and must be called under the global |
119 | * mutex. | |
120 | * | |
2ff1f2e3 | 121 | * @name: the name of the ThrottleGroup |
973f2ddf | 122 | * @ret: the ThrottleState member of the ThrottleGroup |
2ff1f2e3 | 123 | */ |
973f2ddf | 124 | ThrottleState *throttle_group_incref(const char *name) |
2ff1f2e3 AG |
125 | { |
126 | ThrottleGroup *tg = NULL; | |
2ff1f2e3 AG |
127 | |
128 | /* Look for an existing group with that name */ | |
432d889e MP |
129 | tg = throttle_group_by_name(name); |
130 | ||
131 | if (tg) { | |
132 | object_ref(OBJECT(tg)); | |
133 | } else { | |
134 | /* Create a new one if not found */ | |
135 | /* new ThrottleGroup obj will have a refcnt = 1 */ | |
136 | tg = THROTTLE_GROUP(object_new(TYPE_THROTTLE_GROUP)); | |
2ff1f2e3 | 137 | tg->name = g_strdup(name); |
432d889e | 138 | throttle_group_obj_complete(USER_CREATABLE(tg), &error_abort); |
2ff1f2e3 AG |
139 | } |
140 | ||
973f2ddf | 141 | return &tg->ts; |
2ff1f2e3 AG |
142 | } |
143 | ||
144 | /* Decrease the reference count of a ThrottleGroup. | |
145 | * | |
146 | * When the reference count reaches zero the ThrottleGroup is | |
147 | * destroyed. | |
148 | * | |
432d889e MP |
149 | * This function edits throttle_groups and must be called under the global |
150 | * mutex. | |
151 | * | |
973f2ddf | 152 | * @ts: The ThrottleGroup to unref, given by its ThrottleState member |
2ff1f2e3 | 153 | */ |
973f2ddf | 154 | void throttle_group_unref(ThrottleState *ts) |
2ff1f2e3 | 155 | { |
973f2ddf | 156 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
432d889e | 157 | object_unref(OBJECT(tg)); |
2ff1f2e3 AG |
158 | } |
159 | ||
022cdc9f | 160 | /* Get the name from a ThrottleGroupMember's group. The name (and the pointer) |
49d2165d | 161 | * is guaranteed to remain constant during the lifetime of the group. |
2ff1f2e3 | 162 | * |
022cdc9f | 163 | * @tgm: a ThrottleGroupMember |
2ff1f2e3 AG |
164 | * @ret: the name of the group. |
165 | */ | |
022cdc9f | 166 | const char *throttle_group_get_name(ThrottleGroupMember *tgm) |
2ff1f2e3 | 167 | { |
022cdc9f | 168 | ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); |
2ff1f2e3 AG |
169 | return tg->name; |
170 | } | |
171 | ||
022cdc9f MP |
172 | /* Return the next ThrottleGroupMember in the round-robin sequence, simulating |
173 | * a circular list. | |
2ff1f2e3 AG |
174 | * |
175 | * This assumes that tg->lock is held. | |
176 | * | |
022cdc9f MP |
177 | * @tgm: the current ThrottleGroupMember |
178 | * @ret: the next ThrottleGroupMember in the sequence | |
2ff1f2e3 | 179 | */ |
022cdc9f | 180 | static ThrottleGroupMember *throttle_group_next_tgm(ThrottleGroupMember *tgm) |
2ff1f2e3 | 181 | { |
022cdc9f | 182 | ThrottleState *ts = tgm->throttle_state; |
2ff1f2e3 | 183 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
022cdc9f | 184 | ThrottleGroupMember *next = QLIST_NEXT(tgm, round_robin); |
2ff1f2e3 AG |
185 | |
186 | if (!next) { | |
31dce3cc | 187 | next = QLIST_FIRST(&tg->head); |
2ff1f2e3 AG |
188 | } |
189 | ||
022cdc9f | 190 | return next; |
2ff1f2e3 AG |
191 | } |
192 | ||
6bf77e1c | 193 | /* |
022cdc9f | 194 | * Return whether a ThrottleGroupMember has pending requests. |
6bf77e1c AG |
195 | * |
196 | * This assumes that tg->lock is held. | |
197 | * | |
022cdc9f MP |
198 | * @tgm: the ThrottleGroupMember |
199 | * @is_write: the type of operation (read/write) | |
200 | * @ret: whether the ThrottleGroupMember has pending requests. | |
6bf77e1c | 201 | */ |
022cdc9f | 202 | static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm, |
6bf77e1c AG |
203 | bool is_write) |
204 | { | |
022cdc9f | 205 | return tgm->pending_reqs[is_write]; |
6bf77e1c AG |
206 | } |
207 | ||
022cdc9f MP |
208 | /* Return the next ThrottleGroupMember in the round-robin sequence with pending |
209 | * I/O requests. | |
76f4afb4 AG |
210 | * |
211 | * This assumes that tg->lock is held. | |
212 | * | |
022cdc9f | 213 | * @tgm: the current ThrottleGroupMember |
76f4afb4 | 214 | * @is_write: the type of operation (read/write) |
022cdc9f MP |
215 | * @ret: the next ThrottleGroupMember with pending requests, or tgm if |
216 | * there is none. | |
76f4afb4 | 217 | */ |
022cdc9f MP |
218 | static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm, |
219 | bool is_write) | |
76f4afb4 | 220 | { |
022cdc9f MP |
221 | ThrottleState *ts = tgm->throttle_state; |
222 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); | |
223 | ThrottleGroupMember *token, *start; | |
76f4afb4 | 224 | |
5d8e4ca0 AG |
225 | /* If this member has its I/O limits disabled then it means that |
226 | * it's being drained. Skip the round-robin search and return tgm | |
227 | * immediately if it has pending requests. Otherwise we could be | |
228 | * forcing it to wait for other member's throttled requests. */ | |
229 | if (tgm_has_pending_reqs(tgm, is_write) && | |
230 | atomic_read(&tgm->io_limits_disabled)) { | |
231 | return tgm; | |
232 | } | |
233 | ||
76f4afb4 AG |
234 | start = token = tg->tokens[is_write]; |
235 | ||
236 | /* get next bs round in round robin style */ | |
022cdc9f MP |
237 | token = throttle_group_next_tgm(token); |
238 | while (token != start && !tgm_has_pending_reqs(token, is_write)) { | |
239 | token = throttle_group_next_tgm(token); | |
76f4afb4 AG |
240 | } |
241 | ||
242 | /* If no IO are queued for scheduling on the next round robin token | |
022cdc9f MP |
243 | * then decide the token is the current tgm because chances are |
244 | * the current tgm got the current request queued. | |
76f4afb4 | 245 | */ |
022cdc9f MP |
246 | if (token == start && !tgm_has_pending_reqs(token, is_write)) { |
247 | token = tgm; | |
76f4afb4 AG |
248 | } |
249 | ||
022cdc9f MP |
250 | /* Either we return the original TGM, or one with pending requests */ |
251 | assert(token == tgm || tgm_has_pending_reqs(token, is_write)); | |
6bf77e1c | 252 | |
76f4afb4 AG |
253 | return token; |
254 | } | |
255 | ||
022cdc9f MP |
256 | /* Check if the next I/O request for a ThrottleGroupMember needs to be |
257 | * throttled or not. If there's no timer set in this group, set one and update | |
258 | * the token accordingly. | |
76f4afb4 AG |
259 | * |
260 | * This assumes that tg->lock is held. | |
261 | * | |
022cdc9f | 262 | * @tgm: the current ThrottleGroupMember |
76f4afb4 AG |
263 | * @is_write: the type of operation (read/write) |
264 | * @ret: whether the I/O request needs to be throttled or not | |
265 | */ | |
022cdc9f MP |
266 | static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm, |
267 | bool is_write) | |
76f4afb4 | 268 | { |
022cdc9f | 269 | ThrottleState *ts = tgm->throttle_state; |
76f4afb4 | 270 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
022cdc9f | 271 | ThrottleTimers *tt = &tgm->throttle_timers; |
76f4afb4 AG |
272 | bool must_wait; |
273 | ||
022cdc9f | 274 | if (atomic_read(&tgm->io_limits_disabled)) { |
ce0f1412 PB |
275 | return false; |
276 | } | |
277 | ||
76f4afb4 AG |
278 | /* Check if any of the timers in this group is already armed */ |
279 | if (tg->any_timer_armed[is_write]) { | |
280 | return true; | |
281 | } | |
282 | ||
283 | must_wait = throttle_schedule_timer(ts, tt, is_write); | |
284 | ||
022cdc9f | 285 | /* If a timer just got armed, set tgm as the current token */ |
76f4afb4 | 286 | if (must_wait) { |
022cdc9f | 287 | tg->tokens[is_write] = tgm; |
76f4afb4 AG |
288 | tg->any_timer_armed[is_write] = true; |
289 | } | |
290 | ||
291 | return must_wait; | |
292 | } | |
293 | ||
022cdc9f | 294 | /* Start the next pending I/O request for a ThrottleGroupMember. Return whether |
3b170dc8 PB |
295 | * any request was actually pending. |
296 | * | |
022cdc9f | 297 | * @tgm: the current ThrottleGroupMember |
3b170dc8 PB |
298 | * @is_write: the type of operation (read/write) |
299 | */ | |
022cdc9f | 300 | static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tgm, |
3b170dc8 PB |
301 | bool is_write) |
302 | { | |
93001e9d | 303 | bool ret; |
3b170dc8 | 304 | |
022cdc9f MP |
305 | qemu_co_mutex_lock(&tgm->throttled_reqs_lock); |
306 | ret = qemu_co_queue_next(&tgm->throttled_reqs[is_write]); | |
307 | qemu_co_mutex_unlock(&tgm->throttled_reqs_lock); | |
93001e9d PB |
308 | |
309 | return ret; | |
3b170dc8 PB |
310 | } |
311 | ||
76f4afb4 AG |
312 | /* Look for the next pending I/O request and schedule it. |
313 | * | |
314 | * This assumes that tg->lock is held. | |
315 | * | |
022cdc9f | 316 | * @tgm: the current ThrottleGroupMember |
76f4afb4 AG |
317 | * @is_write: the type of operation (read/write) |
318 | */ | |
022cdc9f | 319 | static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write) |
76f4afb4 | 320 | { |
022cdc9f MP |
321 | ThrottleState *ts = tgm->throttle_state; |
322 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); | |
76f4afb4 | 323 | bool must_wait; |
022cdc9f | 324 | ThrottleGroupMember *token; |
76f4afb4 AG |
325 | |
326 | /* Check if there's any pending request to schedule next */ | |
022cdc9f MP |
327 | token = next_throttle_token(tgm, is_write); |
328 | if (!tgm_has_pending_reqs(token, is_write)) { | |
76f4afb4 AG |
329 | return; |
330 | } | |
331 | ||
332 | /* Set a timer for the request if it needs to be throttled */ | |
333 | must_wait = throttle_group_schedule_timer(token, is_write); | |
334 | ||
335 | /* If it doesn't have to wait, queue it for immediate execution */ | |
336 | if (!must_wait) { | |
022cdc9f | 337 | /* Give preference to requests from the current tgm */ |
76f4afb4 | 338 | if (qemu_in_coroutine() && |
022cdc9f MP |
339 | throttle_group_co_restart_queue(tgm, is_write)) { |
340 | token = tgm; | |
76f4afb4 | 341 | } else { |
022cdc9f | 342 | ThrottleTimers *tt = &token->throttle_timers; |
dbe824cc | 343 | int64_t now = qemu_clock_get_ns(tg->clock_type); |
7258ed93 | 344 | timer_mod(tt->timers[is_write], now); |
76f4afb4 AG |
345 | tg->any_timer_armed[is_write] = true; |
346 | } | |
347 | tg->tokens[is_write] = token; | |
348 | } | |
349 | } | |
350 | ||
351 | /* Check if an I/O request needs to be throttled, wait and set a timer | |
352 | * if necessary, and schedule the next request using a round robin | |
353 | * algorithm. | |
354 | * | |
022cdc9f | 355 | * @tgm: the current ThrottleGroupMember |
76f4afb4 AG |
356 | * @bytes: the number of bytes for this I/O |
357 | * @is_write: the type of operation (read/write) | |
358 | */ | |
022cdc9f | 359 | void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm, |
76f4afb4 AG |
360 | unsigned int bytes, |
361 | bool is_write) | |
362 | { | |
363 | bool must_wait; | |
022cdc9f MP |
364 | ThrottleGroupMember *token; |
365 | ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); | |
76f4afb4 AG |
366 | qemu_mutex_lock(&tg->lock); |
367 | ||
368 | /* First we check if this I/O has to be throttled. */ | |
022cdc9f | 369 | token = next_throttle_token(tgm, is_write); |
76f4afb4 AG |
370 | must_wait = throttle_group_schedule_timer(token, is_write); |
371 | ||
372 | /* Wait if there's a timer set or queued requests of this type */ | |
022cdc9f MP |
373 | if (must_wait || tgm->pending_reqs[is_write]) { |
374 | tgm->pending_reqs[is_write]++; | |
76f4afb4 | 375 | qemu_mutex_unlock(&tg->lock); |
022cdc9f MP |
376 | qemu_co_mutex_lock(&tgm->throttled_reqs_lock); |
377 | qemu_co_queue_wait(&tgm->throttled_reqs[is_write], | |
378 | &tgm->throttled_reqs_lock); | |
379 | qemu_co_mutex_unlock(&tgm->throttled_reqs_lock); | |
76f4afb4 | 380 | qemu_mutex_lock(&tg->lock); |
022cdc9f | 381 | tgm->pending_reqs[is_write]--; |
76f4afb4 AG |
382 | } |
383 | ||
384 | /* The I/O will be executed, so do the accounting */ | |
022cdc9f | 385 | throttle_account(tgm->throttle_state, is_write, bytes); |
76f4afb4 AG |
386 | |
387 | /* Schedule the next request */ | |
022cdc9f | 388 | schedule_next_request(tgm, is_write); |
76f4afb4 AG |
389 | |
390 | qemu_mutex_unlock(&tg->lock); | |
391 | } | |
392 | ||
/* Arguments passed to throttle_group_restart_queue_entry() through its
 * opaque pointer. Allocated in throttle_group_restart_queue() and freed
 * by the coroutine entry point. */
typedef struct {
    ThrottleGroupMember *tgm; /* member whose queue is restarted */
    bool is_write;            /* the type of operation (read/write) */
} RestartData;
397 | ||
398 | static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) | |
7258ed93 | 399 | { |
3b170dc8 | 400 | RestartData *data = opaque; |
022cdc9f MP |
401 | ThrottleGroupMember *tgm = data->tgm; |
402 | ThrottleState *ts = tgm->throttle_state; | |
403 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); | |
3b170dc8 | 404 | bool is_write = data->is_write; |
7258ed93 PB |
405 | bool empty_queue; |
406 | ||
022cdc9f | 407 | empty_queue = !throttle_group_co_restart_queue(tgm, is_write); |
7258ed93 PB |
408 | |
409 | /* If the request queue was empty then we have to take care of | |
410 | * scheduling the next one */ | |
411 | if (empty_queue) { | |
412 | qemu_mutex_lock(&tg->lock); | |
022cdc9f | 413 | schedule_next_request(tgm, is_write); |
7258ed93 PB |
414 | qemu_mutex_unlock(&tg->lock); |
415 | } | |
43a5dc02 MP |
416 | |
417 | g_free(data); | |
7258ed93 PB |
418 | } |
419 | ||
022cdc9f | 420 | static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) |
3b170dc8 PB |
421 | { |
422 | Coroutine *co; | |
43a5dc02 MP |
423 | RestartData *rd = g_new0(RestartData, 1); |
424 | ||
425 | rd->tgm = tgm; | |
426 | rd->is_write = is_write; | |
3b170dc8 | 427 | |
25b8e4db AG |
428 | /* This function is called when a timer is fired or when |
429 | * throttle_group_restart_tgm() is called. Either way, there can | |
430 | * be no timer pending on this tgm at this point */ | |
431 | assert(!timer_pending(tgm->throttle_timers.timers[is_write])); | |
432 | ||
43a5dc02 | 433 | co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd); |
c61791fc | 434 | aio_co_enter(tgm->aio_context, co); |
3b170dc8 PB |
435 | } |
436 | ||
022cdc9f | 437 | void throttle_group_restart_tgm(ThrottleGroupMember *tgm) |
a72f6414 | 438 | { |
25b8e4db AG |
439 | int i; |
440 | ||
022cdc9f | 441 | if (tgm->throttle_state) { |
25b8e4db AG |
442 | for (i = 0; i < 2; i++) { |
443 | QEMUTimer *t = tgm->throttle_timers.timers[i]; | |
444 | if (timer_pending(t)) { | |
445 | /* If there's a pending timer on this tgm, fire it now */ | |
446 | timer_del(t); | |
447 | timer_cb(tgm, i); | |
448 | } else { | |
449 | /* Else run the next request from the queue manually */ | |
450 | throttle_group_restart_queue(tgm, i); | |
451 | } | |
452 | } | |
a72f6414 PB |
453 | } |
454 | } | |
455 | ||
2ff1f2e3 AG |
456 | /* Update the throttle configuration for a particular group. Similar |
457 | * to throttle_config(), but guarantees atomicity within the | |
458 | * throttling group. | |
459 | * | |
022cdc9f | 460 | * @tgm: a ThrottleGroupMember that is a member of the group |
2ff1f2e3 AG |
461 | * @cfg: the configuration to set |
462 | */ | |
022cdc9f | 463 | void throttle_group_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg) |
2ff1f2e3 | 464 | { |
022cdc9f | 465 | ThrottleState *ts = tgm->throttle_state; |
2ff1f2e3 AG |
466 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
467 | qemu_mutex_lock(&tg->lock); | |
27e4cf13 | 468 | throttle_config(ts, tg->clock_type, cfg); |
2ff1f2e3 | 469 | qemu_mutex_unlock(&tg->lock); |
a72f6414 | 470 | |
022cdc9f | 471 | throttle_group_restart_tgm(tgm); |
2ff1f2e3 AG |
472 | } |
473 | ||
474 | /* Get the throttle configuration from a particular group. Similar to | |
475 | * throttle_get_config(), but guarantees atomicity within the | |
476 | * throttling group. | |
477 | * | |
022cdc9f | 478 | * @tgm: a ThrottleGroupMember that is a member of the group |
2ff1f2e3 AG |
479 | * @cfg: the configuration will be written here |
480 | */ | |
022cdc9f | 481 | void throttle_group_get_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg) |
2ff1f2e3 | 482 | { |
022cdc9f | 483 | ThrottleState *ts = tgm->throttle_state; |
2ff1f2e3 AG |
484 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
485 | qemu_mutex_lock(&tg->lock); | |
486 | throttle_get_config(ts, cfg); | |
487 | qemu_mutex_unlock(&tg->lock); | |
488 | } | |
489 | ||
76f4afb4 AG |
490 | /* ThrottleTimers callback. This wakes up a request that was waiting |
491 | * because it had been throttled. | |
492 | * | |
c61791fc | 493 | * @tgm: the ThrottleGroupMember whose request had been throttled |
76f4afb4 AG |
494 | * @is_write: the type of operation (read/write) |
495 | */ | |
c61791fc | 496 | static void timer_cb(ThrottleGroupMember *tgm, bool is_write) |
76f4afb4 | 497 | { |
022cdc9f | 498 | ThrottleState *ts = tgm->throttle_state; |
76f4afb4 | 499 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); |
76f4afb4 AG |
500 | |
501 | /* The timer has just been fired, so we can update the flag */ | |
502 | qemu_mutex_lock(&tg->lock); | |
503 | tg->any_timer_armed[is_write] = false; | |
504 | qemu_mutex_unlock(&tg->lock); | |
505 | ||
506 | /* Run the request that was waiting for this timer */ | |
022cdc9f | 507 | throttle_group_restart_queue(tgm, is_write); |
76f4afb4 AG |
508 | } |
509 | ||
510 | static void read_timer_cb(void *opaque) | |
511 | { | |
512 | timer_cb(opaque, false); | |
513 | } | |
514 | ||
515 | static void write_timer_cb(void *opaque) | |
516 | { | |
517 | timer_cb(opaque, true); | |
518 | } | |
519 | ||
022cdc9f MP |
520 | /* Register a ThrottleGroupMember from the throttling group, also initializing |
521 | * its timers and updating its throttle_state pointer to point to it. If a | |
31dce3cc | 522 | * throttling group with that name does not exist yet, it will be created. |
2ff1f2e3 | 523 | * |
432d889e MP |
524 | * This function edits throttle_groups and must be called under the global |
525 | * mutex. | |
526 | * | |
022cdc9f | 527 | * @tgm: the ThrottleGroupMember to insert |
2ff1f2e3 | 528 | * @groupname: the name of the group |
c61791fc | 529 | * @ctx: the AioContext to use |
2ff1f2e3 | 530 | */ |
022cdc9f | 531 | void throttle_group_register_tgm(ThrottleGroupMember *tgm, |
c61791fc MP |
532 | const char *groupname, |
533 | AioContext *ctx) | |
2ff1f2e3 AG |
534 | { |
535 | int i; | |
973f2ddf HR |
536 | ThrottleState *ts = throttle_group_incref(groupname); |
537 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); | |
022cdc9f MP |
538 | |
539 | tgm->throttle_state = ts; | |
c61791fc | 540 | tgm->aio_context = ctx; |
2ff1f2e3 AG |
541 | |
542 | qemu_mutex_lock(&tg->lock); | |
022cdc9f | 543 | /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */ |
2ff1f2e3 AG |
544 | for (i = 0; i < 2; i++) { |
545 | if (!tg->tokens[i]) { | |
022cdc9f | 546 | tg->tokens[i] = tgm; |
2ff1f2e3 AG |
547 | } |
548 | } | |
549 | ||
022cdc9f | 550 | QLIST_INSERT_HEAD(&tg->head, tgm, round_robin); |
76f4afb4 | 551 | |
022cdc9f | 552 | throttle_timers_init(&tgm->throttle_timers, |
c61791fc | 553 | tgm->aio_context, |
dbe824cc | 554 | tg->clock_type, |
76f4afb4 AG |
555 | read_timer_cb, |
556 | write_timer_cb, | |
c61791fc | 557 | tgm); |
f738cfc8 MP |
558 | qemu_co_mutex_init(&tgm->throttled_reqs_lock); |
559 | qemu_co_queue_init(&tgm->throttled_reqs[0]); | |
560 | qemu_co_queue_init(&tgm->throttled_reqs[1]); | |
76f4afb4 | 561 | |
2ff1f2e3 AG |
562 | qemu_mutex_unlock(&tg->lock); |
563 | } | |
564 | ||
022cdc9f | 565 | /* Unregister a ThrottleGroupMember from its group, removing it from the list, |
31dce3cc | 566 | * destroying the timers and setting the throttle_state pointer to NULL. |
2ff1f2e3 | 567 | * |
022cdc9f MP |
568 | * The ThrottleGroupMember must not have pending throttled requests, so the |
569 | * caller has to drain them first. | |
5ac72418 | 570 | * |
2ff1f2e3 AG |
571 | * The group will be destroyed if it's empty after this operation. |
572 | * | |
022cdc9f | 573 | * @tgm the ThrottleGroupMember to remove |
2ff1f2e3 | 574 | */ |
022cdc9f | 575 | void throttle_group_unregister_tgm(ThrottleGroupMember *tgm) |
2ff1f2e3 | 576 | { |
022cdc9f MP |
577 | ThrottleState *ts = tgm->throttle_state; |
578 | ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); | |
579 | ThrottleGroupMember *token; | |
2ff1f2e3 AG |
580 | int i; |
581 | ||
d8e7d87e MP |
582 | if (!ts) { |
583 | /* Discard already unregistered tgm */ | |
584 | return; | |
585 | } | |
586 | ||
2ff1f2e3 AG |
587 | qemu_mutex_lock(&tg->lock); |
588 | for (i = 0; i < 2; i++) { | |
25b8e4db AG |
589 | assert(tgm->pending_reqs[i] == 0); |
590 | assert(qemu_co_queue_empty(&tgm->throttled_reqs[i])); | |
591 | assert(!timer_pending(tgm->throttle_timers.timers[i])); | |
022cdc9f MP |
592 | if (tg->tokens[i] == tgm) { |
593 | token = throttle_group_next_tgm(tgm); | |
594 | /* Take care of the case where this is the last tgm in the group */ | |
595 | if (token == tgm) { | |
2ff1f2e3 AG |
596 | token = NULL; |
597 | } | |
598 | tg->tokens[i] = token; | |
599 | } | |
600 | } | |
601 | ||
022cdc9f MP |
602 | /* remove the current tgm from the list */ |
603 | QLIST_REMOVE(tgm, round_robin); | |
604 | throttle_timers_destroy(&tgm->throttle_timers); | |
2ff1f2e3 AG |
605 | qemu_mutex_unlock(&tg->lock); |
606 | ||
973f2ddf | 607 | throttle_group_unref(&tg->ts); |
022cdc9f | 608 | tgm->throttle_state = NULL; |
2ff1f2e3 AG |
609 | } |
610 | ||
c61791fc MP |
611 | void throttle_group_attach_aio_context(ThrottleGroupMember *tgm, |
612 | AioContext *new_context) | |
613 | { | |
614 | ThrottleTimers *tt = &tgm->throttle_timers; | |
615 | throttle_timers_attach_aio_context(tt, new_context); | |
616 | tgm->aio_context = new_context; | |
617 | } | |
618 | ||
619 | void throttle_group_detach_aio_context(ThrottleGroupMember *tgm) | |
620 | { | |
341e0b56 | 621 | ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts); |
c61791fc | 622 | ThrottleTimers *tt = &tgm->throttle_timers; |
341e0b56 | 623 | int i; |
dc868fb0 SH |
624 | |
625 | /* Requests must have been drained */ | |
626 | assert(tgm->pending_reqs[0] == 0 && tgm->pending_reqs[1] == 0); | |
627 | assert(qemu_co_queue_empty(&tgm->throttled_reqs[0])); | |
628 | assert(qemu_co_queue_empty(&tgm->throttled_reqs[1])); | |
629 | ||
341e0b56 SH |
630 | /* Kick off next ThrottleGroupMember, if necessary */ |
631 | qemu_mutex_lock(&tg->lock); | |
632 | for (i = 0; i < 2; i++) { | |
633 | if (timer_pending(tt->timers[i])) { | |
634 | tg->any_timer_armed[i] = false; | |
635 | schedule_next_request(tgm, i); | |
636 | } | |
637 | } | |
638 | qemu_mutex_unlock(&tg->lock); | |
639 | ||
c61791fc MP |
640 | throttle_timers_detach_aio_context(tt); |
641 | tgm->aio_context = NULL; | |
642 | } | |
643 | ||
432d889e MP |
/* Redefine the option prefix so that every property name built in the
 * properties[] table below starts with "x-". */
#undef THROTTLE_OPT_PREFIX
#define THROTTLE_OPT_PREFIX "x-"
646 | ||
/* Helper struct and array for QOM property setter/getter */
typedef struct {
    const char *name;   /* property name */
    BucketType type;    /* index into the buckets[] of the ThrottleConfig */
    /* Which setting the property refers to; the setter uses it to choose
     * the field to write (e.g. AVG -> .avg, MAX -> .max) */
    enum {
        AVG,
        MAX,
        BURST_LENGTH,
        IOPS_SIZE,
    } category;
} ThrottleParamInfo;
658 | ||
659 | static ThrottleParamInfo properties[] = { | |
660 | { | |
661 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL, | |
662 | THROTTLE_OPS_TOTAL, AVG, | |
663 | }, | |
664 | { | |
665 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX, | |
666 | THROTTLE_OPS_TOTAL, MAX, | |
667 | }, | |
668 | { | |
669 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX_LENGTH, | |
670 | THROTTLE_OPS_TOTAL, BURST_LENGTH, | |
671 | }, | |
672 | { | |
673 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ, | |
674 | THROTTLE_OPS_READ, AVG, | |
675 | }, | |
676 | { | |
677 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX, | |
678 | THROTTLE_OPS_READ, MAX, | |
679 | }, | |
680 | { | |
681 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX_LENGTH, | |
682 | THROTTLE_OPS_READ, BURST_LENGTH, | |
683 | }, | |
684 | { | |
685 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE, | |
686 | THROTTLE_OPS_WRITE, AVG, | |
687 | }, | |
688 | { | |
689 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX, | |
690 | THROTTLE_OPS_WRITE, MAX, | |
691 | }, | |
692 | { | |
693 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX_LENGTH, | |
694 | THROTTLE_OPS_WRITE, BURST_LENGTH, | |
695 | }, | |
696 | { | |
697 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL, | |
698 | THROTTLE_BPS_TOTAL, AVG, | |
699 | }, | |
700 | { | |
701 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX, | |
702 | THROTTLE_BPS_TOTAL, MAX, | |
703 | }, | |
704 | { | |
705 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX_LENGTH, | |
706 | THROTTLE_BPS_TOTAL, BURST_LENGTH, | |
707 | }, | |
708 | { | |
709 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ, | |
710 | THROTTLE_BPS_READ, AVG, | |
711 | }, | |
712 | { | |
713 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX, | |
714 | THROTTLE_BPS_READ, MAX, | |
715 | }, | |
716 | { | |
717 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX_LENGTH, | |
718 | THROTTLE_BPS_READ, BURST_LENGTH, | |
719 | }, | |
720 | { | |
721 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE, | |
722 | THROTTLE_BPS_WRITE, AVG, | |
723 | }, | |
724 | { | |
725 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX, | |
726 | THROTTLE_BPS_WRITE, MAX, | |
727 | }, | |
728 | { | |
729 | THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX_LENGTH, | |
730 | THROTTLE_BPS_WRITE, BURST_LENGTH, | |
731 | }, | |
732 | { | |
733 | THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_SIZE, | |
734 | 0, IOPS_SIZE, | |
735 | } | |
736 | }; | |
737 | ||
738 | /* This function edits throttle_groups and must be called under the global | |
739 | * mutex */ | |
740 | static void throttle_group_obj_init(Object *obj) | |
741 | { | |
742 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
743 | ||
744 | tg->clock_type = QEMU_CLOCK_REALTIME; | |
745 | if (qtest_enabled()) { | |
746 | /* For testing block IO throttling only */ | |
747 | tg->clock_type = QEMU_CLOCK_VIRTUAL; | |
748 | } | |
749 | tg->is_initialized = false; | |
750 | qemu_mutex_init(&tg->lock); | |
751 | throttle_init(&tg->ts); | |
752 | QLIST_INIT(&tg->head); | |
753 | } | |
754 | ||
755 | /* This function edits throttle_groups and must be called under the global | |
756 | * mutex */ | |
757 | static void throttle_group_obj_complete(UserCreatable *obj, Error **errp) | |
758 | { | |
759 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
760 | ThrottleConfig cfg; | |
761 | ||
762 | /* set group name to object id if it exists */ | |
763 | if (!tg->name && tg->parent_obj.parent) { | |
764 | tg->name = object_get_canonical_path_component(OBJECT(obj)); | |
765 | } | |
766 | /* We must have a group name at this point */ | |
767 | assert(tg->name); | |
768 | ||
769 | /* error if name is duplicate */ | |
d8e7d87e | 770 | if (throttle_group_exists(tg->name)) { |
432d889e MP |
771 | error_setg(errp, "A group with this name already exists"); |
772 | return; | |
773 | } | |
774 | ||
775 | /* check validity */ | |
776 | throttle_get_config(&tg->ts, &cfg); | |
777 | if (!throttle_is_valid(&cfg, errp)) { | |
778 | return; | |
779 | } | |
780 | throttle_config(&tg->ts, tg->clock_type, &cfg); | |
781 | QTAILQ_INSERT_TAIL(&throttle_groups, tg, list); | |
782 | tg->is_initialized = true; | |
783 | } | |
784 | ||
785 | /* This function edits throttle_groups and must be called under the global | |
786 | * mutex */ | |
787 | static void throttle_group_obj_finalize(Object *obj) | |
788 | { | |
789 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
790 | if (tg->is_initialized) { | |
791 | QTAILQ_REMOVE(&throttle_groups, tg, list); | |
792 | } | |
793 | qemu_mutex_destroy(&tg->lock); | |
794 | g_free(tg->name); | |
795 | } | |
796 | ||
797 | static void throttle_group_set(Object *obj, Visitor *v, const char * name, | |
798 | void *opaque, Error **errp) | |
799 | ||
800 | { | |
801 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
802 | ThrottleConfig *cfg; | |
803 | ThrottleParamInfo *info = opaque; | |
804 | Error *local_err = NULL; | |
805 | int64_t value; | |
806 | ||
807 | /* If we have finished initialization, don't accept individual property | |
808 | * changes through QOM. Throttle configuration limits must be set in one | |
809 | * transaction, as certain combinations are invalid. | |
810 | */ | |
811 | if (tg->is_initialized) { | |
812 | error_setg(&local_err, "Property cannot be set after initialization"); | |
813 | goto ret; | |
814 | } | |
815 | ||
816 | visit_type_int64(v, name, &value, &local_err); | |
817 | if (local_err) { | |
818 | goto ret; | |
819 | } | |
820 | if (value < 0) { | |
821 | error_setg(&local_err, "Property values cannot be negative"); | |
822 | goto ret; | |
823 | } | |
824 | ||
825 | cfg = &tg->ts.cfg; | |
826 | switch (info->category) { | |
827 | case AVG: | |
828 | cfg->buckets[info->type].avg = value; | |
829 | break; | |
830 | case MAX: | |
831 | cfg->buckets[info->type].max = value; | |
832 | break; | |
833 | case BURST_LENGTH: | |
834 | if (value > UINT_MAX) { | |
835 | error_setg(&local_err, "%s value must be in the" | |
836 | "range [0, %u]", info->name, UINT_MAX); | |
837 | goto ret; | |
838 | } | |
839 | cfg->buckets[info->type].burst_length = value; | |
840 | break; | |
841 | case IOPS_SIZE: | |
842 | cfg->op_size = value; | |
843 | break; | |
844 | } | |
845 | ||
846 | ret: | |
847 | error_propagate(errp, local_err); | |
848 | return; | |
849 | ||
850 | } | |
851 | ||
852 | static void throttle_group_get(Object *obj, Visitor *v, const char *name, | |
853 | void *opaque, Error **errp) | |
854 | { | |
855 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
856 | ThrottleConfig cfg; | |
857 | ThrottleParamInfo *info = opaque; | |
858 | int64_t value; | |
859 | ||
860 | throttle_get_config(&tg->ts, &cfg); | |
861 | switch (info->category) { | |
862 | case AVG: | |
863 | value = cfg.buckets[info->type].avg; | |
864 | break; | |
865 | case MAX: | |
866 | value = cfg.buckets[info->type].max; | |
867 | break; | |
868 | case BURST_LENGTH: | |
869 | value = cfg.buckets[info->type].burst_length; | |
870 | break; | |
871 | case IOPS_SIZE: | |
872 | value = cfg.op_size; | |
873 | break; | |
874 | } | |
875 | ||
876 | visit_type_int64(v, name, &value, errp); | |
877 | } | |
878 | ||
879 | static void throttle_group_set_limits(Object *obj, Visitor *v, | |
880 | const char *name, void *opaque, | |
881 | Error **errp) | |
882 | ||
883 | { | |
884 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
885 | ThrottleConfig cfg; | |
886 | ThrottleLimits arg = { 0 }; | |
887 | ThrottleLimits *argp = &arg; | |
888 | Error *local_err = NULL; | |
889 | ||
890 | visit_type_ThrottleLimits(v, name, &argp, &local_err); | |
891 | if (local_err) { | |
892 | goto ret; | |
893 | } | |
894 | qemu_mutex_lock(&tg->lock); | |
895 | throttle_get_config(&tg->ts, &cfg); | |
896 | throttle_limits_to_config(argp, &cfg, &local_err); | |
897 | if (local_err) { | |
898 | goto unlock; | |
899 | } | |
900 | throttle_config(&tg->ts, tg->clock_type, &cfg); | |
901 | ||
902 | unlock: | |
903 | qemu_mutex_unlock(&tg->lock); | |
904 | ret: | |
905 | error_propagate(errp, local_err); | |
906 | return; | |
907 | } | |
908 | ||
909 | static void throttle_group_get_limits(Object *obj, Visitor *v, | |
910 | const char *name, void *opaque, | |
911 | Error **errp) | |
912 | { | |
913 | ThrottleGroup *tg = THROTTLE_GROUP(obj); | |
914 | ThrottleConfig cfg; | |
915 | ThrottleLimits arg = { 0 }; | |
916 | ThrottleLimits *argp = &arg; | |
917 | ||
918 | qemu_mutex_lock(&tg->lock); | |
919 | throttle_get_config(&tg->ts, &cfg); | |
920 | qemu_mutex_unlock(&tg->lock); | |
921 | ||
922 | throttle_config_to_limits(&cfg, argp); | |
923 | ||
924 | visit_type_ThrottleLimits(v, name, &argp, errp); | |
925 | } | |
926 | ||
927 | static bool throttle_group_can_be_deleted(UserCreatable *uc) | |
928 | { | |
929 | return OBJECT(uc)->ref == 1; | |
930 | } | |
931 | ||
932 | static void throttle_group_obj_class_init(ObjectClass *klass, void *class_data) | |
933 | { | |
934 | size_t i = 0; | |
935 | UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); | |
936 | ||
937 | ucc->complete = throttle_group_obj_complete; | |
938 | ucc->can_be_deleted = throttle_group_can_be_deleted; | |
939 | ||
940 | /* individual properties */ | |
941 | for (i = 0; i < sizeof(properties) / sizeof(ThrottleParamInfo); i++) { | |
942 | object_class_property_add(klass, | |
943 | properties[i].name, | |
944 | "int", | |
945 | throttle_group_get, | |
946 | throttle_group_set, | |
947 | NULL, &properties[i], | |
948 | &error_abort); | |
949 | } | |
950 | ||
951 | /* ThrottleLimits */ | |
952 | object_class_property_add(klass, | |
953 | "limits", "ThrottleLimits", | |
954 | throttle_group_get_limits, | |
955 | throttle_group_set_limits, | |
956 | NULL, NULL, | |
957 | &error_abort); | |
958 | } | |
959 | ||
960 | static const TypeInfo throttle_group_info = { | |
961 | .name = TYPE_THROTTLE_GROUP, | |
962 | .parent = TYPE_OBJECT, | |
963 | .class_init = throttle_group_obj_class_init, | |
964 | .instance_size = sizeof(ThrottleGroup), | |
965 | .instance_init = throttle_group_obj_init, | |
966 | .instance_finalize = throttle_group_obj_finalize, | |
967 | .interfaces = (InterfaceInfo[]) { | |
968 | { TYPE_USER_CREATABLE }, | |
969 | { } | |
970 | }, | |
971 | }; | |
972 | ||
2ff1f2e3 AG |
973 | static void throttle_groups_init(void) |
974 | { | |
432d889e | 975 | type_register_static(&throttle_group_info); |
2ff1f2e3 AG |
976 | } |
977 | ||
432d889e | 978 | type_init(throttle_groups_init); |