]>
Commit | Line | Data |
---|---|---|
bd322087 | 1 | /* |
d4443cb6 | 2 | * Virtio Balloon Device |
bd322087 AL |
3 | * |
4 | * Copyright IBM, Corp. 2008 | |
d4443cb6 AS |
5 | * Copyright (C) 2011 Red Hat, Inc. |
6 | * Copyright (C) 2011 Amit Shah <[email protected]> | |
bd322087 AL |
7 | * |
8 | * Authors: | |
9 | * Anthony Liguori <[email protected]> | |
10 | * | |
11 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
12 | * the COPYING file in the top-level directory. | |
13 | * | |
14 | */ | |
15 | ||
9b8bfe21 | 16 | #include "qemu/osdep.h" |
1de7afc9 | 17 | #include "qemu/iov.h" |
0b8fa32f | 18 | #include "qemu/module.h" |
7e6ccd9c | 19 | #include "qemu/timer.h" |
0d09e41a | 20 | #include "hw/virtio/virtio.h" |
2070aaeb | 21 | #include "hw/mem/pc-dimm.h" |
9c17d615 | 22 | #include "sysemu/balloon.h" |
0d09e41a | 23 | #include "hw/virtio/virtio-balloon.h" |
022c62cb | 24 | #include "exec/address-spaces.h" |
e688df6b | 25 | #include "qapi/error.h" |
112ed241 | 26 | #include "qapi/qapi-events-misc.h" |
7e6ccd9c | 27 | #include "qapi/visitor.h" |
6adfdc5a | 28 | #include "trace.h" |
2ab4b135 | 29 | #include "qemu/error-report.h" |
c13c4153 | 30 | #include "migration/misc.h" |
bd322087 | 31 | |
0d09e41a | 32 | #include "hw/virtio/virtio-bus.h" |
8609d2a8 | 33 | #include "hw/virtio/virtio-access.h" |
1ab461b5 | 34 | |
/* Size in bytes of one balloon page, derived from VIRTIO_BALLOON_PFN_SHIFT */
#define BALLOON_PAGE_SIZE  (1 << VIRTIO_BALLOON_PFN_SHIFT)
36 | ||
/*
 * Book-keeping for a host page of which only some BALLOON_PAGE_SIZE
 * pieces have been inflated so far (used when the backing page size
 * differs from BALLOON_PAGE_SIZE).
 */
struct PartiallyBalloonedPage {
    RAMBlock *rb;            /* RAM block the host page belongs to */
    ram_addr_t base;         /* offset of the host page inside rb */
    unsigned long bitmap[];  /* one bit per balloon-sized subpage */
};
42 | ||
e9550234 DG |
43 | static void balloon_inflate_page(VirtIOBalloon *balloon, |
44 | MemoryRegion *mr, hwaddr offset) | |
bd322087 | 45 | { |
e9550234 | 46 | void *addr = memory_region_get_ram_ptr(mr) + offset; |
dbe1a277 DG |
47 | RAMBlock *rb; |
48 | size_t rb_page_size; | |
ed48c598 DG |
49 | int subpages; |
50 | ram_addr_t ram_offset, host_page_base; | |
e9550234 | 51 | |
dbe1a277 DG |
52 | /* XXX is there a better way to get to the RAMBlock than via a |
53 | * host address? */ | |
54 | rb = qemu_ram_block_from_host(addr, false, &ram_offset); | |
55 | rb_page_size = qemu_ram_pagesize(rb); | |
ed48c598 DG |
56 | host_page_base = ram_offset & ~(rb_page_size - 1); |
57 | ||
58 | if (rb_page_size == BALLOON_PAGE_SIZE) { | |
59 | /* Easy case */ | |
dbe1a277 | 60 | |
ed48c598 DG |
61 | ram_block_discard_range(rb, ram_offset, rb_page_size); |
62 | /* We ignore errors from ram_block_discard_range(), because it | |
63 | * has already reported them, and failing to discard a balloon | |
64 | * page is not fatal */ | |
dbe1a277 DG |
65 | return; |
66 | } | |
67 | ||
ed48c598 DG |
68 | /* Hard case |
69 | * | |
70 | * We've put a piece of a larger host page into the balloon - we | |
71 | * need to keep track until we have a whole host page to | |
72 | * discard | |
73 | */ | |
74 | warn_report_once( | |
75 | "Balloon used with backing page size > 4kiB, this may not be reliable"); | |
76 | ||
77 | subpages = rb_page_size / BALLOON_PAGE_SIZE; | |
78 | ||
79 | if (balloon->pbp | |
80 | && (rb != balloon->pbp->rb | |
81 | || host_page_base != balloon->pbp->base)) { | |
82 | /* We've partially ballooned part of a host page, but now | |
83 | * we're trying to balloon part of a different one. Too hard, | |
84 | * give up on the old partial page */ | |
301cf2a8 | 85 | g_free(balloon->pbp); |
ed48c598 | 86 | balloon->pbp = NULL; |
dbe1a277 DG |
87 | } |
88 | ||
ed48c598 DG |
89 | if (!balloon->pbp) { |
90 | /* Starting on a new host page */ | |
91 | size_t bitlen = BITS_TO_LONGS(subpages) * sizeof(unsigned long); | |
92 | balloon->pbp = g_malloc0(sizeof(PartiallyBalloonedPage) + bitlen); | |
93 | balloon->pbp->rb = rb; | |
94 | balloon->pbp->base = host_page_base; | |
95 | } | |
96 | ||
97 | bitmap_set(balloon->pbp->bitmap, | |
98 | (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, | |
99 | subpages); | |
100 | ||
101 | if (bitmap_full(balloon->pbp->bitmap, subpages)) { | |
102 | /* We've accumulated a full host page, we can actually discard | |
103 | * it now */ | |
104 | ||
105 | ram_block_discard_range(rb, balloon->pbp->base, rb_page_size); | |
106 | /* We ignore errors from ram_block_discard_range(), because it | |
107 | * has already reported them, and failing to discard a balloon | |
108 | * page is not fatal */ | |
109 | ||
301cf2a8 | 110 | g_free(balloon->pbp); |
ed48c598 DG |
111 | balloon->pbp = NULL; |
112 | } | |
bd322087 AL |
113 | } |
114 | ||
b27b3239 DG |
115 | static void balloon_deflate_page(VirtIOBalloon *balloon, |
116 | MemoryRegion *mr, hwaddr offset) | |
117 | { | |
118 | void *addr = memory_region_get_ram_ptr(mr) + offset; | |
119 | RAMBlock *rb; | |
120 | size_t rb_page_size; | |
121 | ram_addr_t ram_offset, host_page_base; | |
596546fe DG |
122 | void *host_addr; |
123 | int ret; | |
b27b3239 DG |
124 | |
125 | /* XXX is there a better way to get to the RAMBlock than via a | |
126 | * host address? */ | |
127 | rb = qemu_ram_block_from_host(addr, false, &ram_offset); | |
128 | rb_page_size = qemu_ram_pagesize(rb); | |
129 | host_page_base = ram_offset & ~(rb_page_size - 1); | |
130 | ||
131 | if (balloon->pbp | |
132 | && rb == balloon->pbp->rb | |
133 | && host_page_base == balloon->pbp->base) { | |
134 | int subpages = rb_page_size / BALLOON_PAGE_SIZE; | |
135 | ||
136 | /* | |
137 | * This means the guest has asked to discard some of the 4kiB | |
138 | * subpages of a host page, but then changed its mind and | |
139 | * asked to keep them after all. It's exceedingly unlikely | |
140 | * for a guest to do this in practice, but handle it anyway, | |
141 | * since getting it wrong could mean discarding memory the | |
142 | * guest is still using. */ | |
143 | bitmap_clear(balloon->pbp->bitmap, | |
144 | (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, | |
145 | subpages); | |
146 | ||
147 | if (bitmap_empty(balloon->pbp->bitmap, subpages)) { | |
148 | g_free(balloon->pbp); | |
149 | balloon->pbp = NULL; | |
150 | } | |
151 | } | |
596546fe DG |
152 | |
153 | host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1)); | |
154 | ||
155 | /* When a page is deflated, we hint the whole host page it lives | |
156 | * on, since we can't do anything smaller */ | |
157 | ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED); | |
158 | if (ret != 0) { | |
159 | warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s", | |
160 | strerror(errno)); | |
161 | /* Otherwise ignore, failing to page hint shouldn't be fatal */ | |
162 | } | |
b27b3239 DG |
163 | } |
164 | ||
/*
 * Names under which the guest statistics are exposed, indexed by the
 * VIRTIO_BALLOON_S_* tag of each statistic.  balloon_stats_get_all() uses
 * entries 0 .. VIRTIO_BALLOON_S_NR - 1 as member names of the "stats"
 * struct.
 */
static const char *balloon_stat_names[] = {
   [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
   [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
   [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
   [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
   [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
   [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
   [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory",
   [VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches",
   [VIRTIO_BALLOON_S_HTLB_PGALLOC] = "stat-htlb-pgalloc",
   [VIRTIO_BALLOON_S_HTLB_PGFAIL] = "stat-htlb-pgfail",
   [VIRTIO_BALLOON_S_NR] = NULL    /* terminating entry, not a statistic */
};
178 | ||
625a5bef AL |
179 | /* |
180 | * reset_stats - Mark all items in the stats array as unset | |
181 | * | |
52f35022 SW |
182 | * This function needs to be called at device initialization and before |
183 | * updating to a set of newly-generated stats. This will ensure that no | |
625a5bef AL |
184 | * stale values stick around in case the guest reports a subset of the supported |
185 | * statistics. | |
186 | */ | |
187 | static inline void reset_stats(VirtIOBalloon *dev) | |
188 | { | |
189 | int i; | |
190 | for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1); | |
191 | } | |
192 | ||
7e6ccd9c LC |
193 | static bool balloon_stats_supported(const VirtIOBalloon *s) |
194 | { | |
c96caced | 195 | VirtIODevice *vdev = VIRTIO_DEVICE(s); |
95129d6f | 196 | return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ); |
7e6ccd9c LC |
197 | } |
198 | ||
199 | static bool balloon_stats_enabled(const VirtIOBalloon *s) | |
200 | { | |
201 | return s->stats_poll_interval > 0; | |
202 | } | |
203 | ||
204 | static void balloon_stats_destroy_timer(VirtIOBalloon *s) | |
205 | { | |
206 | if (balloon_stats_enabled(s)) { | |
bc72ad67 AB |
207 | timer_del(s->stats_timer); |
208 | timer_free(s->stats_timer); | |
7e6ccd9c LC |
209 | s->stats_timer = NULL; |
210 | s->stats_poll_interval = 0; | |
211 | } | |
212 | } | |
213 | ||
1f9296b5 | 214 | static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs) |
7e6ccd9c | 215 | { |
bc72ad67 | 216 | timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000); |
7e6ccd9c LC |
217 | } |
218 | ||
219 | static void balloon_stats_poll_cb(void *opaque) | |
220 | { | |
221 | VirtIOBalloon *s = opaque; | |
c96caced | 222 | VirtIODevice *vdev = VIRTIO_DEVICE(s); |
7e6ccd9c | 223 | |
4eae2a65 | 224 | if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) { |
7e6ccd9c LC |
225 | /* re-schedule */ |
226 | balloon_stats_change_timer(s, s->stats_poll_interval); | |
227 | return; | |
228 | } | |
229 | ||
51b19ebe | 230 | virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset); |
c96caced | 231 | virtio_notify(vdev, s->svq); |
51b19ebe PB |
232 | g_free(s->stats_vq_elem); |
233 | s->stats_vq_elem = NULL; | |
7e6ccd9c LC |
234 | } |
235 | ||
d7bce999 EB |
236 | static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name, |
237 | void *opaque, Error **errp) | |
7e6ccd9c | 238 | { |
2ddb16a9 | 239 | Error *err = NULL; |
7e6ccd9c LC |
240 | VirtIOBalloon *s = opaque; |
241 | int i; | |
242 | ||
337283df | 243 | visit_start_struct(v, name, NULL, 0, &err); |
2ddb16a9 MA |
244 | if (err) { |
245 | goto out; | |
246 | } | |
51e72bc1 | 247 | visit_type_int(v, "last-update", &s->stats_last_update, &err); |
297a3646 MA |
248 | if (err) { |
249 | goto out_end; | |
250 | } | |
7e6ccd9c | 251 | |
337283df | 252 | visit_start_struct(v, "stats", NULL, 0, &err); |
2ddb16a9 MA |
253 | if (err) { |
254 | goto out_end; | |
255 | } | |
9dbb8fa7 | 256 | for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) { |
51e72bc1 | 257 | visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err); |
9dbb8fa7 | 258 | if (err) { |
15c2f669 | 259 | goto out_nested; |
9dbb8fa7 | 260 | } |
7e6ccd9c | 261 | } |
15c2f669 EB |
262 | visit_check_struct(v, &err); |
263 | out_nested: | |
1158bb2a | 264 | visit_end_struct(v, NULL); |
2ddb16a9 | 265 | |
15c2f669 EB |
266 | if (!err) { |
267 | visit_check_struct(v, &err); | |
268 | } | |
2ddb16a9 | 269 | out_end: |
1158bb2a | 270 | visit_end_struct(v, NULL); |
2ddb16a9 MA |
271 | out: |
272 | error_propagate(errp, err); | |
7e6ccd9c LC |
273 | } |
274 | ||
4fa45492 | 275 | static void balloon_stats_get_poll_interval(Object *obj, Visitor *v, |
d7bce999 | 276 | const char *name, void *opaque, |
7e6ccd9c LC |
277 | Error **errp) |
278 | { | |
279 | VirtIOBalloon *s = opaque; | |
51e72bc1 | 280 | visit_type_int(v, name, &s->stats_poll_interval, errp); |
7e6ccd9c LC |
281 | } |
282 | ||
4fa45492 | 283 | static void balloon_stats_set_poll_interval(Object *obj, Visitor *v, |
d7bce999 | 284 | const char *name, void *opaque, |
7e6ccd9c LC |
285 | Error **errp) |
286 | { | |
287 | VirtIOBalloon *s = opaque; | |
65cd9064 | 288 | Error *local_err = NULL; |
7e6ccd9c LC |
289 | int64_t value; |
290 | ||
51e72bc1 | 291 | visit_type_int(v, name, &value, &local_err); |
65cd9064 MA |
292 | if (local_err) { |
293 | error_propagate(errp, local_err); | |
7e6ccd9c LC |
294 | return; |
295 | } | |
296 | ||
297 | if (value < 0) { | |
298 | error_setg(errp, "timer value must be greater than zero"); | |
299 | return; | |
300 | } | |
301 | ||
22644cd2 | 302 | if (value > UINT32_MAX) { |
1f9296b5 LC |
303 | error_setg(errp, "timer value is too big"); |
304 | return; | |
305 | } | |
306 | ||
7e6ccd9c LC |
307 | if (value == s->stats_poll_interval) { |
308 | return; | |
309 | } | |
310 | ||
311 | if (value == 0) { | |
312 | /* timer=0 disables the timer */ | |
313 | balloon_stats_destroy_timer(s); | |
314 | return; | |
315 | } | |
316 | ||
317 | if (balloon_stats_enabled(s)) { | |
318 | /* timer interval change */ | |
319 | s->stats_poll_interval = value; | |
320 | balloon_stats_change_timer(s, value); | |
321 | return; | |
322 | } | |
323 | ||
324 | /* create a new timer */ | |
325 | g_assert(s->stats_timer == NULL); | |
bc72ad67 | 326 | s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s); |
7e6ccd9c LC |
327 | s->stats_poll_interval = value; |
328 | balloon_stats_change_timer(s, 0); | |
329 | } | |
330 | ||
bd322087 AL |
331 | static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) |
332 | { | |
c96caced | 333 | VirtIOBalloon *s = VIRTIO_BALLOON(vdev); |
51b19ebe | 334 | VirtQueueElement *elem; |
b7c28c74 | 335 | MemoryRegionSection section; |
bd322087 | 336 | |
51b19ebe | 337 | for (;;) { |
bd322087 AL |
338 | size_t offset = 0; |
339 | uint32_t pfn; | |
51b19ebe PB |
340 | elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); |
341 | if (!elem) { | |
342 | return; | |
343 | } | |
bd322087 | 344 | |
51b19ebe | 345 | while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) { |
b218a70e | 346 | hwaddr pa; |
8609d2a8 | 347 | int p = virtio_ldl_p(vdev, &pfn); |
bd322087 | 348 | |
b218a70e | 349 | pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT; |
bd322087 AL |
350 | offset += 4; |
351 | ||
b218a70e DG |
352 | section = memory_region_find(get_system_memory(), pa, |
353 | BALLOON_PAGE_SIZE); | |
354 | if (!section.mr) { | |
355 | trace_virtio_balloon_bad_addr(pa); | |
356 | continue; | |
357 | } | |
358 | if (!memory_region_is_ram(section.mr) || | |
f2fd57db DDAG |
359 | memory_region_is_rom(section.mr) || |
360 | memory_region_is_romd(section.mr)) { | |
361 | trace_virtio_balloon_bad_addr(pa); | |
b86107ab | 362 | memory_region_unref(section.mr); |
bd322087 | 363 | continue; |
f2fd57db | 364 | } |
bd322087 | 365 | |
6adfdc5a HZ |
366 | trace_virtio_balloon_handle_output(memory_region_name(section.mr), |
367 | pa); | |
b27b3239 DG |
368 | if (!qemu_balloon_is_inhibited()) { |
369 | if (vq == s->ivq) { | |
370 | balloon_inflate_page(s, section.mr, | |
371 | section.offset_within_region); | |
372 | } else if (vq == s->dvq) { | |
373 | balloon_deflate_page(s, section.mr, section.offset_within_region); | |
374 | } else { | |
375 | g_assert_not_reached(); | |
376 | } | |
e9550234 | 377 | } |
dfde4e6e | 378 | memory_region_unref(section.mr); |
bd322087 AL |
379 | } |
380 | ||
51b19ebe | 381 | virtqueue_push(vq, elem, offset); |
bd322087 | 382 | virtio_notify(vdev, vq); |
51b19ebe | 383 | g_free(elem); |
bd322087 AL |
384 | } |
385 | } | |
386 | ||
625a5bef AL |
387 | static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) |
388 | { | |
c96caced | 389 | VirtIOBalloon *s = VIRTIO_BALLOON(vdev); |
51b19ebe | 390 | VirtQueueElement *elem; |
625a5bef AL |
391 | VirtIOBalloonStat stat; |
392 | size_t offset = 0; | |
7e6ccd9c | 393 | qemu_timeval tv; |
625a5bef | 394 | |
4eae2a65 | 395 | elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); |
51b19ebe | 396 | if (!elem) { |
7e6ccd9c | 397 | goto out; |
625a5bef AL |
398 | } |
399 | ||
4eae2a65 LP |
400 | if (s->stats_vq_elem != NULL) { |
401 | /* This should never happen if the driver follows the spec. */ | |
402 | virtqueue_push(vq, s->stats_vq_elem, 0); | |
403 | virtio_notify(vdev, vq); | |
404 | g_free(s->stats_vq_elem); | |
405 | } | |
406 | ||
407 | s->stats_vq_elem = elem; | |
408 | ||
625a5bef AL |
409 | /* Initialize the stats to get rid of any stale values. This is only |
410 | * needed to handle the case where a guest supports fewer stats than it | |
411 | * used to (ie. it has booted into an old kernel). | |
412 | */ | |
413 | reset_stats(s); | |
414 | ||
dcf6f5e1 | 415 | while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat)) |
fa6111f2 | 416 | == sizeof(stat)) { |
8609d2a8 RR |
417 | uint16_t tag = virtio_tswap16(vdev, stat.tag); |
418 | uint64_t val = virtio_tswap64(vdev, stat.val); | |
625a5bef AL |
419 | |
420 | offset += sizeof(stat); | |
421 | if (tag < VIRTIO_BALLOON_S_NR) | |
422 | s->stats[tag] = val; | |
423 | } | |
424 | s->stats_vq_offset = offset; | |
7e6ccd9c LC |
425 | |
426 | if (qemu_gettimeofday(&tv) < 0) { | |
2ab4b135 | 427 | warn_report("%s: failed to get time of day", __func__); |
7e6ccd9c LC |
428 | goto out; |
429 | } | |
430 | ||
431 | s->stats_last_update = tv.tv_sec; | |
432 | ||
433 | out: | |
434 | if (balloon_stats_enabled(s)) { | |
435 | balloon_stats_change_timer(s, s->stats_poll_interval); | |
436 | } | |
625a5bef AL |
437 | } |
438 | ||
c13c4153 WW |
439 | static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev, |
440 | VirtQueue *vq) | |
441 | { | |
442 | VirtIOBalloon *s = VIRTIO_BALLOON(vdev); | |
443 | qemu_bh_schedule(s->free_page_bh); | |
444 | } | |
445 | ||
446 | static bool get_free_page_hints(VirtIOBalloon *dev) | |
447 | { | |
448 | VirtQueueElement *elem; | |
449 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); | |
450 | VirtQueue *vq = dev->free_page_vq; | |
ae440bd1 | 451 | bool ret = true; |
c13c4153 WW |
452 | |
453 | while (dev->block_iothread) { | |
454 | qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock); | |
455 | } | |
456 | ||
457 | elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); | |
458 | if (!elem) { | |
459 | return false; | |
460 | } | |
461 | ||
462 | if (elem->out_num) { | |
463 | uint32_t id; | |
464 | size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0, | |
465 | &id, sizeof(id)); | |
c13c4153 WW |
466 | |
467 | virtio_tswap32s(vdev, &id); | |
468 | if (unlikely(size != sizeof(id))) { | |
469 | virtio_error(vdev, "received an incorrect cmd id"); | |
ae440bd1 WW |
470 | ret = false; |
471 | goto out; | |
c13c4153 WW |
472 | } |
473 | if (id == dev->free_page_report_cmd_id) { | |
474 | dev->free_page_report_status = FREE_PAGE_REPORT_S_START; | |
475 | } else { | |
476 | /* | |
477 | * Stop the optimization only when it has started. This | |
478 | * avoids a stale stop sign for the previous command. | |
479 | */ | |
480 | if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) { | |
481 | dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP; | |
482 | } | |
483 | } | |
484 | } | |
485 | ||
486 | if (elem->in_num) { | |
487 | if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) { | |
488 | qemu_guest_free_page_hint(elem->in_sg[0].iov_base, | |
489 | elem->in_sg[0].iov_len); | |
490 | } | |
c13c4153 WW |
491 | } |
492 | ||
ae440bd1 WW |
493 | out: |
494 | virtqueue_push(vq, elem, 1); | |
495 | g_free(elem); | |
496 | return ret; | |
c13c4153 WW |
497 | } |
498 | ||
499 | static void virtio_ballloon_get_free_page_hints(void *opaque) | |
500 | { | |
501 | VirtIOBalloon *dev = opaque; | |
502 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); | |
503 | VirtQueue *vq = dev->free_page_vq; | |
504 | bool continue_to_get_hints; | |
505 | ||
506 | do { | |
507 | qemu_mutex_lock(&dev->free_page_lock); | |
508 | virtio_queue_set_notification(vq, 0); | |
509 | continue_to_get_hints = get_free_page_hints(dev); | |
510 | qemu_mutex_unlock(&dev->free_page_lock); | |
511 | virtio_notify(vdev, vq); | |
512 | /* | |
513 | * Start to poll the vq once the reporting started. Otherwise, continue | |
514 | * only when there are entries on the vq, which need to be given back. | |
515 | */ | |
516 | } while (continue_to_get_hints || | |
517 | dev->free_page_report_status == FREE_PAGE_REPORT_S_START); | |
518 | virtio_queue_set_notification(vq, 1); | |
519 | } | |
520 | ||
521 | static bool virtio_balloon_free_page_support(void *opaque) | |
522 | { | |
523 | VirtIOBalloon *s = opaque; | |
524 | VirtIODevice *vdev = VIRTIO_DEVICE(s); | |
525 | ||
526 | return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT); | |
527 | } | |
528 | ||
529 | static void virtio_balloon_free_page_start(VirtIOBalloon *s) | |
530 | { | |
531 | VirtIODevice *vdev = VIRTIO_DEVICE(s); | |
532 | ||
533 | /* For the stop and copy phase, we don't need to start the optimization */ | |
534 | if (!vdev->vm_running) { | |
535 | return; | |
536 | } | |
537 | ||
538 | if (s->free_page_report_cmd_id == UINT_MAX) { | |
539 | s->free_page_report_cmd_id = | |
540 | VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; | |
541 | } else { | |
542 | s->free_page_report_cmd_id++; | |
543 | } | |
544 | ||
545 | s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED; | |
546 | virtio_notify_config(vdev); | |
547 | } | |
548 | ||
549 | static void virtio_balloon_free_page_stop(VirtIOBalloon *s) | |
550 | { | |
551 | VirtIODevice *vdev = VIRTIO_DEVICE(s); | |
552 | ||
553 | if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) { | |
554 | /* | |
555 | * The lock also guarantees us that the | |
556 | * virtio_ballloon_get_free_page_hints exits after the | |
557 | * free_page_report_status is set to S_STOP. | |
558 | */ | |
559 | qemu_mutex_lock(&s->free_page_lock); | |
560 | /* | |
561 | * The guest hasn't done the reporting, so host sends a notification | |
562 | * to the guest to actively stop the reporting. | |
563 | */ | |
564 | s->free_page_report_status = FREE_PAGE_REPORT_S_STOP; | |
565 | qemu_mutex_unlock(&s->free_page_lock); | |
566 | virtio_notify_config(vdev); | |
567 | } | |
568 | } | |
569 | ||
570 | static void virtio_balloon_free_page_done(VirtIOBalloon *s) | |
571 | { | |
572 | VirtIODevice *vdev = VIRTIO_DEVICE(s); | |
573 | ||
574 | s->free_page_report_status = FREE_PAGE_REPORT_S_DONE; | |
575 | virtio_notify_config(vdev); | |
576 | } | |
577 | ||
578 | static int | |
579 | virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data) | |
580 | { | |
581 | VirtIOBalloon *dev = container_of(n, VirtIOBalloon, | |
582 | free_page_report_notify); | |
583 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); | |
584 | PrecopyNotifyData *pnd = data; | |
585 | ||
586 | if (!virtio_balloon_free_page_support(dev)) { | |
587 | /* | |
588 | * This is an optimization provided to migration, so just return 0 to | |
589 | * have the normal migration process not affected when this feature is | |
590 | * not supported. | |
591 | */ | |
592 | return 0; | |
593 | } | |
594 | ||
595 | switch (pnd->reason) { | |
596 | case PRECOPY_NOTIFY_SETUP: | |
597 | precopy_enable_free_page_optimization(); | |
598 | break; | |
599 | case PRECOPY_NOTIFY_COMPLETE: | |
600 | case PRECOPY_NOTIFY_CLEANUP: | |
601 | case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC: | |
602 | virtio_balloon_free_page_stop(dev); | |
603 | break; | |
604 | case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: | |
605 | if (vdev->vm_running) { | |
606 | virtio_balloon_free_page_start(dev); | |
607 | } else { | |
608 | virtio_balloon_free_page_done(dev); | |
609 | } | |
610 | break; | |
611 | default: | |
612 | virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason); | |
613 | } | |
614 | ||
615 | return 0; | |
616 | } | |
617 | ||
bd322087 AL |
618 | static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) |
619 | { | |
c96caced | 620 | VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); |
5385a598 | 621 | struct virtio_balloon_config config = {}; |
bd322087 AL |
622 | |
623 | config.num_pages = cpu_to_le32(dev->num_pages); | |
624 | config.actual = cpu_to_le32(dev->actual); | |
625 | ||
c13c4153 WW |
626 | if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) { |
627 | config.free_page_report_cmd_id = | |
628 | cpu_to_le32(dev->free_page_report_cmd_id); | |
629 | } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) { | |
630 | config.free_page_report_cmd_id = | |
631 | cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP); | |
632 | } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) { | |
633 | config.free_page_report_cmd_id = | |
634 | cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE); | |
635 | } | |
636 | ||
6adfdc5a | 637 | trace_virtio_balloon_get_config(config.num_pages, config.actual); |
e6baf613 | 638 | memcpy(config_data, &config, sizeof(struct virtio_balloon_config)); |
bd322087 AL |
639 | } |
640 | ||
2b75f848 VSO |
641 | static int build_dimm_list(Object *obj, void *opaque) |
642 | { | |
643 | GSList **list = opaque; | |
644 | ||
645 | if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { | |
646 | DeviceState *dev = DEVICE(obj); | |
647 | if (dev->realized) { /* only realized DIMMs matter */ | |
648 | *list = g_slist_prepend(*list, dev); | |
649 | } | |
650 | } | |
651 | ||
652 | object_child_foreach(obj, build_dimm_list, opaque); | |
653 | return 0; | |
654 | } | |
655 | ||
39de9984 VSO |
656 | static ram_addr_t get_current_ram_size(void) |
657 | { | |
e8dc06d2 | 658 | GSList *list = NULL, *item; |
39de9984 VSO |
659 | ram_addr_t size = ram_size; |
660 | ||
2b75f848 | 661 | build_dimm_list(qdev_get_machine(), &list); |
e8dc06d2 VSO |
662 | for (item = list; item; item = g_slist_next(item)) { |
663 | Object *obj = OBJECT(item->data); | |
2b75f848 VSO |
664 | if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) { |
665 | size += object_property_get_int(obj, PC_DIMM_SIZE_PROP, | |
666 | &error_abort); | |
667 | } | |
39de9984 | 668 | } |
e8dc06d2 | 669 | g_slist_free(list); |
39de9984 VSO |
670 | |
671 | return size; | |
672 | } | |
673 | ||
bd322087 AL |
674 | static void virtio_balloon_set_config(VirtIODevice *vdev, |
675 | const uint8_t *config_data) | |
676 | { | |
c96caced | 677 | VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); |
bd322087 | 678 | struct virtio_balloon_config config; |
973603a8 | 679 | uint32_t oldactual = dev->actual; |
463756d0 HZ |
680 | ram_addr_t vm_ram_size = get_current_ram_size(); |
681 | ||
e6baf613 | 682 | memcpy(&config, config_data, sizeof(struct virtio_balloon_config)); |
e54f1771 | 683 | dev->actual = le32_to_cpu(config.actual); |
973603a8 | 684 | if (dev->actual != oldactual) { |
463756d0 | 685 | qapi_event_send_balloon_change(vm_ram_size - |
3ab72385 | 686 | ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT)); |
973603a8 | 687 | } |
6adfdc5a | 688 | trace_virtio_balloon_set_config(dev->actual, oldactual); |
bd322087 AL |
689 | } |
690 | ||
9d5b731d JW |
691 | static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f, |
692 | Error **errp) | |
bd322087 | 693 | { |
e3816255 DL |
694 | VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); |
695 | f |= dev->host_features; | |
40de55af | 696 | virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ); |
c13c4153 | 697 | |
8172539d | 698 | return f; |
bd322087 AL |
699 | } |
700 | ||
96637bcd | 701 | static void virtio_balloon_stat(void *opaque, BalloonInfo *info) |
dce911c7 AS |
702 | { |
703 | VirtIOBalloon *dev = opaque; | |
463756d0 HZ |
704 | info->actual = get_current_ram_size() - ((uint64_t) dev->actual << |
705 | VIRTIO_BALLOON_PFN_SHIFT); | |
dce911c7 AS |
706 | } |
707 | ||
30fb2ca6 | 708 | static void virtio_balloon_to_target(void *opaque, ram_addr_t target) |
bd322087 | 709 | { |
c96caced FK |
710 | VirtIOBalloon *dev = VIRTIO_BALLOON(opaque); |
711 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); | |
463756d0 | 712 | ram_addr_t vm_ram_size = get_current_ram_size(); |
bd322087 | 713 | |
463756d0 HZ |
714 | if (target > vm_ram_size) { |
715 | target = vm_ram_size; | |
dce911c7 | 716 | } |
bd322087 | 717 | if (target) { |
463756d0 | 718 | dev->num_pages = (vm_ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT; |
c96caced | 719 | virtio_notify_config(vdev); |
bd322087 | 720 | } |
6adfdc5a | 721 | trace_virtio_balloon_to_target(target, dev->num_pages); |
bd322087 AL |
722 | } |
723 | ||
019518a8 | 724 | static int virtio_balloon_post_load_device(void *opaque, int version_id) |
9ea2511c | 725 | { |
019518a8 | 726 | VirtIOBalloon *s = VIRTIO_BALLOON(opaque); |
fecb48f7 PB |
727 | |
728 | if (balloon_stats_enabled(s)) { | |
729 | balloon_stats_change_timer(s, s->stats_poll_interval); | |
730 | } | |
bd322087 AL |
731 | return 0; |
732 | } | |
733 | ||
/*
 * Subsection with the free page reporting state (command id and status).
 * Its .needed hook is virtio_balloon_free_page_support().
 */
static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
    .name = "virtio-balloon-device/free-page-report",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_balloon_free_page_support,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
        VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
        VMSTATE_END_OF_LIST()
    }
};
745 | ||
/*
 * Device vmstate: num_pages and actual, with the free page report state
 * as a subsection.  virtio_balloon_post_load_device() is its post_load
 * hook.
 */
static const VMStateDescription vmstate_virtio_balloon_device = {
    .name = "virtio-balloon-device",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = virtio_balloon_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(num_pages, VirtIOBalloon),
        VMSTATE_UINT32(actual, VirtIOBalloon),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_balloon_free_page_report,
        NULL
    }
};
761 | ||
74def47c | 762 | static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) |
bd322087 | 763 | { |
74def47c | 764 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); |
a546fb17 | 765 | VirtIOBalloon *s = VIRTIO_BALLOON(dev); |
f76f6655 | 766 | int ret; |
bd322087 | 767 | |
e6baf613 LC |
768 | virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON, |
769 | sizeof(struct virtio_balloon_config)); | |
bd322087 | 770 | |
f76f6655 AS |
771 | ret = qemu_add_balloon_handler(virtio_balloon_to_target, |
772 | virtio_balloon_stat, s); | |
5c7d0962 | 773 | |
1ab461b5 | 774 | if (ret < 0) { |
46abb812 | 775 | error_setg(errp, "Only one balloon device is supported"); |
a546fb17 | 776 | virtio_cleanup(vdev); |
74def47c | 777 | return; |
1ab461b5 | 778 | } |
f76f6655 | 779 | |
5c7d0962 FK |
780 | s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); |
781 | s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); | |
782 | s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats); | |
bd322087 | 783 | |
c13c4153 WW |
784 | if (virtio_has_feature(s->host_features, |
785 | VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { | |
786 | s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, | |
787 | virtio_balloon_handle_free_page_vq); | |
788 | s->free_page_report_status = FREE_PAGE_REPORT_S_STOP; | |
789 | s->free_page_report_cmd_id = | |
790 | VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; | |
791 | s->free_page_report_notify.notify = | |
792 | virtio_balloon_free_page_report_notify; | |
793 | precopy_add_notifier(&s->free_page_report_notify); | |
794 | if (s->iothread) { | |
795 | object_ref(OBJECT(s->iothread)); | |
796 | s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), | |
797 | virtio_ballloon_get_free_page_hints, s); | |
798 | qemu_mutex_init(&s->free_page_lock); | |
799 | qemu_cond_init(&s->free_page_cond); | |
800 | s->block_iothread = false; | |
801 | } else { | |
802 | /* Simply disable this feature if the iothread wasn't created. */ | |
803 | s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT); | |
804 | virtio_error(vdev, "iothread is missing"); | |
805 | } | |
806 | } | |
38dbd48b | 807 | reset_stats(s); |
1ab461b5 FK |
808 | } |
809 | ||
306ec6c3 | 810 | static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp) |
1ab461b5 | 811 | { |
306ec6c3 AF |
812 | VirtIODevice *vdev = VIRTIO_DEVICE(dev); |
813 | VirtIOBalloon *s = VIRTIO_BALLOON(dev); | |
1ab461b5 | 814 | |
c13c4153 WW |
815 | if (virtio_balloon_free_page_support(s)) { |
816 | qemu_bh_delete(s->free_page_bh); | |
817 | virtio_balloon_free_page_stop(s); | |
818 | precopy_remove_notifier(&s->free_page_report_notify); | |
819 | } | |
1ab461b5 FK |
820 | balloon_stats_destroy_timer(s); |
821 | qemu_remove_balloon_handler(s); | |
6a1a8cc7 | 822 | virtio_cleanup(vdev); |
1ab461b5 FK |
823 | } |
824 | ||
4eae2a65 LP |
825 | static void virtio_balloon_device_reset(VirtIODevice *vdev) |
826 | { | |
827 | VirtIOBalloon *s = VIRTIO_BALLOON(vdev); | |
828 | ||
c13c4153 WW |
829 | if (virtio_balloon_free_page_support(s)) { |
830 | virtio_balloon_free_page_stop(s); | |
831 | } | |
832 | ||
4eae2a65 | 833 | if (s->stats_vq_elem != NULL) { |
27e57efe | 834 | virtqueue_unpop(s->svq, s->stats_vq_elem, 0); |
4eae2a65 LP |
835 | g_free(s->stats_vq_elem); |
836 | s->stats_vq_elem = NULL; | |
837 | } | |
838 | } | |
839 | ||
4a1e48be LP |
840 | static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) |
841 | { | |
842 | VirtIOBalloon *s = VIRTIO_BALLOON(vdev); | |
843 | ||
844 | if (!s->stats_vq_elem && vdev->vm_running && | |
845 | (status & VIRTIO_CONFIG_S_DRIVER_OK) && virtqueue_rewind(s->svq, 1)) { | |
846 | /* poll stats queue for the element we have discarded when the VM | |
847 | * was stopped */ | |
848 | virtio_balloon_receive_stats(vdev, s->svq); | |
849 | } | |
c13c4153 WW |
850 | |
851 | if (virtio_balloon_free_page_support(s)) { | |
852 | /* | |
853 | * The VM is woken up and the iothread was blocked, so signal it to | |
854 | * continue. | |
855 | */ | |
856 | if (vdev->vm_running && s->block_iothread) { | |
857 | qemu_mutex_lock(&s->free_page_lock); | |
858 | s->block_iothread = false; | |
859 | qemu_cond_signal(&s->free_page_cond); | |
860 | qemu_mutex_unlock(&s->free_page_lock); | |
861 | } | |
862 | ||
863 | /* The VM is stopped, block the iothread. */ | |
864 | if (!vdev->vm_running) { | |
865 | qemu_mutex_lock(&s->free_page_lock); | |
866 | s->block_iothread = true; | |
867 | qemu_mutex_unlock(&s->free_page_lock); | |
868 | } | |
869 | } | |
4a1e48be LP |
870 | } |
871 | ||
1190044e SZ |
872 | static void virtio_balloon_instance_init(Object *obj) |
873 | { | |
874 | VirtIOBalloon *s = VIRTIO_BALLOON(obj); | |
875 | ||
876 | object_property_add(obj, "guest-stats", "guest statistics", | |
877 | balloon_stats_get_all, NULL, NULL, s, NULL); | |
878 | ||
879 | object_property_add(obj, "guest-stats-polling-interval", "int", | |
880 | balloon_stats_get_poll_interval, | |
881 | balloon_stats_set_poll_interval, | |
882 | NULL, s, NULL); | |
883 | } | |
884 | ||
c5dc16b7 HP |
885 | static const VMStateDescription vmstate_virtio_balloon = { |
886 | .name = "virtio-balloon", | |
887 | .minimum_version_id = 1, | |
888 | .version_id = 1, | |
889 | .fields = (VMStateField[]) { | |
890 | VMSTATE_VIRTIO_DEVICE, | |
891 | VMSTATE_END_OF_LIST() | |
892 | }, | |
893 | }; | |
7f1ca9b2 | 894 | |
1ab461b5 | 895 | static Property virtio_balloon_properties[] = { |
e3816255 DL |
896 | DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features, |
897 | VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), | |
c13c4153 WW |
898 | DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, |
899 | VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), | |
900 | DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD, | |
901 | IOThread *), | |
1ab461b5 FK |
902 | DEFINE_PROP_END_OF_LIST(), |
903 | }; | |
904 | ||
905 | static void virtio_balloon_class_init(ObjectClass *klass, void *data) | |
906 | { | |
907 | DeviceClass *dc = DEVICE_CLASS(klass); | |
908 | VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); | |
74def47c | 909 | |
1ab461b5 | 910 | dc->props = virtio_balloon_properties; |
7f1ca9b2 | 911 | dc->vmsd = &vmstate_virtio_balloon; |
125ee0ed | 912 | set_bit(DEVICE_CATEGORY_MISC, dc->categories); |
74def47c | 913 | vdc->realize = virtio_balloon_device_realize; |
306ec6c3 | 914 | vdc->unrealize = virtio_balloon_device_unrealize; |
4eae2a65 | 915 | vdc->reset = virtio_balloon_device_reset; |
1ab461b5 FK |
916 | vdc->get_config = virtio_balloon_get_config; |
917 | vdc->set_config = virtio_balloon_set_config; | |
918 | vdc->get_features = virtio_balloon_get_features; | |
4a1e48be | 919 | vdc->set_status = virtio_balloon_set_status; |
019518a8 | 920 | vdc->vmsd = &vmstate_virtio_balloon_device; |
1ab461b5 FK |
921 | } |
922 | ||
923 | static const TypeInfo virtio_balloon_info = { | |
924 | .name = TYPE_VIRTIO_BALLOON, | |
925 | .parent = TYPE_VIRTIO_DEVICE, | |
926 | .instance_size = sizeof(VirtIOBalloon), | |
1190044e | 927 | .instance_init = virtio_balloon_instance_init, |
1ab461b5 FK |
928 | .class_init = virtio_balloon_class_init, |
929 | }; | |
930 | ||
931 | static void virtio_register_types(void) | |
932 | { | |
933 | type_register_static(&virtio_balloon_info); | |
934 | } | |
935 | ||
936 | type_init(virtio_register_types) |