]>
Commit | Line | Data |
---|---|---|
bd322087 AL |
1 | /* |
2 | * Virtio Block Device | |
3 | * | |
4 | * Copyright IBM, Corp. 2008 | |
5 | * | |
6 | * Authors: | |
7 | * Anthony Liguori <[email protected]> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | * the COPYING file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include "qemu-common.h" | |
15 | #include "virtio.h" | |
16 | #include "pc.h" | |
17 | #include "sysemu.h" | |
18 | #include "cpu.h" | |
625a5bef | 19 | #include "monitor.h" |
bd322087 AL |
20 | #include "balloon.h" |
21 | #include "virtio-balloon.h" | |
22 | #include "kvm.h" | |
625a5bef AL |
23 | #include "qlist.h" |
24 | #include "qint.h" | |
25 | #include "qstring.h" | |
bd322087 AL |
26 | |
27 | #if defined(__linux__) | |
28 | #include <sys/mman.h> | |
29 | #endif | |
30 | ||
31 | typedef struct VirtIOBalloon | |
32 | { | |
33 | VirtIODevice vdev; | |
625a5bef | 34 | VirtQueue *ivq, *dvq, *svq; |
bd322087 AL |
35 | uint32_t num_pages; |
36 | uint32_t actual; | |
625a5bef AL |
37 | uint64_t stats[VIRTIO_BALLOON_S_NR]; |
38 | VirtQueueElement stats_vq_elem; | |
39 | size_t stats_vq_offset; | |
40 | MonitorCompletion *stats_callback; | |
41 | void *stats_opaque_callback_data; | |
bd322087 AL |
42 | } VirtIOBalloon; |
43 | ||
44 | static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev) | |
45 | { | |
46 | return (VirtIOBalloon *)vdev; | |
47 | } | |
48 | ||
49 | static void balloon_page(void *addr, int deflate) | |
50 | { | |
51 | #if defined(__linux__) | |
52 | if (!kvm_enabled() || kvm_has_sync_mmu()) | |
53 | madvise(addr, TARGET_PAGE_SIZE, | |
54 | deflate ? MADV_WILLNEED : MADV_DONTNEED); | |
55 | #endif | |
56 | } | |
57 | ||
625a5bef AL |
58 | /* |
59 | * reset_stats - Mark all items in the stats array as unset | |
60 | * | |
61 | * This function needs to be called at device intialization and before | |
62 | * before updating to a set of newly-generated stats. This will ensure that no | |
63 | * stale values stick around in case the guest reports a subset of the supported | |
64 | * statistics. | |
65 | */ | |
66 | static inline void reset_stats(VirtIOBalloon *dev) | |
67 | { | |
68 | int i; | |
69 | for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1); | |
70 | } | |
71 | ||
72 | static void stat_put(QDict *dict, const char *label, uint64_t val) | |
73 | { | |
74 | if (val != -1) | |
75 | qdict_put(dict, label, qint_from_int(val)); | |
76 | } | |
77 | ||
78 | static QObject *get_stats_qobject(VirtIOBalloon *dev) | |
79 | { | |
80 | QDict *dict = qdict_new(); | |
81 | uint32_t actual = ram_size - (dev->actual << VIRTIO_BALLOON_PFN_SHIFT); | |
82 | ||
83 | stat_put(dict, "actual", actual); | |
84 | stat_put(dict, "mem_swapped_in", dev->stats[VIRTIO_BALLOON_S_SWAP_IN]); | |
85 | stat_put(dict, "mem_swapped_out", dev->stats[VIRTIO_BALLOON_S_SWAP_OUT]); | |
86 | stat_put(dict, "major_page_faults", dev->stats[VIRTIO_BALLOON_S_MAJFLT]); | |
87 | stat_put(dict, "minor_page_faults", dev->stats[VIRTIO_BALLOON_S_MINFLT]); | |
88 | stat_put(dict, "free_mem", dev->stats[VIRTIO_BALLOON_S_MEMFREE]); | |
89 | stat_put(dict, "total_mem", dev->stats[VIRTIO_BALLOON_S_MEMTOT]); | |
90 | ||
91 | return QOBJECT(dict); | |
92 | } | |
93 | ||
bd322087 AL |
/* FIXME: once we do a virtio refactoring, this will get subsumed into common
 * code */
/*
 * Copy up to size bytes out of a scatter/gather list into a flat buffer.
 *
 * data:   destination buffer, at least size bytes long
 * offset: logical byte position within the concatenated iovec to start from
 * size:   maximum number of bytes to copy
 * iov:    array of segments
 * iovlen: number of entries in iov
 *
 * Returns the number of bytes actually copied, which is smaller than size
 * when the iovec ends first.
 */
static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
                                   struct iovec *iov, int iovlen)
{
    uint8_t *dst = data;
    size_t copied = 0;
    size_t seg_start = 0;   /* logical position of the current segment */
    int idx;

    for (idx = 0; idx < iovlen && size != 0; idx++) {
        const uint8_t *seg = iov[idx].iov_base;
        size_t seg_end = seg_start + iov[idx].iov_len;

        if (offset < seg_end) {
            /* Take whatever is left of this segment, capped at size. */
            size_t chunk = seg_end - offset;

            if (chunk > size) {
                chunk = size;
            }

            memcpy(dst + copied, seg + (offset - seg_start), chunk);

            copied += chunk;
            offset += chunk;
            size -= chunk;
        }

        seg_start = seg_end;
    }

    return copied;
}
120 | ||
121 | static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) | |
122 | { | |
123 | VirtIOBalloon *s = to_virtio_balloon(vdev); | |
124 | VirtQueueElement elem; | |
125 | ||
126 | while (virtqueue_pop(vq, &elem)) { | |
127 | size_t offset = 0; | |
128 | uint32_t pfn; | |
129 | ||
130 | while (memcpy_from_iovector(&pfn, offset, 4, | |
131 | elem.out_sg, elem.out_num) == 4) { | |
c227f099 AL |
132 | ram_addr_t pa; |
133 | ram_addr_t addr; | |
bd322087 | 134 | |
c227f099 | 135 | pa = (ram_addr_t)ldl_p(&pfn) << VIRTIO_BALLOON_PFN_SHIFT; |
bd322087 AL |
136 | offset += 4; |
137 | ||
138 | addr = cpu_get_physical_page_desc(pa); | |
139 | if ((addr & ~TARGET_PAGE_MASK) != IO_MEM_RAM) | |
140 | continue; | |
141 | ||
5c130f65 PB |
142 | /* Using qemu_get_ram_ptr is bending the rules a bit, but |
143 | should be OK because we only want a single page. */ | |
144 | balloon_page(qemu_get_ram_ptr(addr), !!(vq == s->dvq)); | |
bd322087 AL |
145 | } |
146 | ||
147 | virtqueue_push(vq, &elem, offset); | |
148 | virtio_notify(vdev, vq); | |
149 | } | |
150 | } | |
151 | ||
625a5bef AL |
152 | static void complete_stats_request(VirtIOBalloon *vb) |
153 | { | |
154 | QObject *stats; | |
155 | ||
156 | if (!vb->stats_opaque_callback_data) | |
157 | return; | |
158 | ||
159 | stats = get_stats_qobject(vb); | |
160 | vb->stats_callback(vb->stats_opaque_callback_data, stats); | |
161 | qobject_decref(stats); | |
162 | vb->stats_opaque_callback_data = NULL; | |
163 | vb->stats_callback = NULL; | |
164 | } | |
165 | ||
166 | static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) | |
167 | { | |
168 | VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev); | |
169 | VirtQueueElement *elem = &s->stats_vq_elem; | |
170 | VirtIOBalloonStat stat; | |
171 | size_t offset = 0; | |
172 | ||
173 | if (!virtqueue_pop(vq, elem)) { | |
174 | return; | |
175 | } | |
176 | ||
177 | /* Initialize the stats to get rid of any stale values. This is only | |
178 | * needed to handle the case where a guest supports fewer stats than it | |
179 | * used to (ie. it has booted into an old kernel). | |
180 | */ | |
181 | reset_stats(s); | |
182 | ||
183 | while (memcpy_from_iovector(&stat, offset, sizeof(stat), elem->out_sg, | |
184 | elem->out_num) == sizeof(stat)) { | |
185 | uint16_t tag = tswap16(stat.tag); | |
186 | uint64_t val = tswap64(stat.val); | |
187 | ||
188 | offset += sizeof(stat); | |
189 | if (tag < VIRTIO_BALLOON_S_NR) | |
190 | s->stats[tag] = val; | |
191 | } | |
192 | s->stats_vq_offset = offset; | |
193 | ||
194 | complete_stats_request(s); | |
195 | } | |
196 | ||
bd322087 AL |
197 | static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) |
198 | { | |
199 | VirtIOBalloon *dev = to_virtio_balloon(vdev); | |
200 | struct virtio_balloon_config config; | |
201 | ||
202 | config.num_pages = cpu_to_le32(dev->num_pages); | |
203 | config.actual = cpu_to_le32(dev->actual); | |
204 | ||
205 | memcpy(config_data, &config, 8); | |
206 | } | |
207 | ||
208 | static void virtio_balloon_set_config(VirtIODevice *vdev, | |
209 | const uint8_t *config_data) | |
210 | { | |
211 | VirtIOBalloon *dev = to_virtio_balloon(vdev); | |
212 | struct virtio_balloon_config config; | |
213 | memcpy(&config, config_data, 8); | |
214 | dev->actual = config.actual; | |
215 | } | |
216 | ||
8172539d | 217 | static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f) |
bd322087 | 218 | { |
625a5bef | 219 | f |= (1 << VIRTIO_BALLOON_F_STATS_VQ); |
8172539d | 220 | return f; |
bd322087 AL |
221 | } |
222 | ||
625a5bef AL |
223 | static void virtio_balloon_to_target(void *opaque, ram_addr_t target, |
224 | MonitorCompletion cb, void *cb_data) | |
bd322087 AL |
225 | { |
226 | VirtIOBalloon *dev = opaque; | |
227 | ||
228 | if (target > ram_size) | |
229 | target = ram_size; | |
230 | ||
231 | if (target) { | |
232 | dev->num_pages = (ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT; | |
233 | virtio_notify_config(&dev->vdev); | |
625a5bef AL |
234 | } else { |
235 | /* For now, only allow one request at a time. This restriction can be | |
236 | * removed later by queueing callback and data pairs. | |
237 | */ | |
238 | if (dev->stats_callback != NULL) { | |
239 | return; | |
240 | } | |
241 | dev->stats_callback = cb; | |
242 | dev->stats_opaque_callback_data = cb_data; | |
243 | if (dev->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) { | |
244 | virtqueue_push(dev->svq, &dev->stats_vq_elem, dev->stats_vq_offset); | |
245 | virtio_notify(&dev->vdev, dev->svq); | |
246 | } else { | |
247 | /* Stats are not supported. Clear out any stale values that might | |
248 | * have been set by a more featureful guest kernel. | |
249 | */ | |
250 | reset_stats(dev); | |
251 | complete_stats_request(dev); | |
252 | } | |
bd322087 | 253 | } |
bd322087 AL |
254 | } |
255 | ||
256 | static void virtio_balloon_save(QEMUFile *f, void *opaque) | |
257 | { | |
258 | VirtIOBalloon *s = opaque; | |
259 | ||
260 | virtio_save(&s->vdev, f); | |
261 | ||
262 | qemu_put_be32(f, s->num_pages); | |
263 | qemu_put_be32(f, s->actual); | |
625a5bef AL |
264 | qemu_put_buffer(f, (uint8_t *)&s->stats_vq_elem, sizeof(VirtQueueElement)); |
265 | qemu_put_buffer(f, (uint8_t *)&s->stats_vq_offset, sizeof(size_t)); | |
266 | qemu_put_buffer(f, (uint8_t *)&s->stats_callback, sizeof(MonitorCompletion)); | |
267 | qemu_put_buffer(f, (uint8_t *)&s->stats_opaque_callback_data, sizeof(void)); | |
bd322087 AL |
268 | } |
269 | ||
270 | static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id) | |
271 | { | |
272 | VirtIOBalloon *s = opaque; | |
273 | ||
274 | if (version_id != 1) | |
275 | return -EINVAL; | |
276 | ||
277 | virtio_load(&s->vdev, f); | |
278 | ||
279 | s->num_pages = qemu_get_be32(f); | |
280 | s->actual = qemu_get_be32(f); | |
625a5bef AL |
281 | qemu_get_buffer(f, (uint8_t *)&s->stats_vq_elem, sizeof(VirtQueueElement)); |
282 | qemu_get_buffer(f, (uint8_t *)&s->stats_vq_offset, sizeof(size_t)); | |
283 | qemu_get_buffer(f, (uint8_t *)&s->stats_callback, sizeof(MonitorCompletion)); | |
284 | qemu_get_buffer(f, (uint8_t *)&s->stats_opaque_callback_data, sizeof(void)); | |
bd322087 AL |
285 | |
286 | return 0; | |
287 | } | |
288 | ||
53c25cea | 289 | VirtIODevice *virtio_balloon_init(DeviceState *dev) |
bd322087 AL |
290 | { |
291 | VirtIOBalloon *s; | |
292 | ||
53c25cea PB |
293 | s = (VirtIOBalloon *)virtio_common_init("virtio-balloon", |
294 | VIRTIO_ID_BALLOON, | |
295 | 8, sizeof(VirtIOBalloon)); | |
bd322087 AL |
296 | |
297 | s->vdev.get_config = virtio_balloon_get_config; | |
298 | s->vdev.set_config = virtio_balloon_set_config; | |
299 | s->vdev.get_features = virtio_balloon_get_features; | |
300 | ||
301 | s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); | |
302 | s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output); | |
625a5bef | 303 | s->svq = virtio_add_queue(&s->vdev, 128, virtio_balloon_receive_stats); |
bd322087 | 304 | |
625a5bef | 305 | reset_stats(s); |
bd322087 AL |
306 | qemu_add_balloon_handler(virtio_balloon_to_target, s); |
307 | ||
308 | register_savevm("virtio-balloon", -1, 1, virtio_balloon_save, virtio_balloon_load, s); | |
309 | ||
53c25cea | 310 | return &s->vdev; |
bd322087 | 311 | } |