]>
Commit | Line | Data |
---|---|---|
1f070489 IM |
1 | /* |
2 | * QEMU Host Memory Backend | |
3 | * | |
4 | * Copyright (C) 2013-2014 Red Hat Inc | |
5 | * | |
6 | * Authors: | |
7 | * Igor Mammedov <[email protected]> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
10 | * See the COPYING file in the top-level directory. | |
11 | */ | |
9c058332 | 12 | #include "qemu/osdep.h" |
1f070489 | 13 | #include "sysemu/hostmem.h" |
6b269967 | 14 | #include "hw/boards.h" |
da34e65c | 15 | #include "qapi/error.h" |
1f070489 | 16 | #include "qapi/visitor.h" |
4cf1b76b HT |
17 | #include "qapi-types.h" |
18 | #include "qapi-visit.h" | |
1f070489 IM |
19 | #include "qemu/config-file.h" |
20 | #include "qom/object_interfaces.h" | |
21 | ||
4cf1b76b HT |
#ifdef CONFIG_NUMA
#include <numaif.h>
/*
 * backend->policy is passed to mbind() as-is, so each HostMemPolicy
 * enumerator must have the same numeric value as its MPOL_* counterpart.
 * Break the build if they ever diverge.
 */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
#endif
29 | ||
1f070489 | 30 | static void |
d7bce999 EB |
31 | host_memory_backend_get_size(Object *obj, Visitor *v, const char *name, |
32 | void *opaque, Error **errp) | |
1f070489 IM |
33 | { |
34 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
35 | uint64_t value = backend->size; | |
36 | ||
51e72bc1 | 37 | visit_type_size(v, name, &value, errp); |
1f070489 IM |
38 | } |
39 | ||
40 | static void | |
d7bce999 EB |
41 | host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, |
42 | void *opaque, Error **errp) | |
1f070489 IM |
43 | { |
44 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
45 | Error *local_err = NULL; | |
46 | uint64_t value; | |
47 | ||
6f4c60e4 | 48 | if (host_memory_backend_mr_inited(backend)) { |
1f070489 IM |
49 | error_setg(&local_err, "cannot change property value"); |
50 | goto out; | |
51 | } | |
52 | ||
51e72bc1 | 53 | visit_type_size(v, name, &value, &local_err); |
1f070489 IM |
54 | if (local_err) { |
55 | goto out; | |
56 | } | |
57 | if (!value) { | |
58 | error_setg(&local_err, "Property '%s.%s' doesn't take value '%" | |
59 | PRIu64 "'", object_get_typename(obj), name, value); | |
60 | goto out; | |
61 | } | |
62 | backend->size = value; | |
63 | out: | |
64 | error_propagate(errp, local_err); | |
65 | } | |
66 | ||
4cf1b76b | 67 | static void |
d7bce999 EB |
68 | host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, |
69 | void *opaque, Error **errp) | |
4cf1b76b HT |
70 | { |
71 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
72 | uint16List *host_nodes = NULL; | |
73 | uint16List **node = &host_nodes; | |
74 | unsigned long value; | |
75 | ||
76 | value = find_first_bit(backend->host_nodes, MAX_NODES); | |
77 | if (value == MAX_NODES) { | |
658ae5a7 | 78 | return; |
4cf1b76b HT |
79 | } |
80 | ||
658ae5a7 MA |
81 | *node = g_malloc0(sizeof(**node)); |
82 | (*node)->value = value; | |
83 | node = &(*node)->next; | |
84 | ||
4cf1b76b HT |
85 | do { |
86 | value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); | |
87 | if (value == MAX_NODES) { | |
88 | break; | |
89 | } | |
90 | ||
658ae5a7 MA |
91 | *node = g_malloc0(sizeof(**node)); |
92 | (*node)->value = value; | |
93 | node = &(*node)->next; | |
4cf1b76b HT |
94 | } while (true); |
95 | ||
51e72bc1 | 96 | visit_type_uint16List(v, name, &host_nodes, errp); |
4cf1b76b HT |
97 | } |
98 | ||
99 | static void | |
d7bce999 EB |
100 | host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name, |
101 | void *opaque, Error **errp) | |
4cf1b76b HT |
102 | { |
103 | #ifdef CONFIG_NUMA | |
104 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
105 | uint16List *l = NULL; | |
106 | ||
51e72bc1 | 107 | visit_type_uint16List(v, name, &l, errp); |
4cf1b76b HT |
108 | |
109 | while (l) { | |
110 | bitmap_set(backend->host_nodes, l->value, 1); | |
111 | l = l->next; | |
112 | } | |
113 | #else | |
114 | error_setg(errp, "NUMA node binding are not supported by this QEMU"); | |
115 | #endif | |
116 | } | |
117 | ||
a3590dac DB |
118 | static int |
119 | host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED) | |
4cf1b76b HT |
120 | { |
121 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
a3590dac | 122 | return backend->policy; |
4cf1b76b HT |
123 | } |
124 | ||
125 | static void | |
a3590dac | 126 | host_memory_backend_set_policy(Object *obj, int policy, Error **errp) |
4cf1b76b HT |
127 | { |
128 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
4cf1b76b HT |
129 | backend->policy = policy; |
130 | ||
131 | #ifndef CONFIG_NUMA | |
132 | if (policy != HOST_MEM_POLICY_DEFAULT) { | |
133 | error_setg(errp, "NUMA policies are not supported by this QEMU"); | |
134 | } | |
135 | #endif | |
136 | } | |
137 | ||
605d0a94 PB |
138 | static bool host_memory_backend_get_merge(Object *obj, Error **errp) |
139 | { | |
140 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
141 | ||
142 | return backend->merge; | |
143 | } | |
144 | ||
145 | static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) | |
146 | { | |
147 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
148 | ||
6f4c60e4 | 149 | if (!host_memory_backend_mr_inited(backend)) { |
605d0a94 PB |
150 | backend->merge = value; |
151 | return; | |
152 | } | |
153 | ||
154 | if (value != backend->merge) { | |
155 | void *ptr = memory_region_get_ram_ptr(&backend->mr); | |
156 | uint64_t sz = memory_region_size(&backend->mr); | |
157 | ||
158 | qemu_madvise(ptr, sz, | |
159 | value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); | |
160 | backend->merge = value; | |
161 | } | |
162 | } | |
163 | ||
164 | static bool host_memory_backend_get_dump(Object *obj, Error **errp) | |
165 | { | |
166 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
167 | ||
168 | return backend->dump; | |
169 | } | |
170 | ||
171 | static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) | |
172 | { | |
173 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
174 | ||
6f4c60e4 | 175 | if (!host_memory_backend_mr_inited(backend)) { |
605d0a94 PB |
176 | backend->dump = value; |
177 | return; | |
178 | } | |
179 | ||
180 | if (value != backend->dump) { | |
181 | void *ptr = memory_region_get_ram_ptr(&backend->mr); | |
182 | uint64_t sz = memory_region_size(&backend->mr); | |
183 | ||
184 | qemu_madvise(ptr, sz, | |
185 | value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); | |
186 | backend->dump = value; | |
187 | } | |
188 | } | |
189 | ||
a35ba7be PB |
190 | static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) |
191 | { | |
192 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
193 | ||
194 | return backend->prealloc || backend->force_prealloc; | |
195 | } | |
196 | ||
197 | static void host_memory_backend_set_prealloc(Object *obj, bool value, | |
198 | Error **errp) | |
199 | { | |
056b68af | 200 | Error *local_err = NULL; |
a35ba7be PB |
201 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
202 | ||
203 | if (backend->force_prealloc) { | |
204 | if (value) { | |
205 | error_setg(errp, | |
206 | "remove -mem-prealloc to use the prealloc property"); | |
207 | return; | |
208 | } | |
209 | } | |
210 | ||
6f4c60e4 | 211 | if (!host_memory_backend_mr_inited(backend)) { |
a35ba7be PB |
212 | backend->prealloc = value; |
213 | return; | |
214 | } | |
215 | ||
216 | if (value && !backend->prealloc) { | |
217 | int fd = memory_region_get_fd(&backend->mr); | |
218 | void *ptr = memory_region_get_ram_ptr(&backend->mr); | |
219 | uint64_t sz = memory_region_size(&backend->mr); | |
220 | ||
1e356fc1 | 221 | os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err); |
056b68af IM |
222 | if (local_err) { |
223 | error_propagate(errp, local_err); | |
224 | return; | |
225 | } | |
a35ba7be PB |
226 | backend->prealloc = true; |
227 | } | |
228 | } | |
229 | ||
58f4662c | 230 | static void host_memory_backend_init(Object *obj) |
1f070489 | 231 | { |
605d0a94 | 232 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
6b269967 | 233 | MachineState *machine = MACHINE(qdev_get_machine()); |
605d0a94 | 234 | |
6b269967 EH |
235 | backend->merge = machine_mem_merge(machine); |
236 | backend->dump = machine_dump_guest_core(machine); | |
a35ba7be | 237 | backend->prealloc = mem_prealloc; |
1f070489 IM |
238 | } |
239 | ||
4728b574 PX |
240 | bool host_memory_backend_mr_inited(HostMemoryBackend *backend) |
241 | { | |
242 | /* | |
243 | * NOTE: We forbid zero-length memory backend, so here zero means | |
244 | * "we haven't inited the backend memory region yet". | |
245 | */ | |
246 | return memory_region_size(&backend->mr) != 0; | |
247 | } | |
248 | ||
1f070489 IM |
249 | MemoryRegion * |
250 | host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp) | |
251 | { | |
6f4c60e4 | 252 | return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL; |
1f070489 IM |
253 | } |
254 | ||
2aece63c XG |
255 | void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped) |
256 | { | |
257 | backend->is_mapped = mapped; | |
258 | } | |
259 | ||
260 | bool host_memory_backend_is_mapped(HostMemoryBackend *backend) | |
261 | { | |
262 | return backend->is_mapped; | |
263 | } | |
264 | ||
bd9262d9 HT |
265 | static void |
266 | host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) | |
267 | { | |
268 | HostMemoryBackend *backend = MEMORY_BACKEND(uc); | |
269 | HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); | |
605d0a94 PB |
270 | Error *local_err = NULL; |
271 | void *ptr; | |
272 | uint64_t sz; | |
bd9262d9 HT |
273 | |
274 | if (bc->alloc) { | |
605d0a94 PB |
275 | bc->alloc(backend, &local_err); |
276 | if (local_err) { | |
056b68af | 277 | goto out; |
605d0a94 PB |
278 | } |
279 | ||
280 | ptr = memory_region_get_ram_ptr(&backend->mr); | |
281 | sz = memory_region_size(&backend->mr); | |
282 | ||
283 | if (backend->merge) { | |
284 | qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); | |
285 | } | |
286 | if (!backend->dump) { | |
287 | qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); | |
288 | } | |
4cf1b76b HT |
289 | #ifdef CONFIG_NUMA |
290 | unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); | |
291 | /* lastbit == MAX_NODES means maxnode = 0 */ | |
292 | unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); | |
293 | /* ensure policy won't be ignored in case memory is preallocated | |
294 | * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so | |
295 | * this doesn't catch hugepage case. */ | |
288d3322 | 296 | unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; |
4cf1b76b HT |
297 | |
298 | /* check for invalid host-nodes and policies and give more verbose | |
299 | * error messages than mbind(). */ | |
300 | if (maxnode && backend->policy == MPOL_DEFAULT) { | |
301 | error_setg(errp, "host-nodes must be empty for policy default," | |
302 | " or you should explicitly specify a policy other" | |
303 | " than default"); | |
304 | return; | |
305 | } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { | |
306 | error_setg(errp, "host-nodes must be set for policy %s", | |
977c736f | 307 | HostMemPolicy_str(backend->policy)); |
4cf1b76b HT |
308 | return; |
309 | } | |
310 | ||
311 | /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 | |
312 | * as argument to mbind() due to an old Linux bug (feature?) which | |
313 | * cuts off the last specified node. This means backend->host_nodes | |
314 | * must have MAX_NODES+1 bits available. | |
315 | */ | |
316 | assert(sizeof(backend->host_nodes) >= | |
317 | BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); | |
318 | assert(maxnode <= MAX_NODES); | |
319 | if (mbind(ptr, sz, backend->policy, | |
320 | maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { | |
a3567ba1 PF |
321 | if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { |
322 | error_setg_errno(errp, errno, | |
323 | "cannot bind memory to host NUMA nodes"); | |
324 | return; | |
325 | } | |
4cf1b76b HT |
326 | } |
327 | #endif | |
328 | /* Preallocate memory after the NUMA policy has been instantiated. | |
329 | * This is necessary to guarantee memory is allocated with | |
330 | * specified NUMA policy in place. | |
331 | */ | |
a35ba7be | 332 | if (backend->prealloc) { |
056b68af | 333 | os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz, |
1e356fc1 | 334 | smp_cpus, &local_err); |
056b68af IM |
335 | if (local_err) { |
336 | goto out; | |
337 | } | |
a35ba7be | 338 | } |
bd9262d9 | 339 | } |
056b68af IM |
340 | out: |
341 | error_propagate(errp, local_err); | |
bd9262d9 HT |
342 | } |
343 | ||
36bce5ca | 344 | static bool |
3beacfb9 | 345 | host_memory_backend_can_be_deleted(UserCreatable *uc) |
36bce5ca | 346 | { |
2aece63c | 347 | if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) { |
36bce5ca LM |
348 | return false; |
349 | } else { | |
350 | return true; | |
351 | } | |
352 | } | |
353 | ||
e1ff3c67 IM |
354 | static char *get_id(Object *o, Error **errp) |
355 | { | |
356 | HostMemoryBackend *backend = MEMORY_BACKEND(o); | |
357 | ||
358 | return g_strdup(backend->id); | |
359 | } | |
360 | ||
361 | static void set_id(Object *o, const char *str, Error **errp) | |
362 | { | |
363 | HostMemoryBackend *backend = MEMORY_BACKEND(o); | |
364 | ||
365 | if (backend->id) { | |
366 | error_setg(errp, "cannot change property value"); | |
367 | return; | |
368 | } | |
369 | backend->id = g_strdup(str); | |
370 | } | |
371 | ||
bd9262d9 HT |
372 | static void |
373 | host_memory_backend_class_init(ObjectClass *oc, void *data) | |
374 | { | |
375 | UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); | |
376 | ||
377 | ucc->complete = host_memory_backend_memory_complete; | |
36bce5ca | 378 | ucc->can_be_deleted = host_memory_backend_can_be_deleted; |
e62834ca EH |
379 | |
380 | object_class_property_add_bool(oc, "merge", | |
381 | host_memory_backend_get_merge, | |
382 | host_memory_backend_set_merge, &error_abort); | |
383 | object_class_property_add_bool(oc, "dump", | |
384 | host_memory_backend_get_dump, | |
385 | host_memory_backend_set_dump, &error_abort); | |
386 | object_class_property_add_bool(oc, "prealloc", | |
387 | host_memory_backend_get_prealloc, | |
388 | host_memory_backend_set_prealloc, &error_abort); | |
389 | object_class_property_add(oc, "size", "int", | |
390 | host_memory_backend_get_size, | |
391 | host_memory_backend_set_size, | |
392 | NULL, NULL, &error_abort); | |
393 | object_class_property_add(oc, "host-nodes", "int", | |
394 | host_memory_backend_get_host_nodes, | |
395 | host_memory_backend_set_host_nodes, | |
396 | NULL, NULL, &error_abort); | |
397 | object_class_property_add_enum(oc, "policy", "HostMemPolicy", | |
f7abe0ec | 398 | &HostMemPolicy_lookup, |
e62834ca EH |
399 | host_memory_backend_get_policy, |
400 | host_memory_backend_set_policy, &error_abort); | |
e1ff3c67 IM |
401 | object_class_property_add_str(oc, "id", get_id, set_id, &error_abort); |
402 | } | |
403 | ||
404 | static void host_memory_backend_finalize(Object *o) | |
405 | { | |
406 | HostMemoryBackend *backend = MEMORY_BACKEND(o); | |
407 | g_free(backend->id); | |
bd9262d9 HT |
408 | } |
409 | ||
58f4662c | 410 | static const TypeInfo host_memory_backend_info = { |
1f070489 IM |
411 | .name = TYPE_MEMORY_BACKEND, |
412 | .parent = TYPE_OBJECT, | |
413 | .abstract = true, | |
414 | .class_size = sizeof(HostMemoryBackendClass), | |
bd9262d9 | 415 | .class_init = host_memory_backend_class_init, |
1f070489 | 416 | .instance_size = sizeof(HostMemoryBackend), |
58f4662c | 417 | .instance_init = host_memory_backend_init, |
e1ff3c67 | 418 | .instance_finalize = host_memory_backend_finalize, |
1f070489 IM |
419 | .interfaces = (InterfaceInfo[]) { |
420 | { TYPE_USER_CREATABLE }, | |
421 | { } | |
422 | } | |
423 | }; | |
424 | ||
425 | static void register_types(void) | |
426 | { | |
58f4662c | 427 | type_register_static(&host_memory_backend_info); |
1f070489 IM |
428 | } |
429 | ||
430 | type_init(register_types); |