]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
a50777c7 DM |
2 | /****************************************************************************** |
3 | * Xen selfballoon driver (and optional frontswap self-shrinking driver) | |
4 | * | |
5 | * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. | |
6 | * | |
7 | * This code complements the cleancache and frontswap patchsets to optimize | |
8 | * support for Xen Transcendent Memory ("tmem"). The policy it implements | |
9 | * is rudimentary and will likely improve over time, but it does work well | |
10 | * enough today. | |
11 | * | |
12 | * Two functionalities are implemented here which both use "control theory" | |
13 | * (feedback) to optimize memory utilization. In a virtualized environment | |
14 | * such as Xen, RAM is often a scarce resource and we would like to ensure | |
15 | * that each of a possibly large number of virtual machines is using RAM | |
16 | * efficiently, i.e. using as little as possible when under light load | |
17 | * and obtaining as much as possible when memory demands are high. | |
18 | * Since RAM needs vary highly dynamically and sometimes dramatically, | |
19 | * "hysteresis" is used, that is, memory target is determined not just | |
20 | * on current data but also on past data stored in the system. | |
21 | * | |
22 | * "Selfballooning" creates memory pressure by managing the Xen balloon | |
23 | * driver to decrease and increase available kernel memory, driven | |
24 | * largely by the target value of "Committed_AS" (see /proc/meminfo). | |
25 | * Since Committed_AS does not account for clean mapped pages (i.e. pages | |
26 | * in RAM that are identical to pages on disk), selfballooning has the | |
27 | * effect of pushing less frequently used clean pagecache pages out of | |
28 | * kernel RAM and, presumably using cleancache, into Xen tmem where | |
29 | * Xen can more efficiently optimize RAM utilization for such pages. | |
30 | * | |
31 | * When kernel memory demand unexpectedly increases faster than Xen, via | |
32 | * the selfballoon driver, is able to (or chooses to) provide usable RAM, | |
33 | * the kernel may invoke swapping. In most cases, frontswap is able | |
34 | * to absorb this swapping into Xen tmem. However, due to the fact | |
35 | * that the kernel swap subsystem assumes swapping occurs to a disk, | |
36 | * swapped pages may sit on the disk for a very long time; even if | |
37 | * the kernel knows the page will never be used again. This is because | |
38 | * the disk space costs very little and can be overwritten when | |
39 | * necessary. When such stale pages are in frontswap, however, they | |
40 | * are taking up valuable real estate. "Frontswap selfshrinking" works | |
41 | * to resolve this: When frontswap activity is otherwise stable | |
42 | * and the guest kernel is not under memory pressure, the "frontswap | |
43 | * selfshrinking" accounts for this by providing pressure to remove some | |
44 | * pages from frontswap and return them to kernel memory. | |
45 | * | |
46 | * For both "selfballooning" and "frontswap-selfshrinking", a worker | |
47 | * thread is used and sysfs tunables are provided to adjust the frequency | |
48 | * and rate of adjustments to achieve the goal, as well as to disable one | |
49 | * or both functions independently. | |
50 | * | |
51 | * While some argue that this functionality can and should be implemented | |
52 | * in userspace, it has been observed that bad things happen (e.g. OOMs). | |
53 | * | |
54 | * System configuration note: Selfballooning should not be enabled on | |
55 | * systems without a sufficiently large swap device configured; for best | |
56 | * results, it is recommended that total swap be increased by the size | |
37d46e15 KRW |
57 | * of the guest memory. Note, that selfballooning should be disabled by default |
58 | * if frontswap is not configured. Similarly selfballooning should be enabled | |
59 | * by default if frontswap is configured and can be disabled with the | |
60 | * "tmem.selfballooning=0" kernel boot option. Finally, when frontswap is | |
61 | * configured, frontswap-selfshrinking can be disabled with the | |
62 | * "tmem.selfshrink=0" kernel boot option. | |
a50777c7 DM |
63 | * |
64 | * Selfballooning is disallowed in domain0 and force-disabled. | |
65 | * | |
66 | */ | |
67 | ||
283c0972 JP |
68 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
69 | ||
a50777c7 | 70 | #include <linux/kernel.h> |
38a1ed4f DM |
71 | #include <linux/bootmem.h> |
72 | #include <linux/swap.h> | |
a50777c7 DM |
73 | #include <linux/mm.h> |
74 | #include <linux/mman.h> | |
0642d2ed | 75 | #include <linux/workqueue.h> |
cb0c05c5 | 76 | #include <linux/device.h> |
a50777c7 | 77 | #include <xen/balloon.h> |
a50777c7 | 78 | #include <xen/tmem.h> |
0642d2ed | 79 | #include <xen/xen.h> |
a50777c7 DM |
80 | |
81 | /* Enable/disable with sysfs. */ | |
82 | static int xen_selfballooning_enabled __read_mostly; | |
83 | ||
84 | /* | |
85 | * Controls rate at which memory target (this iteration) approaches | |
86 | * ultimate goal when memory need is increasing (up-hysteresis) or | |
87 | * decreasing (down-hysteresis). Higher values of hysteresis cause | |
88 | * slower increases/decreases. The default values for the various | |
89 | * parameters were deemed reasonable by experimentation, may be | |
90 | * workload-dependent, and can all be adjusted via sysfs. | |
91 | */ | |
92 | static unsigned int selfballoon_downhysteresis __read_mostly = 8; | |
93 | static unsigned int selfballoon_uphysteresis __read_mostly = 1; | |
94 | ||
95 | /* In seconds (multiplied by HZ when scheduled); interval between worker invocations. */ | |
96 | static unsigned int selfballoon_interval __read_mostly = 5; | |
97 | ||
38a1ed4f DM |
98 | /* |
99 | * Minimum usable RAM in MB for selfballooning target for balloon. | |
100 | * If non-zero, it is added to totalreserve_pages and self-ballooning | |
101 | * will not balloon below the sum. If zero, a piecewise linear function | |
102 | * is calculated as a minimum and added to totalreserve_pages. Note that | |
103 | * setting this value indiscriminately may cause OOMs and crashes. | |
104 | */ | |
105 | static unsigned int selfballoon_min_usable_mb; | |
106 | ||
d79d5959 JS |
107 | /* |
108 | * Amount of RAM in MB to add to the target number of pages. | |
109 | * Can be used to reserve some more room for caches and the like. | |
110 | */ | |
111 | static unsigned int selfballoon_reserved_mb; | |
112 | ||
a50777c7 DM |
113 | static void selfballoon_process(struct work_struct *work); |
114 | static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); | |
115 | ||
116 | #ifdef CONFIG_FRONTSWAP | |
117 | #include <linux/frontswap.h> | |
118 | ||
119 | /* Enable/disable with sysfs. */ | |
120 | static bool frontswap_selfshrinking __read_mostly; | |
121 | ||
a50777c7 DM |
122 | /* |
123 | * The default values for the following parameters were deemed reasonable | |
124 | * by experimentation, may be workload-dependent, and can all be | |
125 | * adjusted via sysfs. | |
126 | */ | |
127 | ||
128 | /* Control rate for frontswap shrinking. Higher hysteresis is slower. */ | |
129 | static unsigned int frontswap_hysteresis __read_mostly = 20; | |
130 | ||
131 | /* | |
132 | * Number of selfballoon worker invocations to wait before observing that | |
133 | * frontswap selfshrinking should commence. Note that selfshrinking does | |
134 | * not use a separate worker thread. | |
135 | */ | |
136 | static unsigned int frontswap_inertia __read_mostly = 3; | |
137 | ||
138 | /* Countdown to next invocation of frontswap_shrink() */ | |
139 | static unsigned long frontswap_inertia_counter; | |
140 | ||
141 | /* | |
142 | * Invoked by the selfballoon worker thread, uses current number of pages | |
143 | * in frontswap (frontswap_curr_pages()), previous status, and control | |
144 | * values (hysteresis and inertia) to determine if frontswap should be | |
145 | * shrunk and what the new frontswap size should be. Note that | |
146 | * frontswap_shrink is essentially a partial swapoff that immediately | |
147 | * transfers pages from the "swap device" (frontswap) back into kernel | |
148 | * RAM; despite the name, frontswap "shrinking" is very different from | |
149 | * the "shrinker" interface used by the kernel MM subsystem to reclaim | |
150 | * memory. | |
151 | */ | |
152 | static void frontswap_selfshrink(void) | |
153 | { | |
154 | static unsigned long cur_frontswap_pages; | |
03993730 GS |
155 | unsigned long last_frontswap_pages; |
156 | unsigned long tgt_frontswap_pages; | |
a50777c7 DM |
157 | |
158 | last_frontswap_pages = cur_frontswap_pages; | |
159 | cur_frontswap_pages = frontswap_curr_pages(); | |
160 | if (!cur_frontswap_pages || | |
161 | (cur_frontswap_pages > last_frontswap_pages)) { | |
162 | frontswap_inertia_counter = frontswap_inertia; | |
163 | return; | |
164 | } | |
165 | if (frontswap_inertia_counter && --frontswap_inertia_counter) | |
166 | return; | |
167 | if (cur_frontswap_pages <= frontswap_hysteresis) | |
168 | tgt_frontswap_pages = 0; | |
169 | else | |
170 | tgt_frontswap_pages = cur_frontswap_pages - | |
171 | (cur_frontswap_pages / frontswap_hysteresis); | |
172 | frontswap_shrink(tgt_frontswap_pages); | |
d4c7abdf | 173 | frontswap_inertia_counter = frontswap_inertia; |
a50777c7 DM |
174 | } |
175 | ||
a50777c7 DM |
176 | #endif /* CONFIG_FRONTSWAP */ |
177 | ||
/*
 * Convert between megabytes and pages.  MB2PAGES widens its argument to
 * unsigned long before shifting: the tunables it is applied to are
 * unsigned int, and shifting them in 32 bits would silently wrap for large
 * values before the result is added to page counts.
 */
#define MB2PAGES(mb)	((unsigned long)(mb) << (20 - PAGE_SHIFT))
#define PAGES2MB(pages)	((pages) >> (20 - PAGE_SHIFT))
38a1ed4f | 180 | |
a50777c7 DM |
181 | /* |
182 | * Use current balloon size, the goal (vm_committed_as), and hysteresis | |
183 | * parameters to set a new target balloon size | |
184 | */ | |
185 | static void selfballoon_process(struct work_struct *work) | |
186 | { | |
38a1ed4f DM |
187 | unsigned long cur_pages, goal_pages, tgt_pages, floor_pages; |
188 | unsigned long useful_pages; | |
a50777c7 DM |
189 | bool reset_timer = false; |
190 | ||
191 | if (xen_selfballooning_enabled) { | |
38a1ed4f | 192 | cur_pages = totalram_pages; |
a50777c7 | 193 | tgt_pages = cur_pages; /* default is no change */ |
997071bc | 194 | goal_pages = vm_memory_committed() + |
d79d5959 JS |
195 | totalreserve_pages + |
196 | MB2PAGES(selfballoon_reserved_mb); | |
a50777c7 DM |
197 | #ifdef CONFIG_FRONTSWAP |
198 | /* allow space for frontswap pages to be repatriated */ | |
8ea1d2a1 | 199 | if (frontswap_selfshrinking) |
a50777c7 DM |
200 | goal_pages += frontswap_curr_pages(); |
201 | #endif | |
202 | if (cur_pages > goal_pages) | |
203 | tgt_pages = cur_pages - | |
204 | ((cur_pages - goal_pages) / | |
205 | selfballoon_downhysteresis); | |
206 | else if (cur_pages < goal_pages) | |
207 | tgt_pages = cur_pages + | |
208 | ((goal_pages - cur_pages) / | |
209 | selfballoon_uphysteresis); | |
210 | /* else if cur_pages == goal_pages, no change */ | |
38a1ed4f DM |
211 | useful_pages = max_pfn - totalreserve_pages; |
212 | if (selfballoon_min_usable_mb != 0) | |
213 | floor_pages = totalreserve_pages + | |
214 | MB2PAGES(selfballoon_min_usable_mb); | |
215 | /* piecewise linear function ending in ~3% slope */ | |
216 | else if (useful_pages < MB2PAGES(16)) | |
217 | floor_pages = max_pfn; /* not worth ballooning */ | |
218 | else if (useful_pages < MB2PAGES(64)) | |
219 | floor_pages = totalreserve_pages + MB2PAGES(16) + | |
220 | ((useful_pages - MB2PAGES(16)) >> 1); | |
221 | else if (useful_pages < MB2PAGES(512)) | |
222 | floor_pages = totalreserve_pages + MB2PAGES(40) + | |
223 | ((useful_pages - MB2PAGES(40)) >> 3); | |
224 | else /* useful_pages >= MB2PAGES(512) */ | |
225 | floor_pages = totalreserve_pages + MB2PAGES(99) + | |
226 | ((useful_pages - MB2PAGES(99)) >> 5); | |
227 | if (tgt_pages < floor_pages) | |
228 | tgt_pages = floor_pages; | |
229 | balloon_set_new_target(tgt_pages + | |
230 | balloon_stats.current_pages - totalram_pages); | |
a50777c7 DM |
231 | reset_timer = true; |
232 | } | |
233 | #ifdef CONFIG_FRONTSWAP | |
8ea1d2a1 | 234 | if (frontswap_selfshrinking) { |
a50777c7 DM |
235 | frontswap_selfshrink(); |
236 | reset_timer = true; | |
237 | } | |
238 | #endif | |
239 | if (reset_timer) | |
240 | schedule_delayed_work(&selfballoon_worker, | |
241 | selfballoon_interval * HZ); | |
242 | } | |
243 | ||
244 | #ifdef CONFIG_SYSFS | |
245 | ||
a50777c7 DM |
246 | #include <linux/capability.h> |
247 | ||
/*
 * Generate the sysfs "show" callback show_<name>(): it formats the given
 * arguments into buf with sprintf() and returns the number of bytes written.
 */
#define SELFBALLOON_SHOW(name, format, ...)				\
	static ssize_t show_##name(struct device *dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		return sprintf(buf, format, ##__VA_ARGS__);		\
	}
255 | ||
256 | SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); | |
257 | ||
07068021 KS |
258 | static ssize_t store_selfballooning(struct device *dev, |
259 | struct device_attribute *attr, | |
a50777c7 DM |
260 | const char *buf, |
261 | size_t count) | |
262 | { | |
263 | bool was_enabled = xen_selfballooning_enabled; | |
264 | unsigned long tmp; | |
265 | int err; | |
266 | ||
267 | if (!capable(CAP_SYS_ADMIN)) | |
268 | return -EPERM; | |
269 | ||
d3dbd93d JH |
270 | err = kstrtoul(buf, 10, &tmp); |
271 | if (err) | |
272 | return err; | |
273 | if ((tmp != 0) && (tmp != 1)) | |
a50777c7 DM |
274 | return -EINVAL; |
275 | ||
276 | xen_selfballooning_enabled = !!tmp; | |
277 | if (!was_enabled && xen_selfballooning_enabled) | |
278 | schedule_delayed_work(&selfballoon_worker, | |
279 | selfballoon_interval * HZ); | |
280 | ||
281 | return count; | |
282 | } | |
283 | ||
07068021 | 284 | static DEVICE_ATTR(selfballooning, S_IRUGO | S_IWUSR, |
a50777c7 DM |
285 | show_selfballooning, store_selfballooning); |
286 | ||
287 | SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); | |
288 | ||
07068021 KS |
289 | static ssize_t store_selfballoon_interval(struct device *dev, |
290 | struct device_attribute *attr, | |
a50777c7 DM |
291 | const char *buf, |
292 | size_t count) | |
293 | { | |
294 | unsigned long val; | |
295 | int err; | |
296 | ||
297 | if (!capable(CAP_SYS_ADMIN)) | |
298 | return -EPERM; | |
d3dbd93d JH |
299 | err = kstrtoul(buf, 10, &val); |
300 | if (err) | |
301 | return err; | |
302 | if (val == 0) | |
a50777c7 DM |
303 | return -EINVAL; |
304 | selfballoon_interval = val; | |
305 | return count; | |
306 | } | |
307 | ||
07068021 | 308 | static DEVICE_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, |
a50777c7 DM |
309 | show_selfballoon_interval, store_selfballoon_interval); |
310 | ||
311 | SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); | |
312 | ||
07068021 KS |
313 | static ssize_t store_selfballoon_downhys(struct device *dev, |
314 | struct device_attribute *attr, | |
a50777c7 DM |
315 | const char *buf, |
316 | size_t count) | |
317 | { | |
318 | unsigned long val; | |
319 | int err; | |
320 | ||
321 | if (!capable(CAP_SYS_ADMIN)) | |
322 | return -EPERM; | |
d3dbd93d JH |
323 | err = kstrtoul(buf, 10, &val); |
324 | if (err) | |
325 | return err; | |
326 | if (val == 0) | |
a50777c7 DM |
327 | return -EINVAL; |
328 | selfballoon_downhysteresis = val; | |
329 | return count; | |
330 | } | |
331 | ||
07068021 | 332 | static DEVICE_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, |
a50777c7 DM |
333 | show_selfballoon_downhys, store_selfballoon_downhys); |
334 | ||
335 | ||
336 | SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); | |
337 | ||
07068021 KS |
338 | static ssize_t store_selfballoon_uphys(struct device *dev, |
339 | struct device_attribute *attr, | |
a50777c7 DM |
340 | const char *buf, |
341 | size_t count) | |
342 | { | |
343 | unsigned long val; | |
344 | int err; | |
345 | ||
346 | if (!capable(CAP_SYS_ADMIN)) | |
347 | return -EPERM; | |
d3dbd93d JH |
348 | err = kstrtoul(buf, 10, &val); |
349 | if (err) | |
350 | return err; | |
351 | if (val == 0) | |
a50777c7 DM |
352 | return -EINVAL; |
353 | selfballoon_uphysteresis = val; | |
354 | return count; | |
355 | } | |
356 | ||
07068021 | 357 | static DEVICE_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, |
a50777c7 DM |
358 | show_selfballoon_uphys, store_selfballoon_uphys); |
359 | ||
38a1ed4f DM |
360 | SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n", |
361 | selfballoon_min_usable_mb); | |
362 | ||
07068021 KS |
363 | static ssize_t store_selfballoon_min_usable_mb(struct device *dev, |
364 | struct device_attribute *attr, | |
38a1ed4f DM |
365 | const char *buf, |
366 | size_t count) | |
367 | { | |
368 | unsigned long val; | |
369 | int err; | |
370 | ||
371 | if (!capable(CAP_SYS_ADMIN)) | |
372 | return -EPERM; | |
d3dbd93d JH |
373 | err = kstrtoul(buf, 10, &val); |
374 | if (err) | |
375 | return err; | |
376 | if (val == 0) | |
38a1ed4f DM |
377 | return -EINVAL; |
378 | selfballoon_min_usable_mb = val; | |
379 | return count; | |
380 | } | |
381 | ||
07068021 | 382 | static DEVICE_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR, |
38a1ed4f DM |
383 | show_selfballoon_min_usable_mb, |
384 | store_selfballoon_min_usable_mb); | |
385 | ||
d79d5959 JS |
386 | SELFBALLOON_SHOW(selfballoon_reserved_mb, "%d\n", |
387 | selfballoon_reserved_mb); | |
388 | ||
389 | static ssize_t store_selfballoon_reserved_mb(struct device *dev, | |
390 | struct device_attribute *attr, | |
391 | const char *buf, | |
392 | size_t count) | |
393 | { | |
394 | unsigned long val; | |
395 | int err; | |
396 | ||
397 | if (!capable(CAP_SYS_ADMIN)) | |
398 | return -EPERM; | |
d3dbd93d JH |
399 | err = kstrtoul(buf, 10, &val); |
400 | if (err) | |
401 | return err; | |
402 | if (val == 0) | |
d79d5959 JS |
403 | return -EINVAL; |
404 | selfballoon_reserved_mb = val; | |
405 | return count; | |
406 | } | |
407 | ||
408 | static DEVICE_ATTR(selfballoon_reserved_mb, S_IRUGO | S_IWUSR, | |
409 | show_selfballoon_reserved_mb, | |
410 | store_selfballoon_reserved_mb); | |
411 | ||
38a1ed4f | 412 | |
a50777c7 DM |
413 | #ifdef CONFIG_FRONTSWAP |
414 | SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); | |
415 | ||
07068021 KS |
416 | static ssize_t store_frontswap_selfshrinking(struct device *dev, |
417 | struct device_attribute *attr, | |
a50777c7 DM |
418 | const char *buf, |
419 | size_t count) | |
420 | { | |
421 | bool was_enabled = frontswap_selfshrinking; | |
422 | unsigned long tmp; | |
423 | int err; | |
424 | ||
425 | if (!capable(CAP_SYS_ADMIN)) | |
426 | return -EPERM; | |
d3dbd93d JH |
427 | err = kstrtoul(buf, 10, &tmp); |
428 | if (err) | |
429 | return err; | |
430 | if ((tmp != 0) && (tmp != 1)) | |
a50777c7 DM |
431 | return -EINVAL; |
432 | frontswap_selfshrinking = !!tmp; | |
433 | if (!was_enabled && !xen_selfballooning_enabled && | |
434 | frontswap_selfshrinking) | |
435 | schedule_delayed_work(&selfballoon_worker, | |
436 | selfballoon_interval * HZ); | |
437 | ||
438 | return count; | |
439 | } | |
440 | ||
07068021 | 441 | static DEVICE_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, |
a50777c7 DM |
442 | show_frontswap_selfshrinking, store_frontswap_selfshrinking); |
443 | ||
444 | SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); | |
445 | ||
07068021 KS |
446 | static ssize_t store_frontswap_inertia(struct device *dev, |
447 | struct device_attribute *attr, | |
a50777c7 DM |
448 | const char *buf, |
449 | size_t count) | |
450 | { | |
451 | unsigned long val; | |
452 | int err; | |
453 | ||
454 | if (!capable(CAP_SYS_ADMIN)) | |
455 | return -EPERM; | |
d3dbd93d JH |
456 | err = kstrtoul(buf, 10, &val); |
457 | if (err) | |
458 | return err; | |
459 | if (val == 0) | |
a50777c7 DM |
460 | return -EINVAL; |
461 | frontswap_inertia = val; | |
462 | frontswap_inertia_counter = val; | |
463 | return count; | |
464 | } | |
465 | ||
07068021 | 466 | static DEVICE_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, |
a50777c7 DM |
467 | show_frontswap_inertia, store_frontswap_inertia); |
468 | ||
469 | SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); | |
470 | ||
07068021 KS |
471 | static ssize_t store_frontswap_hysteresis(struct device *dev, |
472 | struct device_attribute *attr, | |
a50777c7 DM |
473 | const char *buf, |
474 | size_t count) | |
475 | { | |
476 | unsigned long val; | |
477 | int err; | |
478 | ||
479 | if (!capable(CAP_SYS_ADMIN)) | |
480 | return -EPERM; | |
d3dbd93d JH |
481 | err = kstrtoul(buf, 10, &val); |
482 | if (err) | |
483 | return err; | |
484 | if (val == 0) | |
a50777c7 DM |
485 | return -EINVAL; |
486 | frontswap_hysteresis = val; | |
487 | return count; | |
488 | } | |
489 | ||
07068021 | 490 | static DEVICE_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, |
a50777c7 DM |
491 | show_frontswap_hysteresis, store_frontswap_hysteresis); |
492 | ||
493 | #endif /* CONFIG_FRONTSWAP */ | |
494 | ||
495 | static struct attribute *selfballoon_attrs[] = { | |
07068021 KS |
496 | &dev_attr_selfballooning.attr, |
497 | &dev_attr_selfballoon_interval.attr, | |
498 | &dev_attr_selfballoon_downhysteresis.attr, | |
499 | &dev_attr_selfballoon_uphysteresis.attr, | |
500 | &dev_attr_selfballoon_min_usable_mb.attr, | |
d79d5959 | 501 | &dev_attr_selfballoon_reserved_mb.attr, |
a50777c7 | 502 | #ifdef CONFIG_FRONTSWAP |
07068021 KS |
503 | &dev_attr_frontswap_selfshrinking.attr, |
504 | &dev_attr_frontswap_hysteresis.attr, | |
505 | &dev_attr_frontswap_inertia.attr, | |
a50777c7 DM |
506 | #endif |
507 | NULL | |
508 | }; | |
509 | ||
ead1d014 | 510 | static const struct attribute_group selfballoon_group = { |
a50777c7 DM |
511 | .name = "selfballoon", |
512 | .attrs = selfballoon_attrs | |
513 | }; | |
514 | #endif | |
515 | ||
/*
 * Create the "selfballoon" sysfs attribute group on @dev.
 *
 * Returns the result of sysfs_create_group() when CONFIG_SYSFS is set,
 * and -1 otherwise.
 */
int register_xen_selfballooning(struct device *dev)
{
#ifdef CONFIG_SYSFS
	return sysfs_create_group(&dev->kobj, &selfballoon_group);
#else
	return -1;
#endif
}
EXPORT_SYMBOL(register_xen_selfballooning);
526 | ||
10a7a077 | 527 | int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink) |
a50777c7 DM |
528 | { |
529 | bool enable = false; | |
bc1b0df5 | 530 | unsigned long reserve_pages; |
a50777c7 DM |
531 | |
532 | if (!xen_domain()) | |
533 | return -ENODEV; | |
534 | ||
535 | if (xen_initial_domain()) { | |
283c0972 | 536 | pr_info("Xen selfballooning driver disabled for domain0\n"); |
a50777c7 DM |
537 | return -ENODEV; |
538 | } | |
539 | ||
540 | xen_selfballooning_enabled = tmem_enabled && use_selfballooning; | |
541 | if (xen_selfballooning_enabled) { | |
283c0972 | 542 | pr_info("Initializing Xen selfballooning driver\n"); |
a50777c7 DM |
543 | enable = true; |
544 | } | |
545 | #ifdef CONFIG_FRONTSWAP | |
546 | frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink; | |
547 | if (frontswap_selfshrinking) { | |
283c0972 | 548 | pr_info("Initializing frontswap selfshrinking driver\n"); |
a50777c7 DM |
549 | enable = true; |
550 | } | |
551 | #endif | |
552 | if (!enable) | |
553 | return -ENODEV; | |
554 | ||
bc1b0df5 BL |
555 | /* |
556 | * Give selfballoon_reserved_mb a default value(10% of total ram pages) | |
557 | * to make selfballoon not so aggressive. | |
558 | * | |
559 | * There are mainly two reasons: | |
560 | * 1) The original goal_page didn't consider some pages used by kernel | |
561 | * space, like slab pages and memory used by device drivers. | |
562 | * | |
563 | * 2) The balloon driver may not give back memory to guest OS fast | |
564 | * enough when the workload suddenly aquries a lot of physical memory. | |
565 | * | |
566 | * In both cases, the guest OS will suffer from memory pressure and | |
567 | * OOM killer may be triggered. | |
568 | * By reserving extra 10% of total ram pages, we can keep the system | |
569 | * much more reliably and response faster in some cases. | |
570 | */ | |
571 | if (!selfballoon_reserved_mb) { | |
572 | reserve_pages = totalram_pages / 10; | |
573 | selfballoon_reserved_mb = PAGES2MB(reserve_pages); | |
574 | } | |
a50777c7 DM |
575 | schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ); |
576 | ||
577 | return 0; | |
578 | } | |
10a7a077 | 579 | EXPORT_SYMBOL(xen_selfballoon_init); |