/*
 * drivers/misc/vmw_balloon.c
 * (gitweb listing header removed; commit subject was
 *  "vmw_balloon: change batch/single lock abstractions")
 */
8b4770ec 1// SPDX-License-Identifier: GPL-2.0
453dc659
DT
2/*
3 * VMware Balloon driver.
4 *
8b4770ec 5 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
453dc659 6 *
453dc659
DT
7 * This is VMware physical memory management driver for Linux. The driver
8 * acts like a "balloon" that can be inflated to reclaim physical pages by
9 * reserving them in the guest and invalidating them in the monitor,
10 * freeing up the underlying machine pages so they can be allocated to
11 * other guests. The balloon can also be deflated to allow the guest to
12 * use more physical memory. Higher level policies can control the sizes
13 * of balloons in VMs in order to manage physical memory resources.
14 */
15
16//#define DEBUG
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
19#include <linux/types.h>
20#include <linux/kernel.h>
21#include <linux/mm.h>
f220a80f 22#include <linux/vmalloc.h>
453dc659
DT
23#include <linux/sched.h>
24#include <linux/module.h>
25#include <linux/workqueue.h>
26#include <linux/debugfs.h>
27#include <linux/seq_file.h>
48e3d668
PM
28#include <linux/vmw_vmci_defs.h>
29#include <linux/vmw_vmci_api.h>
a10a5698 30#include <asm/hypervisor.h>
453dc659
DT
31
32MODULE_AUTHOR("VMware, Inc.");
33MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
48e3d668 34MODULE_VERSION("1.5.0.0-k");
453dc659
DT
35MODULE_ALIAS("dmi:*:svnVMware*:*");
36MODULE_ALIAS("vmware_vmmemctl");
37MODULE_LICENSE("GPL");
38
453dc659 39/*
622074a9
NA
40 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't allow wait
41 * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN, to suppress page
42 * allocation failure warnings. Disallow access to emergency low-memory pools.
453dc659 43 */
622074a9
NA
44#define VMW_HUGE_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \
45 __GFP_NOMEMALLOC)
453dc659
DT
46
47/*
622074a9
NA
48 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight
49 * reclamation (__GFP_NORETRY). Use __GFP_NOWARN, to suppress page allocation
50 * failure warnings. Disallow access to emergency low-memory pools.
453dc659 51 */
622074a9
NA
52#define VMW_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \
53 __GFP_NOMEMALLOC|__GFP_NORETRY)
453dc659 54
55adaa49
DT
55/* Maximum number of refused pages we accumulate during inflation cycle */
56#define VMW_BALLOON_MAX_REFUSED 16
453dc659
DT
57
58/*
59 * Hypervisor communication port definitions.
60 */
61#define VMW_BALLOON_HV_PORT 0x5670
62#define VMW_BALLOON_HV_MAGIC 0x456c6d6f
453dc659
DT
63#define VMW_BALLOON_GUEST_ID 1 /* Linux */
64
eb79100f
XD
/*
 * Capability bits negotiated with the hypervisor via the START command.
 * The set actually granted by the host is kept in vmballoon->capabilities.
 */
enum vmwballoon_capabilities {
        /*
         * Bit 0 is reserved and not associated to any capability.
         */
        VMW_BALLOON_BASIC_CMDS                  = (1 << 1),
        VMW_BALLOON_BATCHED_CMDS                = (1 << 2),
        VMW_BALLOON_BATCHED_2M_CMDS             = (1 << 3),
        VMW_BALLOON_SIGNALLED_WAKEUP_CMD        = (1 << 4),
};

/* All capabilities this driver requests from the host on reset. */
#define VMW_BALLOON_CAPABILITIES        (VMW_BALLOON_BASIC_CMDS \
                                        | VMW_BALLOON_BATCHED_CMDS \
                                        | VMW_BALLOON_BATCHED_2M_CMDS \
                                        | VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

/* A 2MB page is (1 << VMW_BALLOON_2M_SHIFT) 4KB pages. */
#define VMW_BALLOON_2M_SHIFT            (9)
/* Number of supported page sizes: index 0 is 4k, index 1 is 2m. */
#define VMW_BALLOON_NUM_PAGE_SIZES      (2)
eb79100f 82
f220a80f
XD
83/*
84 * Backdoor commands availability:
85 *
86 * START, GET_TARGET and GUEST_ID are always available,
87 *
88 * VMW_BALLOON_BASIC_CMDS:
89 * LOCK and UNLOCK commands,
90 * VMW_BALLOON_BATCHED_CMDS:
91 * BATCHED_LOCK and BATCHED_UNLOCK commands.
365bd7ef 92 * VMW BALLOON_BATCHED_2M_CMDS:
48e3d668
PM
93 * BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
94 * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
95 * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
f220a80f 96 */
365bd7ef
PM
97#define VMW_BALLOON_CMD_START 0
98#define VMW_BALLOON_CMD_GET_TARGET 1
99#define VMW_BALLOON_CMD_LOCK 2
100#define VMW_BALLOON_CMD_UNLOCK 3
101#define VMW_BALLOON_CMD_GUEST_ID 4
102#define VMW_BALLOON_CMD_BATCHED_LOCK 6
103#define VMW_BALLOON_CMD_BATCHED_UNLOCK 7
104#define VMW_BALLOON_CMD_BATCHED_2M_LOCK 8
105#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 9
48e3d668 106#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET 10
365bd7ef 107
68131184 108#define VMW_BALLOON_CMD_NUM 11
453dc659
DT
109
110/* error codes */
eb79100f
XD
111#define VMW_BALLOON_SUCCESS 0
112#define VMW_BALLOON_FAILURE -1
113#define VMW_BALLOON_ERROR_CMD_INVALID 1
114#define VMW_BALLOON_ERROR_PPN_INVALID 2
115#define VMW_BALLOON_ERROR_PPN_LOCKED 3
116#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4
117#define VMW_BALLOON_ERROR_PPN_PINNED 5
118#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6
119#define VMW_BALLOON_ERROR_RESET 7
120#define VMW_BALLOON_ERROR_BUSY 8
121
122#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000)
123
10a95d5d
NA
124#define VMW_BALLOON_CMD_WITH_TARGET_MASK \
125 ((1UL << VMW_BALLOON_CMD_GET_TARGET) | \
126 (1UL << VMW_BALLOON_CMD_LOCK) | \
127 (1UL << VMW_BALLOON_CMD_UNLOCK) | \
128 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK) | \
129 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK) | \
130 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK) | \
131 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))
132
68131184
NA
/*
 * Human-readable names of the backdoor commands, indexed by command
 * number; used by the debugfs statistics output and failure messages.
 */
static const char * const vmballoon_cmd_names[] = {
        [VMW_BALLOON_CMD_START]                 = "start",
        [VMW_BALLOON_CMD_GET_TARGET]            = "target",
        [VMW_BALLOON_CMD_LOCK]                  = "lock",
        [VMW_BALLOON_CMD_UNLOCK]                = "unlock",
        [VMW_BALLOON_CMD_GUEST_ID]              = "guestType",
        [VMW_BALLOON_CMD_BATCHED_LOCK]          = "batchLock",
        [VMW_BALLOON_CMD_BATCHED_UNLOCK]        = "batchUnlock",
        [VMW_BALLOON_CMD_BATCHED_2M_LOCK]       = "2m-lock",
        [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK]     = "2m-unlock",
        [VMW_BALLOON_CMD_VMCI_DOORBELL_SET]     = "doorbellSet"
};
145
453dc659
DT
#ifdef CONFIG_DEBUG_FS
/*
 * Driver statistics, exported through debugfs ("vmmemctl").
 * Arrays of size VMW_BALLOON_NUM_PAGE_SIZES are indexed by page size
 * (0 == 4k, 1 == 2m); arrays of size VMW_BALLOON_CMD_NUM by command.
 */
struct vmballoon_stats {
        unsigned int timer;             /* work-function invocations */
        unsigned int doorbell;          /* VMCI doorbell notifications */

        /* allocation statistics */
        unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
        unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
        unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
        unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
        unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

        /* Monitor operations. */
        unsigned long ops[VMW_BALLOON_CMD_NUM];
        unsigned long ops_fail[VMW_BALLOON_CMD_NUM];
};

/* Compiles to nothing when debugfs support is disabled. */
#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif
167
df8d0d42 168static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
f220a80f 169
365bd7ef 170struct vmballoon_page_size {
453dc659
DT
171 /* list of reserved physical pages */
172 struct list_head pages;
173
174 /* transient list of non-balloonable pages */
175 struct list_head refused_pages;
55adaa49 176 unsigned int n_refused_pages;
365bd7ef
PM
177};
178
6c948757
NA
/**
 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
 *
 * @status: the status of the operation, which is written by the hypervisor.
 * @reserved: reserved for future use. Must be set to zero.
 * @pfn: the physical frame number of the page to be locked or unlocked.
 *
 * Each entry occupies exactly 64 bits; a batch page holds an array of
 * these entries shared with the hypervisor.
 */
struct vmballoon_batch_entry {
        u64 status : 5;
        u64 reserved : PAGE_SHIFT - 5;
        u64 pfn : 52;
} __packed;
191
365bd7ef
PM
/* Global driver state; a single instance (&balloon) exists per guest. */
struct vmballoon {
        struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

        /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
        unsigned supported_page_sizes;

        /* balloon size in pages */
        unsigned int size;
        unsigned int target;

        /* reset flag */
        bool reset_required;

        /* capabilities granted by the host in vmballoon_send_start() */
        unsigned long capabilities;

        /**
         * @batch_page: pointer to communication batch page.
         *
         * When batching is used, batch_page points to a page, which holds up to
         * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
         */
        struct vmballoon_batch_entry *batch_page;

        /* pages per lock/unlock operation: 1 without batching */
        unsigned int batch_max_pages;
        /* the single in-flight page when batching is disabled */
        struct page *page;

#ifdef CONFIG_DEBUG_FS
        /* statistics */
        struct vmballoon_stats stats;

        /* debugfs file exporting statistics */
        struct dentry *dbg_entry;
#endif

        /* scratch space for si_meminfo() in vmballoon_send_get_target() */
        struct sysinfo sysinfo;

        /* periodic work item driving inflate/deflate/reset */
        struct delayed_work dwork;

        /* doorbell handle used by the host to signal target changes */
        struct vmci_handle vmci_doorbell;
};

static struct vmballoon balloon;
453dc659 234
10a95d5d
NA
/**
 * __vmballoon_cmd - issue a backdoor command to the hypervisor.
 *
 * @b: pointer to the balloon.
 * @cmd: command number (one of VMW_BALLOON_CMD_*).
 * @arg1: first command argument (in %ebx).
 * @arg2: second command argument (in %esi).
 * @result: optional out-parameter for the command's result value.
 *
 * Performs the VMware backdoor "inl" on port VMW_BALLOON_HV_PORT. As a
 * side effect, updates @b->target for target-reporting commands and sets
 * @b->reset_required when the host demands a reset.
 *
 * Return: the status word returned by the hypervisor (%eax).
 */
static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
                unsigned long arg2, unsigned long *result)
{
        unsigned long status, dummy1, dummy2, dummy3, local_result;

        STATS_INC(b->stats.ops[cmd]);

        asm volatile ("inl %%dx" :
                "=a"(status),
                "=c"(dummy1),
                "=d"(dummy2),
                "=b"(local_result),
                "=S"(dummy3) :
                "0"(VMW_BALLOON_HV_MAGIC),
                "1"(cmd),
                "2"(VMW_BALLOON_HV_PORT),
                "3"(arg1),
                "4"(arg2) :
                "memory");

        /* update the result if needed */
        if (result)
                /* START reports its result in %ecx, not %ebx */
                *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
                                                           local_result;

        /* update target when applicable */
        if (status == VMW_BALLOON_SUCCESS &&
            ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
                b->target = local_result;

        if (status != VMW_BALLOON_SUCCESS &&
            status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
                STATS_INC(b->stats.ops_fail[cmd]);
                pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
                         __func__, vmballoon_cmd_names[cmd], arg1, arg2,
                         status);
        }

        /* mark reset required accordingly */
        if (status == VMW_BALLOON_ERROR_RESET)
                b->reset_required = true;

        return status;
}
280
/*
 * Convenience wrapper around __vmballoon_cmd() for callers that do not
 * need the command's result value, only its status.
 */
static __always_inline unsigned long
vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
              unsigned long arg2)
{
        unsigned long dummy;

        return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
}
289
453dc659
DT
290/*
291 * Send "start" command to the host, communicating supported version
292 * of the protocol.
293 */
f220a80f 294static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
453dc659 295{
10a95d5d 296 unsigned long status, capabilities;
365bd7ef 297 bool success;
453dc659 298
10a95d5d
NA
299 status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
300 &capabilities);
f220a80f
XD
301
302 switch (status) {
303 case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
304 b->capabilities = capabilities;
365bd7ef
PM
305 success = true;
306 break;
f220a80f
XD
307 case VMW_BALLOON_SUCCESS:
308 b->capabilities = VMW_BALLOON_BASIC_CMDS;
365bd7ef
PM
309 success = true;
310 break;
311 default:
312 success = false;
f220a80f 313 }
453dc659 314
5081efd1
NA
315 /*
316 * 2MB pages are only supported with batching. If batching is for some
317 * reason disabled, do not use 2MB pages, since otherwise the legacy
318 * mechanism is used with 2MB pages, causing a failure.
319 */
320 if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
321 (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
365bd7ef
PM
322 b->supported_page_sizes = 2;
323 else
324 b->supported_page_sizes = 1;
325
365bd7ef 326 return success;
453dc659
DT
327}
328
453dc659
DT
329/*
330 * Communicate guest type to the host so that it can adjust ballooning
331 * algorithm to the one most appropriate for the guest. This command
332 * is normally issued after sending "start" command and is part of
333 * standard reset sequence.
334 */
335static bool vmballoon_send_guest_id(struct vmballoon *b)
336{
10a95d5d 337 unsigned long status;
453dc659 338
10a95d5d
NA
339 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
340 VMW_BALLOON_GUEST_ID, 0);
453dc659 341
10a95d5d 342 if (status == VMW_BALLOON_SUCCESS)
453dc659
DT
343 return true;
344
453dc659
DT
345 return false;
346}
347
365bd7ef
PM
348static u16 vmballoon_page_size(bool is_2m_page)
349{
350 if (is_2m_page)
351 return 1 << VMW_BALLOON_2M_SHIFT;
352
353 return 1;
354}
355
453dc659
DT
356/*
357 * Retrieve desired balloon size from the host.
358 */
10a95d5d 359static bool vmballoon_send_get_target(struct vmballoon *b)
453dc659
DT
360{
361 unsigned long status;
453dc659
DT
362 unsigned long limit;
363 u32 limit32;
364
365 /*
366 * si_meminfo() is cheap. Moreover, we want to provide dynamic
367 * max balloon size later. So let us call si_meminfo() every
368 * iteration.
369 */
370 si_meminfo(&b->sysinfo);
371 limit = b->sysinfo.totalram;
372
373 /* Ensure limit fits in 32-bits */
374 limit32 = (u32)limit;
375 if (limit != limit32)
376 return false;
377
10a95d5d
NA
378 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);
379
380 if (status == VMW_BALLOON_SUCCESS)
453dc659 381 return true;
453dc659 382
453dc659
DT
383 return false;
384}
385
622074a9 386static struct page *vmballoon_alloc_page(bool is_2m_page)
365bd7ef
PM
387{
388 if (is_2m_page)
622074a9
NA
389 return alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS,
390 VMW_BALLOON_2M_SHIFT);
365bd7ef 391
622074a9 392 return alloc_page(VMW_PAGE_ALLOC_FLAGS);
365bd7ef
PM
393}
394
395static void vmballoon_free_page(struct page *page, bool is_2m_page)
396{
397 if (is_2m_page)
398 __free_pages(page, VMW_BALLOON_2M_SHIFT);
399 else
400 __free_page(page);
401}
402
453dc659
DT
/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when host decides to "reset" balloon for one reason or another.
 * Unlike normal "deflate" we do not (shall not) notify host of the pages
 * being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
        struct page *page, *next;
        unsigned is_2m_pages;

        /* walk both page-size lists (0 == 4k, 1 == 2m) and free everything */
        for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
                        is_2m_pages++) {
                struct vmballoon_page_size *page_size =
                                &b->page_sizes[is_2m_pages];
                u16 size_per_page = vmballoon_page_size(is_2m_pages);

                list_for_each_entry_safe(page, next, &page_size->pages, lru) {
                        list_del(&page->lru);
                        vmballoon_free_page(page, is_2m_pages);
                        STATS_INC(b->stats.free[is_2m_pages]);
                        b->size -= size_per_page;
                        cond_resched();
                }
        }

        /* Clearing the batch_page unconditionally has no adverse effect */
        free_page((unsigned long)b->batch_page);
        b->batch_page = NULL;
}
433
df8d0d42
NA
/**
 * vmballoon_status_page - returns the status of (un)lock operation
 *
 * @b: pointer to the balloon.
 * @idx: index for the page for which the operation is performed.
 * @p: pointer to where the page struct is returned.
 *
 * Following a lock or unlock operation, returns the status of the operation for
 * an individual page. Provides the page that the operation was performed on in
 * the @p argument.
 *
 * Returns: The status of a lock or unlock operation for an individual page.
 */
static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
                                           struct page **p)
{
        if (static_branch_likely(&vmw_balloon_batching)) {
                /* batching mode: hypervisor wrote per-entry status/pfn */
                *p = pfn_to_page(b->batch_page[idx].pfn);
                return b->batch_page[idx].status;
        }

        /* non-batching mode */
        *p = b->page;

        /*
         * If a failure occurs, the indication will be provided in the status
         * of the entire operation, which is considered before the individual
         * page status. So for non-batching mode, the indication is always of
         * success.
         */
        return VMW_BALLOON_SUCCESS;
}
453dc659 467
df8d0d42
NA
/**
 * vmballoon_lock_op - notifies the host about inflated/deflated pages.
 * @b: pointer to the balloon.
 * @num_pages: number of inflated/deflated pages.
 * @is_2m_pages: whether the page(s) are 2M (or 4k).
 * @lock: whether the operation is lock (or unlock).
 *
 * Notify the host about page(s) that were ballooned (or removed from the
 * balloon) so that host can use it without fear that guest will need it (or
 * stop using them since the VM does). Host may reject some pages, we need to
 * check the return value and maybe submit a different page. The pages that are
 * inflated/deflated are pointed by @b->page.
 *
 * Return: result as provided by the hypervisor.
 */
static unsigned long vmballoon_lock_op(struct vmballoon *b,
                                       unsigned int num_pages,
                                       bool is_2m_pages, bool lock)
{
        unsigned long cmd, pfn;

        if (static_branch_likely(&vmw_balloon_batching)) {
                /* batching: pass the PFN of the shared batch page */
                if (lock)
                        cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_LOCK :
                                            VMW_BALLOON_CMD_BATCHED_LOCK;
                else
                        cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
                                            VMW_BALLOON_CMD_BATCHED_UNLOCK;

                pfn = PHYS_PFN(virt_to_phys(b->batch_page));
        } else {
                cmd = lock ? VMW_BALLOON_CMD_LOCK : VMW_BALLOON_CMD_UNLOCK;
                pfn = page_to_pfn(b->page);

                /* In non-batching mode, PFNs must fit in 32-bit */
                if (unlikely(pfn != (u32)pfn))
                        return VMW_BALLOON_ERROR_PPN_INVALID;
        }

        return vmballoon_cmd(b, cmd, pfn, num_pages);
}
509
df8d0d42
NA
/*
 * Lock (inflate) @num_pages previously staged with vmballoon_add_page().
 * Pages the host accepted are moved to the balloon's page list and counted
 * in @b->size; refused pages are either parked on the refused list (to be
 * retried/released later) or freed immediately, depending on the per-page
 * status. Returns 0 on overall success, -EIO if the whole batch failed.
 */
static int vmballoon_lock(struct vmballoon *b, unsigned int num_pages,
                          bool is_2m_pages)
{
        unsigned long batch_status;
        int i;
        u16 size_per_page = vmballoon_page_size(is_2m_pages);

        batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, true);

        for (i = 0; i < num_pages; i++) {
                unsigned long status;
                struct page *p;
                struct vmballoon_page_size *page_size =
                                &b->page_sizes[is_2m_pages];

                status = vmballoon_status_page(b, i, &p);

                /*
                 * Failure of the whole batch overrides a single operation
                 * results.
                 */
                if (batch_status != VMW_BALLOON_SUCCESS)
                        status = batch_status;

                if (status == VMW_BALLOON_SUCCESS) {
                        /* track allocated page */
                        list_add(&p->lru, &page_size->pages);

                        /* update balloon size */
                        b->size += size_per_page;
                        continue;
                }

                /* Error occurred */
                STATS_INC(b->stats.refused_alloc[is_2m_pages]);

                switch (status) {
                case VMW_BALLOON_ERROR_PPN_PINNED:
                case VMW_BALLOON_ERROR_PPN_INVALID:
                        /*
                         * Place page on the list of non-balloonable pages
                         * and retry allocation, unless we already accumulated
                         * too many of them, in which case take a breather.
                         */
                        if (page_size->n_refused_pages
                                        < VMW_BALLOON_MAX_REFUSED) {
                                list_add(&p->lru, &page_size->refused_pages);
                                page_size->n_refused_pages++;
                                break;
                        }
                        /* Fallthrough */
                case VMW_BALLOON_ERROR_RESET:
                case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
                        vmballoon_free_page(p, is_2m_pages);
                        break;
                default:
                        /* This should never happen */
                        WARN_ON_ONCE(true);
                }
        }

        return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}
573
453dc659
DT
/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 *
 * Pages the host failed to unlock are re-added to the balloon's page list;
 * successfully unlocked pages are freed and deducted from @b->size.
 * Returns 0 on overall success, -EIO if the whole batch failed.
 */
static int vmballoon_unlock(struct vmballoon *b, unsigned int num_pages,
                            bool is_2m_pages)
{
        int i;
        unsigned long batch_status;
        u16 size_per_page = vmballoon_page_size(is_2m_pages);

        batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, false);

        for (i = 0; i < num_pages; i++) {
                struct vmballoon_page_size *page_size;
                unsigned long status;
                struct page *p;

                status = vmballoon_status_page(b, i, &p);
                page_size = &b->page_sizes[is_2m_pages];

                /*
                 * Failure of the whole batch overrides a single operation
                 * results.
                 */
                if (batch_status != VMW_BALLOON_SUCCESS)
                        status = batch_status;

                if (status != VMW_BALLOON_SUCCESS) {
                        /*
                         * That page wasn't successfully unlocked by the
                         * hypervisor, re-add it to the list of pages owned by
                         * the balloon driver.
                         */
                        list_add(&p->lru, &page_size->pages);
                } else {
                        /* deallocate page */
                        vmballoon_free_page(p, is_2m_pages);
                        STATS_INC(b->stats.free[is_2m_pages]);

                        /* update balloon size */
                        b->size -= size_per_page;
                }
        }

        return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}
622
453dc659
DT
623/*
624 * Release pages that were allocated while attempting to inflate the
625 * balloon but were refused by the host for one reason or another.
626 */
365bd7ef
PM
627static void vmballoon_release_refused_pages(struct vmballoon *b,
628 bool is_2m_pages)
453dc659
DT
629{
630 struct page *page, *next;
365bd7ef
PM
631 struct vmballoon_page_size *page_size =
632 &b->page_sizes[is_2m_pages];
453dc659 633
365bd7ef 634 list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
453dc659 635 list_del(&page->lru);
365bd7ef
PM
636 vmballoon_free_page(page, is_2m_pages);
637 STATS_INC(b->stats.refused_free[is_2m_pages]);
453dc659 638 }
55adaa49 639
365bd7ef 640 page_size->n_refused_pages = 0;
453dc659
DT
641}
642
f220a80f
XD
/*
 * Stage page @p at slot @idx for the next lock/unlock operation: in batching
 * mode record its PFN in the batch page, otherwise remember it in @b->page
 * (where @idx is necessarily 0, since batch_max_pages is 1).
 */
static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
        if (static_branch_likely(&vmw_balloon_batching))
                b->batch_page[idx] = (struct vmballoon_batch_entry)
                                        { .pfn = page_to_pfn(p) };
        else
                b->page = p;
}
651
453dc659
DT
/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
        unsigned int num_pages = 0;
        int error = 0;
        bool is_2m_pages;

        pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

        /*
         * We only use non-waiting (NOSLEEP) allocations here.
         *
         * If we do not throttle nosleep allocations, we can drain all
         * free pages in the guest quickly (if the balloon target is high).
         * As a side-effect, draining free pages helps to inform (force)
         * the guest to start swapping if balloon target is not met yet,
         * which is a desired behavior. However, balloon driver can consume
         * all available CPU cycles if too many pages are allocated in a
         * second. Therefore, we throttle nosleep allocations even when
         * the guest is not under memory pressure.
         */

        /* Prefer 2m pages when the host supports both page sizes. */
        is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;

        pr_debug("%s - goal: %d", __func__, b->target - b->size);

        while (!b->reset_required &&
               b->size + num_pages * vmballoon_page_size(is_2m_pages)
               < b->target) {
                struct page *page;

                STATS_INC(b->stats.alloc[is_2m_pages]);
                page = vmballoon_alloc_page(is_2m_pages);
                if (!page) {
                        STATS_INC(b->stats.alloc_fail[is_2m_pages]);
                        if (is_2m_pages) {
                                /* flush what we have, then fall back to 4k */
                                vmballoon_lock(b, num_pages, true);

                                /*
                                 * ignore errors from locking as we now switch
                                 * to 4k pages and we might get different
                                 * errors.
                                 */

                                num_pages = 0;
                                is_2m_pages = false;
                                continue;
                        }
                        break;
                }

                vmballoon_add_page(b, num_pages++, page);
                if (num_pages == b->batch_max_pages) {
                        error = vmballoon_lock(b, num_pages, is_2m_pages);

                        num_pages = 0;
                        if (error)
                                break;
                }

                cond_resched();
        }

        /* flush any partially-filled batch */
        if (num_pages > 0)
                vmballoon_lock(b, num_pages, is_2m_pages);

        vmballoon_release_refused_pages(b, true);
        vmballoon_release_refused_pages(b, false);
}
731
/*
 * Decrease the size of the balloon allowing guest to use more memory.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
        unsigned is_2m_pages;

        pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

        /* free pages to reach target */
        for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
                        is_2m_pages++) {
                struct page *page, *next;
                unsigned int num_pages = 0;
                struct vmballoon_page_size *page_size =
                                &b->page_sizes[is_2m_pages];

                list_for_each_entry_safe(page, next, &page_size->pages, lru) {
                        /*
                         * Stop once the target is reached (with one 2m page
                         * of slack) or a reset has been requested.
                         */
                        if (b->reset_required ||
                                (b->target > 0 &&
                                        b->size - num_pages
                                        * vmballoon_page_size(is_2m_pages)
                                < b->target + vmballoon_page_size(true)))
                                break;

                        list_del(&page->lru);
                        vmballoon_add_page(b, num_pages++, page);

                        if (num_pages == b->batch_max_pages) {
                                int error;

                                error = vmballoon_unlock(b, num_pages,
                                                         is_2m_pages);
                                num_pages = 0;
                                if (error)
                                        return;
                        }

                        cond_resched();
                }

                /* flush any partially-filled batch */
                if (num_pages > 0)
                        vmballoon_unlock(b, num_pages, is_2m_pages);
        }
}
777
df8d0d42
NA
/**
 * vmballoon_deinit_batching - disables batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Disables batching, by deallocating the page for communication with the
 * hypervisor and disabling the static key to indicate that batching is off.
 */
static void vmballoon_deinit_batching(struct vmballoon *b)
{
        free_page((unsigned long)b->batch_page);
        b->batch_page = NULL;
        static_branch_disable(&vmw_balloon_batching);
        /* without batching, each lock/unlock handles a single page */
        b->batch_max_pages = 1;
}
f220a80f 793
df8d0d42
NA
/**
 * vmballoon_init_batching - enable batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Enables batching, by allocating a page for communication with the hypervisor
 * and enabling the static_key to use batching.
 *
 * Return: zero on success or an appropriate error-code.
 */
static int vmballoon_init_batching(struct vmballoon *b)
{
        struct page *page;

        /* zeroed so reserved bits of every batch entry start at 0 */
        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page)
                return -ENOMEM;

        b->batch_page = page_address(page);
        b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);

        static_branch_enable(&vmw_balloon_batching);

        return 0;
}
819
48e3d668
PM
/*
 * Receive notification and resize balloon
 *
 * VMCI doorbell callback: kicks the work item immediately so the new
 * target is fetched without waiting for the next periodic run.
 */
static void vmballoon_doorbell(void *client_data)
{
        struct vmballoon *b = client_data;

        STATS_INC(b->stats.doorbell);

        mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}
831
/*
 * Clean up vmci doorbell
 *
 * Tells the host to stop using the doorbell, then destroys the handle
 * if one was created. Safe to call even if the doorbell was never set up.
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
        vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
                      VMCI_INVALID_ID, VMCI_INVALID_ID);

        if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
                vmci_doorbell_destroy(b->vmci_doorbell);
                b->vmci_doorbell = VMCI_INVALID_HANDLE;
        }
}
845
/*
 * Initialize vmci doorbell, to get notified as soon as balloon changes
 *
 * Returns 0 on success (or when the host lacks the signalled-wakeup
 * capability, in which case the doorbell is simply not used), -EIO on
 * failure — after undoing any partial setup via vmballoon_vmci_cleanup().
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
        unsigned long error;

        if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
                return 0;

        error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
                                     VMCI_PRIVILEGE_FLAG_RESTRICTED,
                                     vmballoon_doorbell, b);

        if (error != VMCI_SUCCESS)
                goto fail;

        /* announce the doorbell handle to the host */
        error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
                                b->vmci_doorbell.context,
                                b->vmci_doorbell.resource, NULL);

        if (error != VMW_BALLOON_SUCCESS)
                goto fail;

        return 0;
fail:
        vmballoon_vmci_cleanup(b);
        return -EIO;
}
875
f220a80f
XD
/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
        int error;

        vmballoon_vmci_cleanup(b);

        /* free all pages, skipping monitor unlock */
        vmballoon_pop(b);

        if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
                return;

        if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
                if (vmballoon_init_batching(b)) {
                        /*
                         * We failed to initialize batching, inform the monitor
                         * about it by sending a null capability.
                         *
                         * The guest will retry in one second.
                         */
                        vmballoon_send_start(b, 0);
                        return;
                }
        } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
                vmballoon_deinit_batching(b);
        }

        b->reset_required = false;

        /* doorbell failure is not fatal: the periodic work still runs */
        error = vmballoon_vmci_init(b);
        if (error)
                pr_err("failed to initialize vmci doorbell\n");

        if (!vmballoon_send_guest_id(b))
                pr_err("failed to send guest ID to the host\n");
}
917
/*
 * Balloon work function: reset protocol, if needed, get the new size and
 * adjust balloon as needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
        struct delayed_work *dwork = to_delayed_work(work);
        struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);

        STATS_INC(b->stats.timer);

        if (b->reset_required)
                vmballoon_reset(b);

        /* reset may fail and leave reset_required set; skip resizing then */
        if (!b->reset_required && vmballoon_send_get_target(b)) {
                unsigned long target = b->target;

                /* update target, adjust size */
                if (b->size < target)
                        vmballoon_inflate(b);
                else if (target == 0 ||
                                b->size > target + vmballoon_page_size(true))
                        vmballoon_deflate(b);
        }

        /*
         * We are using a freezable workqueue so that balloon operations are
         * stopped while the system transitions to/from sleep/hibernation.
         */
        queue_delayed_work(system_freezable_wq,
                           dwork, round_jiffies_relative(HZ));
}
950
951/*
952 * DEBUGFS Interface
953 */
954#ifdef CONFIG_DEBUG_FS
955
/* seq_file show callback for the "vmmemctl" debugfs statistics file. */
static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
        struct vmballoon *b = f->private;
        struct vmballoon_stats *stats = &b->stats;
        int i;

        /* format capabilities info */
        seq_printf(f,
                   "balloon capabilities: %#4x\n"
                   "used capabilities: %#4lx\n"
                   "is resetting: %c\n",
                   VMW_BALLOON_CAPABILITIES, b->capabilities,
                   b->reset_required ? 'y' : 'n');

        /* format size info */
        seq_printf(f,
                   "target: %8d pages\n"
                   "current: %8d pages\n",
                   b->target, b->size);

        /* per-command operation counters */
        for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
                if (vmballoon_cmd_names[i] == NULL)
                        continue;

                seq_printf(f, "%-22s: %16lu (%lu failed)\n",
                           vmballoon_cmd_names[i], stats->ops[i],
                           stats->ops_fail[i]);
        }

        /* allocation/free counters, indexed by page size (true == 2m) */
        seq_printf(f,
                   "\n"
                   "timer: %8u\n"
                   "doorbell: %8u\n"
                   "prim2mAlloc: %8u (%4u failed)\n"
                   "prim4kAlloc: %8u (%4u failed)\n"
                   "prim2mFree: %8u\n"
                   "primFree: %8u\n"
                   "err2mAlloc: %8u\n"
                   "errAlloc: %8u\n"
                   "err2mFree: %8u\n"
                   "errFree: %8u\n",
                   stats->timer,
                   stats->doorbell,
                   stats->alloc[true], stats->alloc_fail[true],
                   stats->alloc[false], stats->alloc_fail[false],
                   stats->free[true],
                   stats->free[false],
                   stats->refused_alloc[true], stats->refused_alloc[false],
                   stats->refused_free[true], stats->refused_free[false]);

        return 0;
}
1008
/* debugfs open callback: bind vmballoon_debug_show() to the seq_file. */
static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
        return single_open(file, vmballoon_debug_show, inode->i_private);
}
1013
/* File operations for the debugfs statistics file. */
static const struct file_operations vmballoon_debug_fops = {
        .owner          = THIS_MODULE,
        .open           = vmballoon_debug_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};
1021
/* Create the read-only "vmmemctl" debugfs entry exporting statistics. */
static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
        int error;

        b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
                                           &vmballoon_debug_fops);
        if (IS_ERR(b->dbg_entry)) {
                error = PTR_ERR(b->dbg_entry);
                pr_err("failed to create debugfs entry, error: %d\n", error);
                return error;
        }

        return 0;
}
1036
/* Remove the debugfs entry created by vmballoon_debugfs_init(). */
static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
        debugfs_remove(b->dbg_entry);
}
1041
#else

/* No-op stubs used when debugfs support is compiled out. */
static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
        return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif  /* CONFIG_DEBUG_FS */
1054
/*
 * Module entry point: verify we run under VMware, initialize driver state,
 * and schedule the first work-queue run (which performs the initial reset,
 * since reset_required starts out true).
 */
static int __init vmballoon_init(void)
{
        int error;
        unsigned is_2m_pages;

        /*
         * Check if we are running on VMware's hypervisor and bail out
         * if we are not.
         */
        if (x86_hyper_type != X86_HYPER_VMWARE)
                return -ENODEV;

        for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
                        is_2m_pages++) {
                INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
                INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
        }

        INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

        error = vmballoon_debugfs_init(&balloon);
        if (error)
                return error;

        balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
        balloon.batch_page = NULL;
        balloon.page = NULL;
        /* force the first work-queue run to perform the reset sequence */
        balloon.reset_required = true;

        queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

        return 0;
}

/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);
453dc659
DT
1095
/* Module exit: stop the work item, tear down the doorbell, and deflate. */
static void __exit vmballoon_exit(void)
{
        vmballoon_vmci_cleanup(&balloon);
        cancel_delayed_work_sync(&balloon.dwork);

        vmballoon_debugfs_exit(&balloon);

        /*
         * Deallocate all reserved memory, and reset connection with monitor.
         * Reset connection before deallocating memory to avoid potential for
         * additional spurious resets from guest touching deallocated pages.
         */
        vmballoon_send_start(&balloon, 0);
        vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);
/* (gitweb listing footer removed) */