vmw_balloon.c - drivers/misc/vmw_balloon.c - Linux diff v3.1

  1/*
  2 * VMware Balloon driver.
  3 *
  4 * Copyright (C) 2000-2010, VMware, Inc. All Rights Reserved.
  5 *
  6 * This program is free software; you can redistribute it and/or modify it
  7 * under the terms of the GNU General Public License as published by the
  8 * Free Software Foundation; version 2 of the License and no later version.
  9 *
 10 * This program is distributed in the hope that it will be useful, but
 11 * WITHOUT ANY WARRANTY; without even the implied warranty of
 12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 13 * NON INFRINGEMENT.  See the GNU General Public License for more
 14 * details.
 15 *
 16 * You should have received a copy of the GNU General Public License
 17 * along with this program; if not, write to the Free Software
 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19 *
 20 * Maintained by: Dmitry Torokhov <dtor@vmware.com>
 
 21 */
 22
 23/*
 24 * This is VMware physical memory management driver for Linux. The driver
 25 * acts like a "balloon" that can be inflated to reclaim physical pages by
 26 * reserving them in the guest and invalidating them in the monitor,
 27 * freeing up the underlying machine pages so they can be allocated to
 28 * other guests.  The balloon can also be deflated to allow the guest to
 29 * use more physical memory. Higher level policies can control the sizes
 30 * of balloons in VMs in order to manage physical memory resources.
 31 */
 32
 33//#define DEBUG
 34#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 35
 36#include <linux/types.h>
 37#include <linux/kernel.h>
 38#include <linux/mm.h>
 
 39#include <linux/sched.h>
 40#include <linux/module.h>
 41#include <linux/workqueue.h>
 42#include <linux/debugfs.h>
 43#include <linux/seq_file.h>
 
 
 44#include <asm/hypervisor.h>
 45
 46MODULE_AUTHOR("VMware, Inc.");
 47MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
 48MODULE_VERSION("1.2.1.3-k");
 49MODULE_ALIAS("dmi:*:svnVMware*:*");
 50MODULE_ALIAS("vmware_vmmemctl");
 51MODULE_LICENSE("GPL");
 52
 53/*
 54 * Various constants controlling rate of inflating/deflating balloon,
 55 * measured in pages.
 56 */
 57
 58/*
 59 * Rate of allocating memory when there is no memory pressure
 60 * (driver performs non-sleeping allocations).
 61 */
 62#define VMW_BALLOON_NOSLEEP_ALLOC_MAX	16384U
 63
 64/*
 65 * Rates of memory allocation when guest experiences memory pressure
 66 * (driver performs sleeping allocations).
 67 */
 68#define VMW_BALLOON_RATE_ALLOC_MIN	512U
 69#define VMW_BALLOON_RATE_ALLOC_MAX	2048U
 70#define VMW_BALLOON_RATE_ALLOC_INC	16U
 71
 72/*
 73 * Rates for releasing pages while deflating balloon.
 74 */
 75#define VMW_BALLOON_RATE_FREE_MIN	512U
 76#define VMW_BALLOON_RATE_FREE_MAX	16384U
 77#define VMW_BALLOON_RATE_FREE_INC	16U
 78
 79/*
 80 * When guest is under memory pressure, use a reduced page allocation
 81 * rate for next several cycles.
 82 */
 83#define VMW_BALLOON_SLOW_CYCLES		4
 84
 85/*
 86 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
 87 * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use
 88 * __GFP_NOWARN, to suppress page allocation failure warnings.
 89 */
 90#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)
 91
 92/*
 93 * Use GFP_HIGHUSER when executing in a separate kernel thread
 94 * context and allocation can sleep.  This is less stressful to
 95 * the guest memory system, since it allows the thread to block
 96 * while memory is reclaimed, and won't take pages from emergency
 97 * low-memory pools.
 98 */
 99#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)
100
101/* Maximum number of page allocations without yielding processor */
102#define VMW_BALLOON_YIELD_THRESHOLD	1024
103
104/* Maximum number of refused pages we accumulate during inflation cycle */
105#define VMW_BALLOON_MAX_REFUSED		16
106
107/*
108 * Hypervisor communication port definitions.
109 */
110#define VMW_BALLOON_HV_PORT		0x5670
111#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
112#define VMW_BALLOON_PROTOCOL_VERSION	2
113#define VMW_BALLOON_GUEST_ID		1	/* Linux */
114
115#define VMW_BALLOON_CMD_START		0
116#define VMW_BALLOON_CMD_GET_TARGET	1
117#define VMW_BALLOON_CMD_LOCK		2
118#define VMW_BALLOON_CMD_UNLOCK		3
119#define VMW_BALLOON_CMD_GUEST_ID	4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
121/* error codes */
122#define VMW_BALLOON_SUCCESS		0
123#define VMW_BALLOON_FAILURE		-1
124#define VMW_BALLOON_ERROR_CMD_INVALID	1
125#define VMW_BALLOON_ERROR_PPN_INVALID	2
126#define VMW_BALLOON_ERROR_PPN_LOCKED	3
127#define VMW_BALLOON_ERROR_PPN_UNLOCKED	4
128#define VMW_BALLOON_ERROR_PPN_PINNED	5
129#define VMW_BALLOON_ERROR_PPN_NOTNEEDED	6
130#define VMW_BALLOON_ERROR_RESET		7
131#define VMW_BALLOON_ERROR_BUSY		8
132
/*
 * Issue one balloon command to the hypervisor through an "inl" on the
 * backdoor port and evaluate to the status it hands back.
 *
 * Register usage, as fixed by the asm constraints below:
 *   eax  in: VMW_BALLOON_HV_MAGIC        out: status (value of the macro)
 *   ecx  in: VMW_BALLOON_CMD_<cmd>       out: discarded
 *   edx  in: VMW_BALLOON_HV_PORT         out: discarded
 *   ebx  in: data                        out: stored into "result"
 *
 * @cmd:    command suffix, pasted onto VMW_BALLOON_CMD_
 * @data:   argument passed to the hypervisor
 * @result: lvalue that receives the value returned in ebx
 *
 * Both outputs are masked with -1UL (all bits set for an unsigned long).
 * The "memory" clobber keeps the compiler from caching memory contents
 * across the call.
 */
 133#define VMWARE_BALLOON_CMD(cmd, data, result)		\
 134({							\
 135	unsigned long __stat, __dummy1, __dummy2;	\
 136	__asm__ __volatile__ ("inl (%%dx)" :		\
 137		"=a"(__stat),				\
 138		"=c"(__dummy1),				\
 139		"=d"(__dummy2),				\
 140		"=b"(result) :				\
 141		"0"(VMW_BALLOON_HV_MAGIC),		\
 142		"1"(VMW_BALLOON_CMD_##cmd),		\
 143		"2"(VMW_BALLOON_HV_PORT),		\
 144		"3"(data) :				\
 145		"memory");				\
 146	result &= -1UL;					\
 147	__stat & -1UL;					\
 148})
149
150#ifdef CONFIG_DEBUG_FS
/*
 * Event counters kept by the driver and printed by the debugfs show
 * routine. Only compiled when CONFIG_DEBUG_FS is set; all updates go
 * through STATS_INC().
 */
 151struct vmballoon_stats {
 152	unsigned int timer;		/* runs of the balloon work function */
 153
 154	/* allocation statistics */
 155	unsigned int alloc;		/* non-sleeping allocation attempts */
 156	unsigned int alloc_fail;	/* non-sleeping allocations that failed */
 157	unsigned int sleep_alloc;	/* sleeping allocation attempts */
 158	unsigned int sleep_alloc_fail;	/* sleeping allocations that failed */
 159	unsigned int refused_alloc;	/* allocated pages the lock step rejected */
 160	unsigned int refused_free;	/* rejected pages given back to the kernel */
 161	unsigned int free;		/* balloon pages given back to the kernel */
 162
 163	/* monitor operations: commands issued and how many of them failed */
 164	unsigned int lock;
 165	unsigned int lock_fail;
 166	unsigned int unlock;
 167	unsigned int unlock_fail;
 168	unsigned int target;
 169	unsigned int target_fail;
 170	unsigned int start;
 171	unsigned int start_fail;
 172	unsigned int guest_type;
 173	unsigned int guest_type_fail;
 174};
175
176#define STATS_INC(stat) (stat)++
177#else
178#define STATS_INC(stat)
179#endif
180
/*
 * Complete state of the balloon. The driver uses a single static
 * instance of this structure.
 */
 181struct vmballoon {
 182
 183	/* list of reserved physical pages (linked through page->lru) */
 184	struct list_head pages;
 185
 186	/*
	 * transient list of non-balloonable pages: pages the host refused
	 * to lock, kept until the end of the inflation cycle
	 */
 187	struct list_head refused_pages;
 188	unsigned int n_refused_pages;
 189
 190	/* current balloon size and the size requested by the host, in pages */
 191	unsigned int size;
 192	unsigned int target;
 193
 194	/*
	 * reset flag: set when a command returns VMW_BALLOON_ERROR_RESET,
	 * cleared once the START command succeeds again
	 */
 195	bool reset_required;
 196
 197	/* adjustment rates (pages per second) */
 198	unsigned int rate_alloc;
 199	unsigned int rate_free;
 200
 201	/* slowdown page allocations for next few cycles */
 202	unsigned int slow_allocation_cycles;
 203
 204#ifdef CONFIG_DEBUG_FS
 205	/* statistics */
 206	struct vmballoon_stats stats;
 207
 208	/* debugfs file exporting statistics */
 209	struct dentry *dbg_entry;
 210#endif
 211
	/* refreshed with si_meminfo() before every GET_TARGET request */
 212	struct sysinfo sysinfo;
 213
	/* delayed work item that runs vmballoon_work() */
 214	struct delayed_work dwork;
 215};
216
217static struct vmballoon balloon;
218
219/*
220 * Send "start" command to the host, communicating supported version
221 * of the protocol.
222 */
223static bool vmballoon_send_start(struct vmballoon *b)
224{
225	unsigned long status, dummy;
 
226
227	STATS_INC(b->stats.start);
228
229	status = VMWARE_BALLOON_CMD(START, VMW_BALLOON_PROTOCOL_VERSION, dummy);
230	if (status == VMW_BALLOON_SUCCESS)
231		return true;
232
233	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
234	STATS_INC(b->stats.start_fail);
235	return false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236}
237
238static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
239{
240	switch (status) {
241	case VMW_BALLOON_SUCCESS:
242		return true;
243
244	case VMW_BALLOON_ERROR_RESET:
245		b->reset_required = true;
246		/* fall through */
247
248	default:
249		return false;
250	}
251}
252
253/*
254 * Communicate guest type to the host so that it can adjust ballooning
255 * algorithm to the one most appropriate for the guest. This command
256 * is normally issued after sending "start" command and is part of
257 * standard reset sequence.
258 */
259static bool vmballoon_send_guest_id(struct vmballoon *b)
260{
261	unsigned long status, dummy;
262
263	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy);
 
264
265	STATS_INC(b->stats.guest_type);
266
267	if (vmballoon_check_status(b, status))
268		return true;
269
270	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
271	STATS_INC(b->stats.guest_type_fail);
272	return false;
273}
274
 
 
 
 
 
 
 
 
275/*
276 * Retrieve desired balloon size from the host.
277 */
278static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
279{
280	unsigned long status;
281	unsigned long target;
282	unsigned long limit;
 
283	u32 limit32;
284
285	/*
286	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
287	 * max balloon size later. So let us call si_meminfo() every
288	 * iteration.
289	 */
290	si_meminfo(&b->sysinfo);
291	limit = b->sysinfo.totalram;
292
293	/* Ensure limit fits in 32-bits */
294	limit32 = (u32)limit;
295	if (limit != limit32)
296		return false;
297
298	/* update stats */
299	STATS_INC(b->stats.target);
300
301	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, target);
302	if (vmballoon_check_status(b, status)) {
303		*new_target = target;
304		return true;
305	}
306
307	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
308	STATS_INC(b->stats.target_fail);
309	return false;
310}
311
312/*
313 * Notify the host about allocated page so that host can use it without
314 * fear that guest will need it. Host may reject some pages, we need to
315 * check the return value and maybe submit a different page.
316 */
317static bool vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
318				     unsigned int *hv_status)
319{
320	unsigned long status, dummy;
321	u32 pfn32;
322
323	pfn32 = (u32)pfn;
324	if (pfn32 != pfn)
325		return false;
326
327	STATS_INC(b->stats.lock);
328
329	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy);
330	if (vmballoon_check_status(b, status))
331		return true;
332
333	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
334	STATS_INC(b->stats.lock_fail);
335	return false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336}
337
338/*
339 * Notify the host that guest intends to release given page back into
340 * the pool of available (to the guest) pages.
341 */
342static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn)
 
343{
344	unsigned long status, dummy;
345	u32 pfn32;
346
347	pfn32 = (u32)pfn;
348	if (pfn32 != pfn)
349		return false;
350
351	STATS_INC(b->stats.unlock);
352
353	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy);
354	if (vmballoon_check_status(b, status))
355		return true;
356
357	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
358	STATS_INC(b->stats.unlock_fail);
359	return false;
360}
361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362/*
363 * Quickly release all pages allocated for the balloon. This function is
364 * called when host decides to "reset" balloon for one reason or another.
365 * Unlike normal "deflate" we do not (shall not) notify host of the pages
366 * being released.
367 */
368static void vmballoon_pop(struct vmballoon *b)
369{
370	struct page *page, *next;
371	unsigned int count = 0;
372
373	list_for_each_entry_safe(page, next, &b->pages, lru) {
374		list_del(&page->lru);
375		__free_page(page);
376		STATS_INC(b->stats.free);
377		b->size--;
378
379		if (++count >= b->rate_free) {
380			count = 0;
 
 
 
 
 
 
 
 
 
381			cond_resched();
382		}
383	}
384}
385
386/*
387 * Perform standard reset sequence by popping the balloon (in case it
388 * is not  empty) and then restarting protocol. This operation normally
389 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
390 */
391static void vmballoon_reset(struct vmballoon *b)
392{
393	/* free all pages, skipping monitor unlock */
394	vmballoon_pop(b);
395
396	if (vmballoon_send_start(b)) {
397		b->reset_required = false;
398		if (!vmballoon_send_guest_id(b))
399			pr_err("failed to send guest ID to the host\n");
400	}
401}
402
403/*
404 * Allocate (or reserve) a page for the balloon and notify the host.  If host
405 * refuses the page put it on "refuse" list and allocate another one until host
406 * is satisfied. "Refused" pages are released at the end of inflation cycle
407 * (when we allocate b->rate_alloc pages).
408 */
409static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep)
410{
411	struct page *page;
412	gfp_t flags;
413	unsigned int hv_status;
414	bool locked = false;
415
416	do {
417		if (!can_sleep)
418			STATS_INC(b->stats.alloc);
419		else
420			STATS_INC(b->stats.sleep_alloc);
421
422		flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP;
423		page = alloc_page(flags);
424		if (!page) {
425			if (!can_sleep)
426				STATS_INC(b->stats.alloc_fail);
427			else
428				STATS_INC(b->stats.sleep_alloc_fail);
429			return -ENOMEM;
430		}
431
432		/* inform monitor */
433		locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status);
434		if (!locked) {
435			STATS_INC(b->stats.refused_alloc);
436
437			if (hv_status == VMW_BALLOON_ERROR_RESET ||
438			    hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
439				__free_page(page);
440				return -EIO;
441			}
442
443			/*
444			 * Place page on the list of non-balloonable pages
445			 * and retry allocation, unless we already accumulated
446			 * too many of them, in which case take a breather.
447			 */
448			list_add(&page->lru, &b->refused_pages);
449			if (++b->n_refused_pages >= VMW_BALLOON_MAX_REFUSED)
450				return -EIO;
451		}
452	} while (!locked);
 
453
454	/* track allocated page */
455	list_add(&page->lru, &b->pages);
456
457	/* update balloon size */
458	b->size++;
459
460	return 0;
461}
462
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463/*
464 * Release the page allocated for the balloon. Note that we first notify
465 * the host so it can make sure the page will be available for the guest
466 * to use, if needed.
467 */
468static int vmballoon_release_page(struct vmballoon *b, struct page *page)
 
469{
470	if (!vmballoon_send_unlock_page(b, page_to_pfn(page)))
471		return -EIO;
472
473	list_del(&page->lru);
 
 
 
 
 
474
475	/* deallocate page */
476	__free_page(page);
477	STATS_INC(b->stats.free);
478
479	/* update balloon size */
480	b->size--;
481
482	return 0;
483}
484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485/*
486 * Release pages that were allocated while attempting to inflate the
487 * balloon but were refused by the host for one reason or another.
488 */
489static void vmballoon_release_refused_pages(struct vmballoon *b)
 
490{
491	struct page *page, *next;
 
 
492
493	list_for_each_entry_safe(page, next, &b->refused_pages, lru) {
494		list_del(&page->lru);
495		__free_page(page);
496		STATS_INC(b->stats.refused_free);
497	}
498
499	b->n_refused_pages = 0;
 
 
 
 
 
 
 
 
 
 
 
 
500}
501
502/*
503 * Inflate the balloon towards its target size. Note that we try to limit
504 * the rate of allocation to make sure we are not choking the rest of the
505 * system.
506 */
507static void vmballoon_inflate(struct vmballoon *b)
508{
509	unsigned int goal;
510	unsigned int rate;
511	unsigned int i;
512	unsigned int allocations = 0;
 
513	int error = 0;
514	bool alloc_can_sleep = false;
 
515
516	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
517
518	/*
519	 * First try NOSLEEP page allocations to inflate balloon.
520	 *
521	 * If we do not throttle nosleep allocations, we can drain all
522	 * free pages in the guest quickly (if the balloon target is high).
523	 * As a side-effect, draining free pages helps to inform (force)
524	 * the guest to start swapping if balloon target is not met yet,
525	 * which is a desired behavior. However, balloon driver can consume
526	 * all available CPU cycles if too many pages are allocated in a
527	 * second. Therefore, we throttle nosleep allocations even when
528	 * the guest is not under memory pressure. OTOH, if we have already
529	 * predicted that the guest is under memory pressure, then we
530	 * slowdown page allocations considerably.
531	 */
532
533	goal = b->target - b->size;
534	/*
535	 * Start with no sleep allocation rate which may be higher
536	 * than sleeping allocation rate.
537	 */
538	rate = b->slow_allocation_cycles ?
539			b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
541	pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n",
542		 __func__, goal, rate, b->rate_alloc);
 
 
 
 
 
 
543
544	for (i = 0; i < goal; i++) {
 
545
546		error = vmballoon_reserve_page(b, alloc_can_sleep);
547		if (error) {
548			if (error != -ENOMEM) {
549				/*
550				 * Not a page allocation failure, stop this
551				 * cycle. Maybe we'll get new target from
552				 * the host soon.
553				 */
554				break;
 
 
 
555			}
556
557			if (alloc_can_sleep) {
558				/*
559				 * CANSLEEP page allocation failed, so guest
560				 * is under severe memory pressure. Quickly
561				 * decrease allocation rate.
562				 */
563				b->rate_alloc = max(b->rate_alloc / 2,
564						    VMW_BALLOON_RATE_ALLOC_MIN);
 
565				break;
566			}
567
568			/*
569			 * NOSLEEP page allocation failed, so the guest is
570			 * under memory pressure. Let us slow down page
571			 * allocations for next few cycles so that the guest
572			 * gets out of memory pressure. Also, if we already
573			 * allocated b->rate_alloc pages, let's pause,
574			 * otherwise switch to sleeping allocations.
575			 */
576			b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;
577
578			if (i >= b->rate_alloc)
579				break;
580
581			alloc_can_sleep = true;
582			/* Lower rate for sleeping allocations. */
583			rate = b->rate_alloc;
 
584		}
585
586		if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) {
587			cond_resched();
588			allocations = 0;
 
 
 
 
589		}
590
591		if (i >= rate) {
 
 
592			/* We allocated enough pages, let's take a break. */
593			break;
594		}
595	}
596
 
 
 
597	/*
598	 * We reached our goal without failures so try increasing
599	 * allocation rate.
600	 */
601	if (error == 0 && i >= b->rate_alloc) {
602		unsigned int mult = i / b->rate_alloc;
603
604		b->rate_alloc =
605			min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
606			    VMW_BALLOON_RATE_ALLOC_MAX);
607	}
608
609	vmballoon_release_refused_pages(b);
 
610}
611
612/*
613 * Decrease the size of the balloon allowing guest to use more memory.
614 */
615static void vmballoon_deflate(struct vmballoon *b)
616{
617	struct page *page, *next;
618	unsigned int i = 0;
619	unsigned int goal;
620	int error;
621
622	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
623
624	/* limit deallocation rate */
625	goal = min(b->size - b->target, b->rate_free);
 
 
 
 
 
 
 
 
 
 
 
 
 
626
627	pr_debug("%s - goal: %d, rate: %d\n", __func__, goal, b->rate_free);
 
628
629	/* free pages to reach target */
630	list_for_each_entry_safe(page, next, &b->pages, lru) {
631		error = vmballoon_release_page(b, page);
632		if (error) {
633			/* quickly decrease rate in case of error */
634			b->rate_free = max(b->rate_free / 2,
635					   VMW_BALLOON_RATE_FREE_MIN);
636			return;
 
 
 
637		}
638
639		if (++i >= goal)
640			break;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
641	}
642
643	/* slowly increase rate if there were no errors */
644	b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC,
645			   VMW_BALLOON_RATE_FREE_MAX);
 
 
 
 
 
646}
647
648/*
649 * Balloon work function: reset protocol, if needed, get the new size and
650 * adjust balloon as needed. Repeat in 1 sec.
651 */
652static void vmballoon_work(struct work_struct *work)
653{
654	struct delayed_work *dwork = to_delayed_work(work);
655	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
656	unsigned int target;
657
658	STATS_INC(b->stats.timer);
659
660	if (b->reset_required)
661		vmballoon_reset(b);
662
663	if (b->slow_allocation_cycles > 0)
664		b->slow_allocation_cycles--;
665
666	if (vmballoon_send_get_target(b, &target)) {
667		/* update target, adjust size */
668		b->target = target;
669
670		if (b->size < target)
671			vmballoon_inflate(b);
672		else if (b->size > target)
 
673			vmballoon_deflate(b);
674	}
675
676	/*
677	 * We are using a freezable workqueue so that balloon operations are
678	 * stopped while the system transitions to/from sleep/hibernation.
679	 */
680	queue_delayed_work(system_freezable_wq,
681			   dwork, round_jiffies_relative(HZ));
682}
683
684/*
685 * DEBUGFS Interface
686 */
687#ifdef CONFIG_DEBUG_FS
688
689static int vmballoon_debug_show(struct seq_file *f, void *offset)
690{
691	struct vmballoon *b = f->private;
692	struct vmballoon_stats *stats = &b->stats;
693
 
 
 
 
 
 
 
 
694	/* format size info */
695	seq_printf(f,
696		   "target:             %8d pages\n"
697		   "current:            %8d pages\n",
698		   b->target, b->size);
699
700	/* format rate info */
701	seq_printf(f,
702		   "rateNoSleepAlloc:   %8d pages/sec\n"
703		   "rateSleepAlloc:     %8d pages/sec\n"
704		   "rateFree:           %8d pages/sec\n",
705		   VMW_BALLOON_NOSLEEP_ALLOC_MAX,
706		   b->rate_alloc, b->rate_free);
707
708	seq_printf(f,
709		   "\n"
710		   "timer:              %8u\n"
 
711		   "start:              %8u (%4u failed)\n"
712		   "guestType:          %8u (%4u failed)\n"
 
713		   "lock:               %8u (%4u failed)\n"
 
714		   "unlock:             %8u (%4u failed)\n"
715		   "target:             %8u (%4u failed)\n"
 
716		   "primNoSleepAlloc:   %8u (%4u failed)\n"
717		   "primCanSleepAlloc:  %8u (%4u failed)\n"
 
718		   "primFree:           %8u\n"
 
719		   "errAlloc:           %8u\n"
720		   "errFree:            %8u\n",
 
 
 
721		   stats->timer,
 
722		   stats->start, stats->start_fail,
723		   stats->guest_type, stats->guest_type_fail,
724		   stats->lock,  stats->lock_fail,
725		   stats->unlock, stats->unlock_fail,
 
 
726		   stats->target, stats->target_fail,
727		   stats->alloc, stats->alloc_fail,
 
728		   stats->sleep_alloc, stats->sleep_alloc_fail,
729		   stats->free,
730		   stats->refused_alloc, stats->refused_free);
 
 
 
731
732	return 0;
733}
734
735static int vmballoon_debug_open(struct inode *inode, struct file *file)
736{
737	return single_open(file, vmballoon_debug_show, inode->i_private);
738}
739
740static const struct file_operations vmballoon_debug_fops = {
741	.owner		= THIS_MODULE,
742	.open		= vmballoon_debug_open,
743	.read		= seq_read,
744	.llseek		= seq_lseek,
745	.release	= single_release,
746};
747
748static int __init vmballoon_debugfs_init(struct vmballoon *b)
749{
750	int error;
751
752	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
753					   &vmballoon_debug_fops);
754	if (IS_ERR(b->dbg_entry)) {
755		error = PTR_ERR(b->dbg_entry);
756		pr_err("failed to create debugfs entry, error: %d\n", error);
757		return error;
758	}
759
760	return 0;
761}
762
763static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
764{
765	debugfs_remove(b->dbg_entry);
766}
767
768#else
769
/*
 * Variant used when CONFIG_DEBUG_FS is not set: there is nothing to
 * create, so report success unconditionally.
 */
 770static inline int vmballoon_debugfs_init(struct vmballoon *b)
 771{
 772	return 0;
 773}
774
/* Variant used when CONFIG_DEBUG_FS is not set: nothing to tear down. */
 775static inline void vmballoon_debugfs_exit(struct vmballoon *b)
 776{
 777}
778
779#endif	/* CONFIG_DEBUG_FS */
780
781static int __init vmballoon_init(void)
782{
783	int error;
784
785	/*
786	 * Check if we are running on VMware's hypervisor and bail out
787	 * if we are not.
788	 */
789	if (x86_hyper != &x86_hyper_vmware)
790		return -ENODEV;
791
792	INIT_LIST_HEAD(&balloon.pages);
793	INIT_LIST_HEAD(&balloon.refused_pages);
 
 
 
794
795	/* initialize rates */
796	balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;
797	balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX;
798
799	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
800
801	/*
802	 * Start balloon.
803	 */
804	if (!vmballoon_send_start(&balloon)) {
805		pr_err("failed to send start command to the host\n");
806		return -EIO;
807	}
808
809	if (!vmballoon_send_guest_id(&balloon)) {
810		pr_err("failed to send guest ID to the host\n");
811		return -EIO;
812	}
813
814	error = vmballoon_debugfs_init(&balloon);
815	if (error)
816		return error;
817
 
 
 
 
 
818	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
819
820	return 0;
821}
822module_init(vmballoon_init);
823
824static void __exit vmballoon_exit(void)
825{
 
826	cancel_delayed_work_sync(&balloon.dwork);
827
828	vmballoon_debugfs_exit(&balloon);
829
830	/*
831	 * Deallocate all reserved memory, and reset connection with monitor.
832	 * Reset connection before deallocating memory to avoid potential for
833	 * additional spurious resets from guest touching deallocated pages.
834	 */
835	vmballoon_send_start(&balloon);
836	vmballoon_pop(&balloon);
837}
838module_exit(vmballoon_exit);

   1/*
   2 * VMware Balloon driver.
   3 *
   4 * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the
   8 * Free Software Foundation; version 2 of the License and no later version.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  13 * NON INFRINGEMENT.  See the GNU General Public License for more
  14 * details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 *
  20 * Maintained by:	Xavier Deguillard <xdeguillard@vmware.com>
  21 *			Philip Moltmann <moltmann@vmware.com>
  22 */
  23
  24/*
  25 * This is VMware physical memory management driver for Linux. The driver
  26 * acts like a "balloon" that can be inflated to reclaim physical pages by
  27 * reserving them in the guest and invalidating them in the monitor,
  28 * freeing up the underlying machine pages so they can be allocated to
  29 * other guests.  The balloon can also be deflated to allow the guest to
  30 * use more physical memory. Higher level policies can control the sizes
  31 * of balloons in VMs in order to manage physical memory resources.
  32 */
  33
  34//#define DEBUG
  35#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  36
  37#include <linux/types.h>
  38#include <linux/kernel.h>
  39#include <linux/mm.h>
  40#include <linux/vmalloc.h>
  41#include <linux/sched.h>
  42#include <linux/module.h>
  43#include <linux/workqueue.h>
  44#include <linux/debugfs.h>
  45#include <linux/seq_file.h>
  46#include <linux/vmw_vmci_defs.h>
  47#include <linux/vmw_vmci_api.h>
  48#include <asm/hypervisor.h>
  49
  50MODULE_AUTHOR("VMware, Inc.");
  51MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
  52MODULE_VERSION("1.5.0.0-k");
  53MODULE_ALIAS("dmi:*:svnVMware*:*");
  54MODULE_ALIAS("vmware_vmmemctl");
  55MODULE_LICENSE("GPL");
  56
  57/*
  58 * Various constants controlling rate of inflating/deflating balloon,
  59 * measured in pages.
  60 */
  61
  62/*
 
 
 
 
 
 
  63 * Rates of memory allocation when guest experiences memory pressure
  64 * (driver performs sleeping allocations).
  65 */
  66#define VMW_BALLOON_RATE_ALLOC_MIN	512U
  67#define VMW_BALLOON_RATE_ALLOC_MAX	2048U
  68#define VMW_BALLOON_RATE_ALLOC_INC	16U
  69
  70/*
 
 
 
 
 
 
 
  71 * When guest is under memory pressure, use a reduced page allocation
  72 * rate for next several cycles.
  73 */
  74#define VMW_BALLOON_SLOW_CYCLES		4
  75
  76/*
  77 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
  78 * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
  79 * __GFP_NOWARN, to suppress page allocation failure warnings.
  80 */
  81#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)
  82
  83/*
  84 * Use GFP_HIGHUSER when executing in a separate kernel thread
  85 * context and allocation can sleep.  This is less stressful to
  86 * the guest memory system, since it allows the thread to block
  87 * while memory is reclaimed, and won't take pages from emergency
  88 * low-memory pools.
  89 */
  90#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)
  91
 
 
 
  92/* Maximum number of refused pages we accumulate during inflation cycle */
  93#define VMW_BALLOON_MAX_REFUSED		16
  94
  95/*
  96 * Hypervisor communication port definitions.
  97 */
  98#define VMW_BALLOON_HV_PORT		0x5670
  99#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
 
 100#define VMW_BALLOON_GUEST_ID		1	/* Linux */
 101
/*
 * Capability bits of the balloon protocol. Each bit corresponds to a
 * group of backdoor commands; VMW_BALLOON_CAPABILITIES is the OR of all
 * of them.
 */
 102enum vmwballoon_capabilities {
 103	/*
 104	 * Bit 0 is reserved and not associated to any capability.
 105	 */
	/* LOCK and UNLOCK commands */
 106	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	/* BATCHED_LOCK and BATCHED_UNLOCK commands */
 107	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	/* BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands */
 108	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	/* VMCI_DOORBELL_SET command */
 109	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
 110};
 111
 112#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
 113					| VMW_BALLOON_BATCHED_CMDS \
 114					| VMW_BALLOON_BATCHED_2M_CMDS \
 115					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)
 116
 117#define VMW_BALLOON_2M_SHIFT		(9)
 118#define VMW_BALLOON_NUM_PAGE_SIZES	(2)
 119
 120/*
 121 * Backdoor commands availability:
 122 *
 123 * START, GET_TARGET and GUEST_ID are always available,
 124 *
 125 * VMW_BALLOON_BASIC_CMDS:
 126 *	LOCK and UNLOCK commands,
 127 * VMW_BALLOON_BATCHED_CMDS:
 128 *	BATCHED_LOCK and BATCHED_UNLOCK commands.
 129 * VMW_BALLOON_BATCHED_2M_CMDS:
 130 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
 131 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 132 *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 133 */
 134#define VMW_BALLOON_CMD_START			0
 135#define VMW_BALLOON_CMD_GET_TARGET		1
 136#define VMW_BALLOON_CMD_LOCK			2
 137#define VMW_BALLOON_CMD_UNLOCK			3
 138#define VMW_BALLOON_CMD_GUEST_ID		4
 139#define VMW_BALLOON_CMD_BATCHED_LOCK		6
 140#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
 141#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
 142#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
 143#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10
 144
 145
 146/* error codes */
 147#define VMW_BALLOON_SUCCESS		        0
 148#define VMW_BALLOON_FAILURE		        -1
 149#define VMW_BALLOON_ERROR_CMD_INVALID	        1
 150#define VMW_BALLOON_ERROR_PPN_INVALID	        2
 151#define VMW_BALLOON_ERROR_PPN_LOCKED	        3
 152#define VMW_BALLOON_ERROR_PPN_UNLOCKED	        4
 153#define VMW_BALLOON_ERROR_PPN_PINNED	        5
 154#define VMW_BALLOON_ERROR_PPN_NOTNEEDED	        6
 155#define VMW_BALLOON_ERROR_RESET		        7
 156#define VMW_BALLOON_ERROR_BUSY		        8
 157
 158#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)
 159
 160/* Batch page description */
 161
 162/*
 163 * Layout of a page in the batch page:
 164 *
 165 * +-------------+----------+--------+
 166 * |             |          |        |
 167 * | Page number | Reserved | Status |
 168 * |             |          |        |
 169 * +-------------+----------+--------+
 170 * 64  PAGE_SHIFT          6         0
 171 *
 172 * The reserved field should be set to 0.
 173 */
 174#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
 175#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
 176#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
 177
/*
 * One page worth of 64-bit batch entries (PAGE_SIZE / sizeof(u64) slots).
 * The accessors in this file read the page address of an entry through
 * VMW_BALLOON_BATCH_PAGE_MASK and its status through
 * VMW_BALLOON_BATCH_STATUS_MASK.
 */
 178struct vmballoon_batch_page {
 179	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
 180};
 181
 182static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
 183{
 184	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
 185}
 186
 187static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
 188				int idx)
 189{
 190	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
 191}
 192
 193static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
 194				u64 pa)
 195{
 196	batch->pages[idx] = pa;
 197}
 198
 199
/*
 * Issue balloon command @cmd to the hypervisor by executing "inl" on the
 * port held in the "d" register.
 *
 * Register usage, as given by the asm constraints:
 *   in:  "a" = VMW_BALLOON_HV_MAGIC, "c" = VMW_BALLOON_CMD_<cmd>,
 *        "d" = VMW_BALLOON_HV_PORT,  "b" = @arg1, "S" = @arg2
 *   out: "a" = status, "b" = @result, "c"/"d"/"S" = scratch values
 *
 * For the START command @result is taken from the "c" output instead of
 * "b". @result must be an lvalue and is always overwritten. The whole
 * expression evaluates to the status. The "& -1UL" operations keep every
 * bit and therefore leave both values unchanged.
 */
  200#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
  201({								\
  202	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
  203	__asm__ __volatile__ ("inl %%dx" :			\
  204		"=a"(__status),					\
  205		"=c"(__dummy1),					\
  206		"=d"(__dummy2),					\
  207		"=b"(result),					\
  208		"=S" (__dummy3) :				\
  209		"0"(VMW_BALLOON_HV_MAGIC),			\
  210		"1"(VMW_BALLOON_CMD_##cmd),			\
  211		"2"(VMW_BALLOON_HV_PORT),			\
  212		"3"(arg1),					\
  213		"4" (arg2) :					\
  214		"memory");					\
  215	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
  216		result = __dummy1;				\
  217	result &= -1UL;						\
  218	__status & -1UL;					\
  219})
 220
/*
 * Event counters exported through debugfs by vmballoon_debug_show().
 * Arrays of VMW_BALLOON_NUM_PAGE_SIZES elements are indexed with the
 * is_2m_pages flag: [false] counts 4k pages, [true] counts 2m pages.
 * Every "*_fail" counter sits next to the counter of attempts it belongs to.
 */
  221#ifdef CONFIG_DEBUG_FS
  222struct vmballoon_stats {
	/* runs of vmballoon_work() and calls of vmballoon_doorbell() */
  223	unsigned int timer;
  224	unsigned int doorbell;
  225
  226	/* allocation statistics */
  227	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
  228	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
  229	unsigned int sleep_alloc;
  230	unsigned int sleep_alloc_fail;
  231	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
  232	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
  233	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
  234
  235	/* monitor operations */
  236	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
  237	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
  238	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
  239	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
  240	unsigned int target;
  241	unsigned int target_fail;
  242	unsigned int start;
  243	unsigned int start_fail;
  244	unsigned int guest_type;
  245	unsigned int guest_type_fail;
  246	unsigned int doorbell_set;
  247	unsigned int doorbell_unset;
  248};
  249
/*
 * Bump a statistics counter. Without CONFIG_DEBUG_FS the macro expands to
 * nothing, so its argument is never evaluated and may name a field that
 * does not exist in that configuration.
 */
  250#define STATS_INC(stat) (stat)++
  251#else
  252#define STATS_INC(stat)
  253#endif
 254
  255struct vmballoon;
  256
/*
 * Operations that differ between the basic (one page per hypercall) and
 * the batched protocol; vmballoon_reset() selects one of the two tables.
 *
 * @add_page: record page @p as element @idx of the next lock/unlock call.
 * @lock:     hand the @num_pages recorded pages to the host (inflate).
 * @unlock:   take the @num_pages recorded pages back from the host (deflate).
 *
 * @lock and @unlock return 0 on success and a negative errno otherwise;
 * the value the host returns in the result register is stored in *@target.
 */
  257struct vmballoon_ops {
  258	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
  259	int (*lock)(struct vmballoon *b, unsigned int num_pages,
  260			bool is_2m_pages, unsigned int *target);
  261	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
  262			bool is_2m_pages, unsigned int *target);
  263};
 264
/*
 * Per page size bookkeeping; struct vmballoon holds one instance for each
 * of the VMW_BALLOON_NUM_PAGE_SIZES page sizes. Pages are linked through
 * their lru member.
 */
  265struct vmballoon_page_size {
  266	/* list of reserved physical pages */
  267	struct list_head pages;
  268
  269	/* transient list of non-balloonable pages */
  270	struct list_head refused_pages;
	/* length of refused_pages; reset by vmballoon_release_refused_pages() */
  271	unsigned int n_refused_pages;
  272};
 273
/*
 * Complete state of the balloon. The driver uses the single static
 * instance 'balloon' defined right after the structure.
 */
  274struct vmballoon {
	/* page lists, indexed by the is_2m_pages flag ([false] = 4k pages) */
  275	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
  276
  277	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
  278	unsigned supported_page_sizes;
  279
  280	/* balloon size in pages */
	/* (a 2m page is accounted as vmballoon_page_size(true) pages) */
  281	unsigned int size;
	/* size requested by the host, see vmballoon_work() */
  282	unsigned int target;
  283
  284	/* reset flag */
  285	bool reset_required;
  286
  287	/* adjustment rates (pages per second) */
  288	unsigned int rate_alloc;
 
  289
  290	/* slowdown page allocations for next few cycles */
  291	unsigned int slow_allocation_cycles;
  292
	/* capabilities in use, set by vmballoon_send_start() */
  293	unsigned long capabilities;
  294
	/* vmap() of 'page' while batching is set up, NULL otherwise */
  295	struct vmballoon_batch_page *batch_page;
	/* pages per ->lock()/->unlock() call; 1 with the basic ops */
  296	unsigned int batch_max_pages;
	/*
	 * With the batched ops: the page backing batch_page.
	 * With the basic ops: the page last passed to ->add_page().
	 */
  297	struct page *page;
  298
	/* basic or batched operations, chosen in vmballoon_reset() */
  299	const struct vmballoon_ops *ops;
  300
  301#ifdef CONFIG_DEBUG_FS
  302	/* statistics */
  303	struct vmballoon_stats stats;
  304
  305	/* debugfs file exporting statistics */
  306	struct dentry *dbg_entry;
  307#endif
  308
	/* filled by si_meminfo() in vmballoon_send_get_target() */
  309	struct sysinfo sysinfo;
  310
	/* periodic work item running vmballoon_work() */
  311	struct delayed_work dwork;
  312
	/* doorbell handle, VMCI_INVALID_HANDLE while no doorbell exists */
  313	struct vmci_handle vmci_doorbell;
  314};
  315
  316static struct vmballoon balloon;
 317
 318/*
 319 * Send "start" command to the host, communicating supported version
 320 * of the protocol.
 321 */
 322static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
 323{
 324	unsigned long status, capabilities, dummy = 0;
 325	bool success;
 326
 327	STATS_INC(b->stats.start);
 328
 329	status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);
 
 
 330
 331	switch (status) {
 332	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
 333		b->capabilities = capabilities;
 334		success = true;
 335		break;
 336	case VMW_BALLOON_SUCCESS:
 337		b->capabilities = VMW_BALLOON_BASIC_CMDS;
 338		success = true;
 339		break;
 340	default:
 341		success = false;
 342	}
 343
 344	if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
 345		b->supported_page_sizes = 2;
 346	else
 347		b->supported_page_sizes = 1;
 348
 349	if (!success) {
 350		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
 351		STATS_INC(b->stats.start_fail);
 352	}
 353	return success;
 354}
 355
 356static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
 357{
 358	switch (status) {
 359	case VMW_BALLOON_SUCCESS:
 360		return true;
 361
 362	case VMW_BALLOON_ERROR_RESET:
 363		b->reset_required = true;
 364		/* fall through */
 365
 366	default:
 367		return false;
 368	}
 369}
 370
 371/*
 372 * Communicate guest type to the host so that it can adjust ballooning
 373 * algorithm to the one most appropriate for the guest. This command
 374 * is normally issued after sending "start" command and is part of
 375 * standard reset sequence.
 376 */
 377static bool vmballoon_send_guest_id(struct vmballoon *b)
 378{
 379	unsigned long status, dummy = 0;
 380
 381	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
 382				dummy);
 383
 384	STATS_INC(b->stats.guest_type);
 385
 386	if (vmballoon_check_status(b, status))
 387		return true;
 388
 389	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
 390	STATS_INC(b->stats.guest_type_fail);
 391	return false;
 392}
 393
 394static u16 vmballoon_page_size(bool is_2m_page)
 395{
 396	if (is_2m_page)
 397		return 1 << VMW_BALLOON_2M_SHIFT;
 398
 399	return 1;
 400}
 401
 402/*
 403 * Retrieve desired balloon size from the host.
 404 */
 405static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
 406{
 407	unsigned long status;
 408	unsigned long target;
 409	unsigned long limit;
 410	unsigned long dummy = 0;
 411	u32 limit32;
 412
 413	/*
 414	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
 415	 * max balloon size later. So let us call si_meminfo() every
 416	 * iteration.
 417	 */
 418	si_meminfo(&b->sysinfo);
 419	limit = b->sysinfo.totalram;
 420
 421	/* Ensure limit fits in 32-bits */
 422	limit32 = (u32)limit;
 423	if (limit != limit32)
 424		return false;
 425
 426	/* update stats */
 427	STATS_INC(b->stats.target);
 428
 429	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
 430	if (vmballoon_check_status(b, status)) {
 431		*new_target = target;
 432		return true;
 433	}
 434
 435	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
 436	STATS_INC(b->stats.target_fail);
 437	return false;
 438}
 439
 440/*
 441 * Notify the host about allocated page so that host can use it without
 442 * fear that guest will need it. Host may reject some pages, we need to
 443 * check the return value and maybe submit a different page.
 444 */
 445static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
 446				unsigned int *hv_status, unsigned int *target)
 447{
 448	unsigned long status, dummy = 0;
 449	u32 pfn32;
 450
 451	pfn32 = (u32)pfn;
 452	if (pfn32 != pfn)
 453		return -1;
 454
 455	STATS_INC(b->stats.lock[false]);
 456
 457	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
 458	if (vmballoon_check_status(b, status))
 459		return 0;
 460
 461	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
 462	STATS_INC(b->stats.lock_fail[false]);
 463	return 1;
 464}
 465
 466static int vmballoon_send_batched_lock(struct vmballoon *b,
 467		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
 468{
 469	unsigned long status;
 470	unsigned long pfn = page_to_pfn(b->page);
 471
 472	STATS_INC(b->stats.lock[is_2m_pages]);
 473
 474	if (is_2m_pages)
 475		status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
 476				*target);
 477	else
 478		status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
 479				*target);
 480
 481	if (vmballoon_check_status(b, status))
 482		return 0;
 483
 484	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
 485	STATS_INC(b->stats.lock_fail[is_2m_pages]);
 486	return 1;
 487}
 488
 489/*
 490 * Notify the host that guest intends to release given page back into
 491 * the pool of available (to the guest) pages.
 492 */
 493static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
 494							unsigned int *target)
 495{
 496	unsigned long status, dummy = 0;
 497	u32 pfn32;
 498
 499	pfn32 = (u32)pfn;
 500	if (pfn32 != pfn)
 501		return false;
 502
 503	STATS_INC(b->stats.unlock[false]);
 504
 505	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
 506	if (vmballoon_check_status(b, status))
 507		return true;
 508
 509	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
 510	STATS_INC(b->stats.unlock_fail[false]);
 511	return false;
 512}
 513
 514static bool vmballoon_send_batched_unlock(struct vmballoon *b,
 515		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
 516{
 517	unsigned long status;
 518	unsigned long pfn = page_to_pfn(b->page);
 519
 520	STATS_INC(b->stats.unlock[is_2m_pages]);
 521
 522	if (is_2m_pages)
 523		status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
 524				*target);
 525	else
 526		status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
 527				*target);
 528
 529	if (vmballoon_check_status(b, status))
 530		return true;
 531
 532	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
 533	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
 534	return false;
 535}
 536
 537static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
 538{
 539	if (is_2m_page)
 540		return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);
 541
 542	return alloc_page(flags);
 543}
 544
 545static void vmballoon_free_page(struct page *page, bool is_2m_page)
 546{
 547	if (is_2m_page)
 548		__free_pages(page, VMW_BALLOON_2M_SHIFT);
 549	else
 550		__free_page(page);
 551}
 552
 553/*
 554 * Quickly release all pages allocated for the balloon. This function is
 555 * called when host decides to "reset" balloon for one reason or another.
 556 * Unlike normal "deflate" we do not (shall not) notify host of the pages
 557 * being released.
 558 */
 559static void vmballoon_pop(struct vmballoon *b)
 560{
 561	struct page *page, *next;
 562	unsigned is_2m_pages;
 
 
 
 
 
 
 563
 564	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
 565			is_2m_pages++) {
 566		struct vmballoon_page_size *page_size =
 567				&b->page_sizes[is_2m_pages];
 568		u16 size_per_page = vmballoon_page_size(is_2m_pages);
 569
 570		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
 571			list_del(&page->lru);
 572			vmballoon_free_page(page, is_2m_pages);
 573			STATS_INC(b->stats.free[is_2m_pages]);
 574			b->size -= size_per_page;
 575			cond_resched();
 576		}
 577	}
 
 578
 579	if (b->batch_page) {
 580		vunmap(b->batch_page);
 581		b->batch_page = NULL;
 582	}
 
 
 
 
 
 583
 584	if (b->page) {
 585		__free_page(b->page);
 586		b->page = NULL;
 
 587	}
 588}
 589
 590/*
 591 * Notify the host of a ballooned page. If host rejects the page put it on the
 592 * refuse list, those refused page are then released at the end of the
 593 * inflation cycle.
 594 */
 595static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
 596				bool is_2m_pages, unsigned int *target)
 597{
 598	int locked, hv_status;
 599	struct page *page = b->page;
 600	struct vmballoon_page_size *page_size = &b->page_sizes[false];
 601
 602	/* is_2m_pages can never happen as 2m pages support implies batching */
 603
 604	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
 605								target);
 606	if (locked > 0) {
 607		STATS_INC(b->stats.refused_alloc[false]);
 608
 609		if (hv_status == VMW_BALLOON_ERROR_RESET ||
 610				hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
 611			vmballoon_free_page(page, false);
 612			return -EIO;
 
 
 
 
 613		}
 614
 615		/*
 616		 * Place page on the list of non-balloonable pages
 617		 * and retry allocation, unless we already accumulated
 618		 * too many of them, in which case take a breather.
 619		 */
 620		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
 621			page_size->n_refused_pages++;
 622			list_add(&page->lru, &page_size->refused_pages);
 623		} else {
 624			vmballoon_free_page(page, false);
 
 
 
 
 
 
 
 
 
 625		}
 626		return -EIO;
 627	}
 628
 629	/* track allocated page */
 630	list_add(&page->lru, &page_size->pages);
 631
 632	/* update balloon size */
 633	b->size++;
 634
 635	return 0;
 636}
 637
 638static int vmballoon_lock_batched_page(struct vmballoon *b,
 639		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
 640{
 641	int locked, i;
 642	u16 size_per_page = vmballoon_page_size(is_2m_pages);
 643
 644	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
 645			target);
 646	if (locked > 0) {
 647		for (i = 0; i < num_pages; i++) {
 648			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
 649			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
 650
 651			vmballoon_free_page(p, is_2m_pages);
 652		}
 653
 654		return -EIO;
 655	}
 656
 657	for (i = 0; i < num_pages; i++) {
 658		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
 659		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
 660		struct vmballoon_page_size *page_size =
 661				&b->page_sizes[is_2m_pages];
 662
 663		locked = vmballoon_batch_get_status(b->batch_page, i);
 664
 665		switch (locked) {
 666		case VMW_BALLOON_SUCCESS:
 667			list_add(&p->lru, &page_size->pages);
 668			b->size += size_per_page;
 669			break;
 670		case VMW_BALLOON_ERROR_PPN_PINNED:
 671		case VMW_BALLOON_ERROR_PPN_INVALID:
 672			if (page_size->n_refused_pages
 673					< VMW_BALLOON_MAX_REFUSED) {
 674				list_add(&p->lru, &page_size->refused_pages);
 675				page_size->n_refused_pages++;
 676				break;
 677			}
 678			/* Fallthrough */
 679		case VMW_BALLOON_ERROR_RESET:
 680		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
 681			vmballoon_free_page(p, is_2m_pages);
 682			break;
 683		default:
 684			/* This should never happen */
 685			WARN_ON_ONCE(true);
 686		}
 687	}
 688
 689	return 0;
 690}
 691
 692/*
 693 * Release the page allocated for the balloon. Note that we first notify
 694 * the host so it can make sure the page will be available for the guest
 695 * to use, if needed.
 696 */
 697static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
 698		bool is_2m_pages, unsigned int *target)
 699{
 700	struct page *page = b->page;
 701	struct vmballoon_page_size *page_size = &b->page_sizes[false];
 702
 703	/* is_2m_pages can never happen as 2m pages support implies batching */
 704
 705	if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
 706		list_add(&page->lru, &page_size->pages);
 707		return -EIO;
 708	}
 709
 710	/* deallocate page */
 711	vmballoon_free_page(page, false);
 712	STATS_INC(b->stats.free[false]);
 713
 714	/* update balloon size */
 715	b->size--;
 716
 717	return 0;
 718}
 719
 720static int vmballoon_unlock_batched_page(struct vmballoon *b,
 721				unsigned int num_pages, bool is_2m_pages,
 722				unsigned int *target)
 723{
 724	int locked, i, ret = 0;
 725	bool hv_success;
 726	u16 size_per_page = vmballoon_page_size(is_2m_pages);
 727
 728	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
 729			target);
 730	if (!hv_success)
 731		ret = -EIO;
 732
 733	for (i = 0; i < num_pages; i++) {
 734		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
 735		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
 736		struct vmballoon_page_size *page_size =
 737				&b->page_sizes[is_2m_pages];
 738
 739		locked = vmballoon_batch_get_status(b->batch_page, i);
 740		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
 741			/*
 742			 * That page wasn't successfully unlocked by the
 743			 * hypervisor, re-add it to the list of pages owned by
 744			 * the balloon driver.
 745			 */
 746			list_add(&p->lru, &page_size->pages);
 747		} else {
 748			/* deallocate page */
 749			vmballoon_free_page(p, is_2m_pages);
 750			STATS_INC(b->stats.free[is_2m_pages]);
 751
 752			/* update balloon size */
 753			b->size -= size_per_page;
 754		}
 755	}
 756
 757	return ret;
 758}
 759
 760/*
 761 * Release pages that were allocated while attempting to inflate the
 762 * balloon but were refused by the host for one reason or another.
 763 */
 764static void vmballoon_release_refused_pages(struct vmballoon *b,
 765		bool is_2m_pages)
 766{
 767	struct page *page, *next;
 768	struct vmballoon_page_size *page_size =
 769			&b->page_sizes[is_2m_pages];
 770
 771	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
 772		list_del(&page->lru);
 773		vmballoon_free_page(page, is_2m_pages);
 774		STATS_INC(b->stats.refused_free[is_2m_pages]);
 775	}
 776
 777	page_size->n_refused_pages = 0;
 778}
 779
 780static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
 781{
 782	b->page = p;
 783}
 784
 785static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
 786				struct page *p)
 787{
 788	vmballoon_batch_set_pa(b->batch_page, idx,
 789			(u64)page_to_pfn(p) << PAGE_SHIFT);
 790}
 791
 792/*
 793 * Inflate the balloon towards its target size. Note that we try to limit
 794 * the rate of allocation to make sure we are not choking the rest of the
 795 * system.
 796 */
  797static void vmballoon_inflate(struct vmballoon *b)
  798{
  799	unsigned rate;
 
 
	/*
	 * NOTE(review): 'allocations' is initialised to 0 and never modified
	 * in this function. Every "allocations >= ..." test below can thus
	 * only be true when the right-hand side is 0, which means that the
	 * rate based loop exit and the rate increase at the end do not
	 * trigger for a non-zero rate. Confirm whether the counter was meant
	 * to be incremented for every allocated page.
	 */
  800	unsigned int allocations = 0;
	/* pages handed to ->add_page() but not yet passed to ->lock() */
  801	unsigned int num_pages = 0;
  802	int error = 0;
  803	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
  804	bool is_2m_pages;
  805
  806	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
  807
  808	/*
  809	 * First try NOSLEEP page allocations to inflate balloon.
  810	 *
  811	 * If we do not throttle nosleep allocations, we can drain all
  812	 * free pages in the guest quickly (if the balloon target is high).
  813	 * As a side-effect, draining free pages helps to inform (force)
  814	 * the guest to start swapping if balloon target is not met yet,
  815	 * which is a desired behavior. However, balloon driver can consume
  816	 * all available CPU cycles if too many pages are allocated in a
  817	 * second. Therefore, we throttle nosleep allocations even when
  818	 * the guest is not under memory pressure. OTOH, if we have already
  819	 * predicted that the guest is under memory pressure, then we
  820	 * slowdown page allocations considerably.
  821	 */
  822
 
  823	/*
  824	 * Start with no sleep allocation rate which may be higher
  825	 * than sleeping allocation rate.
  826	 */
	/*
	 * While slow allocation cycles are pending only 4k pages are used;
	 * otherwise 2m pages are tried first if the host supports them.
	 */
  827	if (b->slow_allocation_cycles) {
  828		rate = b->rate_alloc;
  829		is_2m_pages = false;
  830	} else {
  831		rate = UINT_MAX;
  832		is_2m_pages =
  833			b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
  834	}
  835
  836	pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n",
  837		 __func__, b->target - b->size, rate, b->rate_alloc);
  838
	/*
	 * Keep allocating until the balloon, including the pages that are
	 * queued but not locked yet, reaches the target or a reset is pending.
	 */
  839	while (!b->reset_required &&
  840		b->size + num_pages * vmballoon_page_size(is_2m_pages)
  841		< b->target) {
  842		struct page *page;
  843
  844		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
  845			STATS_INC(b->stats.alloc[is_2m_pages]);
  846		else
  847			STATS_INC(b->stats.sleep_alloc);
  848
  849		page = vmballoon_alloc_page(flags, is_2m_pages);
  850		if (!page) {
  851			STATS_INC(b->stats.alloc_fail[is_2m_pages]);
  852
  853			if (is_2m_pages) {
				/*
				 * Flush the 2m pages queued so far (called
				 * even when num_pages is 0), then carry on
				 * with 4k pages.
				 */
  854				b->ops->lock(b, num_pages, true, &b->target);
  855
 
 
 
  856				/*
  857				 * ignore errors from locking as we now switch
  858				 * to 4k pages and we might get different
  859				 * errors.
  860				 */
  861
  862				num_pages = 0;
  863				is_2m_pages = false;
  864				continue;
  865			}
  866
  867			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
  868				/*
  869				 * CANSLEEP page allocation failed, so guest
  870				 * is under severe memory pressure. Quickly
  871				 * decrease allocation rate.
  872				 */
  873				b->rate_alloc = max(b->rate_alloc / 2,
  874						    VMW_BALLOON_RATE_ALLOC_MIN);
  875				STATS_INC(b->stats.sleep_alloc_fail);
  876				break;
  877			}
  878
  879			/*
  880			 * NOSLEEP page allocation failed, so the guest is
  881			 * under memory pressure. Let us slow down page
  882			 * allocations for next few cycles so that the guest
  883			 * gets out of memory pressure. Also, if we already
  884			 * allocated b->rate_alloc pages, let's pause,
  885			 * otherwise switch to sleeping allocations.
  886			 */
  887			b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;
  888
  889			if (allocations >= b->rate_alloc)
  890				break;
  891
  892			flags = VMW_PAGE_ALLOC_CANSLEEP;
  893			/* Lower rate for sleeping allocations. */
  894			rate = b->rate_alloc;
  895			continue;
  896		}
  897
		/* queue the page and lock the batch as soon as it is full */
  898		b->ops->add_page(b, num_pages++, page);
  899		if (num_pages == b->batch_max_pages) {
  900			error = b->ops->lock(b, num_pages, is_2m_pages,
  901					&b->target);
  902			num_pages = 0;
  903			if (error)
  904				break;
  905		}
  906
  907		cond_resched();
  908
  909		if (allocations >= rate) {
  910			/* We allocated enough pages, let's take a break. */
  911			break;
  912		}
  913	}
  914
	/* lock a partially filled batch; its result is not checked */
  915	if (num_pages > 0)
  916		b->ops->lock(b, num_pages, is_2m_pages, &b->target);
  917
  918	/*
  919	 * We reached our goal without failures so try increasing
  920	 * allocation rate.
  921	 */
  922	if (error == 0 && allocations >= b->rate_alloc) {
  923		unsigned int mult = allocations / b->rate_alloc;
  924
  925		b->rate_alloc =
  926			min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
  927			    VMW_BALLOON_RATE_ALLOC_MAX);
  928	}
  929
	/* pages the host refused during this cycle are given back now */
  930	vmballoon_release_refused_pages(b, true);
  931	vmballoon_release_refused_pages(b, false);
  932}
 933
 934/*
 935 * Decrease the size of the balloon allowing guest to use more memory.
 936 */
 937static void vmballoon_deflate(struct vmballoon *b)
 938{
 939	unsigned is_2m_pages;
 
 
 
 940
 941	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
 942
 943	/* free pages to reach target */
 944	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
 945			is_2m_pages++) {
 946		struct page *page, *next;
 947		unsigned int num_pages = 0;
 948		struct vmballoon_page_size *page_size =
 949				&b->page_sizes[is_2m_pages];
 950
 951		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
 952			if (b->reset_required ||
 953				(b->target > 0 &&
 954					b->size - num_pages
 955					* vmballoon_page_size(is_2m_pages)
 956				< b->target + vmballoon_page_size(true)))
 957				break;
 958
 959			list_del(&page->lru);
 960			b->ops->add_page(b, num_pages++, page);
 961
 962			if (num_pages == b->batch_max_pages) {
 963				int error;
 964
 965				error = b->ops->unlock(b, num_pages,
 966						is_2m_pages, &b->target);
 967				num_pages = 0;
 968				if (error)
 969					return;
 970			}
 971
 972			cond_resched();
 973		}
 974
 975		if (num_pages > 0)
 976			b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
 977	}
 978}
 979
 980static const struct vmballoon_ops vmballoon_basic_ops = {
 981	.add_page = vmballoon_add_page,
 982	.lock = vmballoon_lock_page,
 983	.unlock = vmballoon_unlock_page
 984};
 985
 986static const struct vmballoon_ops vmballoon_batched_ops = {
 987	.add_page = vmballoon_add_batched_page,
 988	.lock = vmballoon_lock_batched_page,
 989	.unlock = vmballoon_unlock_batched_page
 990};
 991
 992static bool vmballoon_init_batching(struct vmballoon *b)
 993{
 994	b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
 995	if (!b->page)
 996		return false;
 997
 998	b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
 999	if (!b->batch_page) {
1000		__free_page(b->page);
1001		return false;
1002	}
1003
1004	return true;
1005}
1006
1007/*
1008 * Receive notification and resize balloon
1009 */
1010static void vmballoon_doorbell(void *client_data)
1011{
1012	struct vmballoon *b = client_data;
1013
1014	STATS_INC(b->stats.doorbell);
1015
1016	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
1017}
1018
1019/*
1020 * Clean up vmci doorbell
1021 */
1022static void vmballoon_vmci_cleanup(struct vmballoon *b)
1023{
1024	int error;
1025
1026	VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
1027			VMCI_INVALID_ID, error);
1028	STATS_INC(b->stats.doorbell_unset);
1029
1030	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
1031		vmci_doorbell_destroy(b->vmci_doorbell);
1032		b->vmci_doorbell = VMCI_INVALID_HANDLE;
1033	}
1034}
1035
1036/*
1037 * Initialize vmci doorbell, to get notified as soon as balloon changes
1038 */
1039static int vmballoon_vmci_init(struct vmballoon *b)
1040{
1041	int error = 0;
1042
1043	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) {
1044		error = vmci_doorbell_create(&b->vmci_doorbell,
1045				VMCI_FLAG_DELAYED_CB,
1046				VMCI_PRIVILEGE_FLAG_RESTRICTED,
1047				vmballoon_doorbell, b);
1048
1049		if (error == VMCI_SUCCESS) {
1050			VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET,
1051					b->vmci_doorbell.context,
1052					b->vmci_doorbell.resource, error);
1053			STATS_INC(b->stats.doorbell_set);
1054		}
1055	}
1056
1057	if (error != 0) {
1058		vmballoon_vmci_cleanup(b);
1059
1060		return -EIO;
1061	}
1062
1063	return 0;
1064}
1065
1066/*
1067 * Perform standard reset sequence by popping the balloon (in case it
1068 * is not  empty) and then restarting protocol. This operation normally
1069 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
1070 */
1071static void vmballoon_reset(struct vmballoon *b)
1072{
1073	int error;
1074
1075	vmballoon_vmci_cleanup(b);
1076
1077	/* free all pages, skipping monitor unlock */
1078	vmballoon_pop(b);
1079
1080	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
1081		return;
1082
1083	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
1084		b->ops = &vmballoon_batched_ops;
1085		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
1086		if (!vmballoon_init_batching(b)) {
1087			/*
1088			 * We failed to initialize batching, inform the monitor
1089			 * about it by sending a null capability.
1090			 *
1091			 * The guest will retry in one second.
1092			 */
1093			vmballoon_send_start(b, 0);
1094			return;
1095		}
1096	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
1097		b->ops = &vmballoon_basic_ops;
1098		b->batch_max_pages = 1;
1099	}
1100
1101	b->reset_required = false;
1102
1103	error = vmballoon_vmci_init(b);
1104	if (error)
1105		pr_err("failed to initialize vmci doorbell\n");
1106
1107	if (!vmballoon_send_guest_id(b))
1108		pr_err("failed to send guest ID to the host\n");
1109}
1110
1111/*
1112 * Balloon work function: reset protocol, if needed, get the new size and
1113 * adjust balloon as needed. Repeat in 1 sec.
1114 */
1115static void vmballoon_work(struct work_struct *work)
1116{
1117	struct delayed_work *dwork = to_delayed_work(work);
1118	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
1119	unsigned int target;
1120
1121	STATS_INC(b->stats.timer);
1122
1123	if (b->reset_required)
1124		vmballoon_reset(b);
1125
1126	if (b->slow_allocation_cycles > 0)
1127		b->slow_allocation_cycles--;
1128
1129	if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
1130		/* update target, adjust size */
1131		b->target = target;
1132
1133		if (b->size < target)
1134			vmballoon_inflate(b);
1135		else if (target == 0 ||
1136				b->size > target + vmballoon_page_size(true))
1137			vmballoon_deflate(b);
1138	}
1139
1140	/*
1141	 * We are using a freezable workqueue so that balloon operations are
1142	 * stopped while the system transitions to/from sleep/hibernation.
1143	 */
1144	queue_delayed_work(system_freezable_wq,
1145			   dwork, round_jiffies_relative(HZ));
1146}
1147
1148/*
1149 * DEBUGFS Interface
1150 */
1151#ifdef CONFIG_DEBUG_FS
1152
1153static int vmballoon_debug_show(struct seq_file *f, void *offset)
1154{
1155	struct vmballoon *b = f->private;
1156	struct vmballoon_stats *stats = &b->stats;
1157
1158	/* format capabilities info */
1159	seq_printf(f,
1160		   "balloon capabilities:   %#4x\n"
1161		   "used capabilities:      %#4lx\n"
1162		   "is resetting:           %c\n",
1163		   VMW_BALLOON_CAPABILITIES, b->capabilities,
1164		   b->reset_required ? 'y' : 'n');
1165
1166	/* format size info */
1167	seq_printf(f,
1168		   "target:             %8d pages\n"
1169		   "current:            %8d pages\n",
1170		   b->target, b->size);
1171
1172	/* format rate info */
1173	seq_printf(f,
1174		   "rateSleepAlloc:     %8d pages/sec\n",
1175		   b->rate_alloc);
 
 
 
1176
1177	seq_printf(f,
1178		   "\n"
1179		   "timer:              %8u\n"
1180		   "doorbell:           %8u\n"
1181		   "start:              %8u (%4u failed)\n"
1182		   "guestType:          %8u (%4u failed)\n"
1183		   "2m-lock:            %8u (%4u failed)\n"
1184		   "lock:               %8u (%4u failed)\n"
1185		   "2m-unlock:          %8u (%4u failed)\n"
1186		   "unlock:             %8u (%4u failed)\n"
1187		   "target:             %8u (%4u failed)\n"
1188		   "prim2mAlloc:        %8u (%4u failed)\n"
1189		   "primNoSleepAlloc:   %8u (%4u failed)\n"
1190		   "primCanSleepAlloc:  %8u (%4u failed)\n"
1191		   "prim2mFree:         %8u\n"
1192		   "primFree:           %8u\n"
1193		   "err2mAlloc:         %8u\n"
1194		   "errAlloc:           %8u\n"
1195		   "err2mFree:          %8u\n"
1196		   "errFree:            %8u\n"
1197		   "doorbellSet:        %8u\n"
1198		   "doorbellUnset:      %8u\n",
1199		   stats->timer,
1200		   stats->doorbell,
1201		   stats->start, stats->start_fail,
1202		   stats->guest_type, stats->guest_type_fail,
1203		   stats->lock[true],  stats->lock_fail[true],
1204		   stats->lock[false],  stats->lock_fail[false],
1205		   stats->unlock[true], stats->unlock_fail[true],
1206		   stats->unlock[false], stats->unlock_fail[false],
1207		   stats->target, stats->target_fail,
1208		   stats->alloc[true], stats->alloc_fail[true],
1209		   stats->alloc[false], stats->alloc_fail[false],
1210		   stats->sleep_alloc, stats->sleep_alloc_fail,
1211		   stats->free[true],
1212		   stats->free[false],
1213		   stats->refused_alloc[true], stats->refused_alloc[false],
1214		   stats->refused_free[true], stats->refused_free[false],
1215		   stats->doorbell_set, stats->doorbell_unset);
1216
1217	return 0;
1218}
1219
1220static int vmballoon_debug_open(struct inode *inode, struct file *file)
1221{
1222	return single_open(file, vmballoon_debug_show, inode->i_private);
1223}
1224
1225static const struct file_operations vmballoon_debug_fops = {
1226	.owner		= THIS_MODULE,
1227	.open		= vmballoon_debug_open,
1228	.read		= seq_read,
1229	.llseek		= seq_lseek,
1230	.release	= single_release,
1231};
1232
1233static int __init vmballoon_debugfs_init(struct vmballoon *b)
1234{
1235	int error;
1236
1237	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
1238					   &vmballoon_debug_fops);
1239	if (IS_ERR(b->dbg_entry)) {
1240		error = PTR_ERR(b->dbg_entry);
1241		pr_err("failed to create debugfs entry, error: %d\n", error);
1242		return error;
1243	}
1244
1245	return 0;
1246}
1247
1248static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
1249{
1250	debugfs_remove(b->dbg_entry);
1251}
1252
1253#else
1254
/* Without CONFIG_DEBUG_FS there is nothing to create; report success. */
static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}
1259
/* Without CONFIG_DEBUG_FS there is nothing to remove. */
static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}
1263
1264#endif	/* CONFIG_DEBUG_FS */
1265
1266static int __init vmballoon_init(void)
1267{
1268	int error;
1269	unsigned is_2m_pages;
1270	/*
1271	 * Check if we are running on VMware's hypervisor and bail out
1272	 * if we are not.
1273	 */
1274	if (x86_hyper_type != X86_HYPER_VMWARE)
1275		return -ENODEV;
1276
1277	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
1278			is_2m_pages++) {
1279		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
1280		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
1281	}
1282
1283	/* initialize rates */
1284	balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;
 
1285
1286	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
1287
 
 
 
 
 
 
 
 
 
 
 
 
 
1288	error = vmballoon_debugfs_init(&balloon);
1289	if (error)
1290		return error;
1291
1292	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
1293	balloon.batch_page = NULL;
1294	balloon.page = NULL;
1295	balloon.reset_required = true;
1296
1297	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
1298
1299	return 0;
1300}
1301module_init(vmballoon_init);
1302
1303static void __exit vmballoon_exit(void)
1304{
1305	vmballoon_vmci_cleanup(&balloon);
1306	cancel_delayed_work_sync(&balloon.dwork);
1307
1308	vmballoon_debugfs_exit(&balloon);
1309
1310	/*
1311	 * Deallocate all reserved memory, and reset connection with monitor.
1312	 * Reset connection before deallocating memory to avoid potential for
1313	 * additional spurious resets from guest touching deallocated pages.
1314	 */
1315	vmballoon_send_start(&balloon, 0);
1316	vmballoon_pop(&balloon);
1317}
1318module_exit(vmballoon_exit);