// SPDX-License-Identifier: GPL-2.0
/*
 * A memslot-related performance benchmark.
 *
 * Copyright (C) 2021 Oracle and/or its affiliates.
 *
 * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
 */
#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>

#include <linux/compiler.h>
#include <linux/sizes.h>

#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>

#define MEM_EXTRA_SIZE		SZ_64K

#define MEM_SIZE		(SZ_512M + MEM_EXTRA_SIZE)
#define MEM_GPA			SZ_256M
#define MEM_AUX_GPA		MEM_GPA
#define MEM_SYNC_GPA		MEM_AUX_GPA
#define MEM_TEST_GPA		(MEM_AUX_GPA + MEM_EXTRA_SIZE)
#define MEM_TEST_SIZE		(MEM_SIZE - MEM_EXTRA_SIZE)
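/*
 * Resulting layout: the MEM_EXTRA_SIZE (64 KiB) auxiliary / sync area sits
 * at MEM_GPA (256 MiB), immediately followed by the MEM_TEST_SIZE test area,
 * for MEM_SIZE bytes of guest memory in total.
 */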

/*
 * 32 MiB is the maximum size that gets well over 100 iterations on 509 slots.
 * Considering that each slot needs to have at least one page, up to
 * 8194 slots in use can then be tested (although with slightly
 * limited resolution).
 */
#define MEM_SIZE_MAP		(SZ_32M + MEM_EXTRA_SIZE)
#define MEM_TEST_MAP_SIZE	(MEM_SIZE_MAP - MEM_EXTRA_SIZE)
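/*
 * E.g. with 4 KiB guest pages MEM_TEST_MAP_SIZE covers 8192 pages, written
 * by the guest (and unmapped by the host) in two 4096-page halves per
 * iteration.
 */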

/*
 * 128 MiB is the minimum size that fills 32k slots with at least one page
 * each while at the same time getting 100+ iterations in such a test.
 *
 * 2 MiB chunk size, like a typical huge page.
 */
#define MEM_TEST_UNMAP_SIZE		SZ_128M
#define MEM_TEST_UNMAP_CHUNK_SIZE	SZ_2M
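/*
 * E.g. with 4 KiB guest pages that is 32768 test pages, unmapped either one
 * (host) page at a time or in 512-page (2 MiB) chunks, depending on the
 * test variant.
 */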

/*
 * For the move active test the middle of the test area is placed on
 * a memslot boundary: half lies in the memslot being moved, half in
 * other memslot(s).
 *
 * We have different numbers of memory slots, excluding the reserved
 * memory slot 0, on various architectures and configurations. The
 * memory size in this test is calculated by picking the maximal
 * last memory slot's memory size, with alignment to the largest
 * supported page size (64KB). In this way, the selected memory
 * size for this test is compatible with test_memslot_move_prepare().
 *
 * architecture   slots    memory-per-slot    memory-on-last-slot
 * --------------------------------------------------------------
 * x86-4KB        32763    16KB               160KB
 * arm64-4KB      32766    16KB               112KB
 * arm64-16KB     32766    16KB               112KB
 * arm64-64KB     8192     64KB               128KB
 */
#define MEM_TEST_MOVE_SIZE		(3 * SZ_64K)
#define MEM_TEST_MOVE_GPA_DEST		(MEM_GPA + MEM_SIZE)
static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
	      "invalid move test region size");
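/*
 * With MEM_TEST_MOVE_SIZE of 3 * 64 KiB the active-move variant needs 96 KiB
 * (half the area) to fit in the last slot; every last-slot size in the table
 * above (112 KiB or more) satisfies that.
 */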

#define MEM_TEST_VAL_1 0x1122334455667788
#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00

struct vm_data {
	struct kvm_vm *vm;
	struct kvm_vcpu *vcpu;
	pthread_t vcpu_thread;
	uint32_t nslots;
	uint64_t npages;
	uint64_t pages_per_slot;
	void **hva_slots;
	bool mmio_ok;
	uint64_t mmio_gpa_min;
	uint64_t mmio_gpa_max;
};

struct sync_area {
	uint32_t    guest_page_size;
	atomic_bool start_flag;
	atomic_bool exit_flag;
	atomic_bool sync_flag;
	void *move_area_ptr;
};
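/*
 * The sync area lives at MEM_SYNC_GPA: it is identity-mapped in the guest
 * and reachable by the host via vm_gpa2hva(), so both sides poll the same
 * flags in the same backing page.
 */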

/*
 * Technically, we also need the atomic bool to be address-free, which
 * is recommended, but not strictly required, by C11 for lockless
 * implementations.
 * However, in practice both GCC and Clang fulfill this requirement on
 * all KVM-supported platforms.
 */
static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
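/* A value of 2 means the type is always lock-free per C11. */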

static sem_t vcpu_ready;

static bool map_unmap_verify;
#ifdef __x86_64__
static bool disable_slot_zap_quirk;
#endif

static bool verbose;
#define pr_info_v(...)				\
	do {					\
		if (verbose)			\
			pr_info(__VA_ARGS__);	\
	} while (0)

static void check_mmio_access(struct vm_data *data, struct kvm_run *run)
{
	TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit");
	TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
	TEST_ASSERT(run->mmio.len == 8,
		    "Unexpected exit mmio size = %u", run->mmio.len);
	TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min &&
		    run->mmio.phys_addr <= data->mmio_gpa_max,
		    "Unexpected exit mmio address = 0x%llx",
		    run->mmio.phys_addr);
}

static void *vcpu_worker(void *__data)
{
	struct vm_data *data = __data;
	struct kvm_vcpu *vcpu = data->vcpu;
	struct kvm_run *run = vcpu->run;
	struct ucall uc;

	while (1) {
		vcpu_run(vcpu);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			TEST_ASSERT(uc.args[1] == 0,
				"Unexpected sync ucall, got %lx",
				(ulong)uc.args[1]);
			sem_post(&vcpu_ready);
			continue;
		case UCALL_NONE:
			if (run->exit_reason == KVM_EXIT_MMIO)
				check_mmio_access(data, run);
			else
				goto done;
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}
	}

done:
	return NULL;
}

static void wait_for_vcpu(void)
{
	struct timespec ts;

	TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
		    "clock_gettime() failed: %d", errno);

	ts.tv_sec += 2;
	TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
		    "sem_timedwait() failed: %d", errno);
}

static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
{
	uint64_t gpage, pgoffs;
	uint32_t slot, slotoffs;
	void *base;
	uint32_t guest_page_size = data->vm->page_size;

	TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
	TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size,
		    "Too high gpa to translate");
	gpa -= MEM_GPA;

	gpage = gpa / guest_page_size;
	pgoffs = gpa % guest_page_size;
	slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
	slotoffs = gpage - (slot * data->pages_per_slot);

	if (rempages) {
		uint64_t slotpages;

		if (slot == data->nslots - 1)
			slotpages = data->npages - slot * data->pages_per_slot;
		else
			slotpages = data->pages_per_slot;

		TEST_ASSERT(!pgoffs,
			    "Asking for remaining pages in slot but gpa not page aligned");
		*rempages = slotpages - slotoffs;
	}

	base = data->hva_slots[slot];
	return (uint8_t *)base + slotoffs * guest_page_size + pgoffs;
}

static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
{
	uint32_t guest_page_size = data->vm->page_size;

	TEST_ASSERT(slot < data->nslots, "Too high slot number");

	return MEM_GPA + slot * data->pages_per_slot * guest_page_size;
}

static struct vm_data *alloc_vm(void)
{
	struct vm_data *data;

	data = malloc(sizeof(*data));
	TEST_ASSERT(data, "malloc(vmdata) failed");

	data->vm = NULL;
	data->vcpu = NULL;
	data->hva_slots = NULL;

	return data;
}

static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size,
			     uint64_t pages_per_slot, uint64_t rempages)
{
	if (!pages_per_slot)
		return false;

	if ((pages_per_slot * guest_page_size) % host_page_size)
		return false;

	if ((rempages * guest_page_size) % host_page_size)
		return false;

	return true;
}
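/*
 * KVM requires memslot sizes (including the leftover tail added to the last
 * slot) to be host-page aligned, which matters when guest pages are smaller
 * than host pages; reject slot layouts that would violate that.
 */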


static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
{
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t mempages, pages_per_slot, rempages;
	uint64_t slots;

	mempages = data->npages;
	slots = data->nslots;
	while (--slots > 1) {
		pages_per_slot = mempages / slots;
		if (!pages_per_slot)
			continue;

		rempages = mempages % pages_per_slot;
		if (check_slot_pages(host_page_size, guest_page_size,
				     pages_per_slot, rempages))
			return slots + 1;	/* slot 0 is reserved */
	}

	return 0;
}

static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
		       void *guest_code, uint64_t mem_size,
		       struct timespec *slot_runtime)
{
	uint64_t mempages, rempages;
	uint64_t guest_addr;
	uint32_t slot, host_page_size, guest_page_size;
	struct timespec tstart;
	struct sync_area *sync;

	host_page_size = getpagesize();
	guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
	mempages = mem_size / guest_page_size;

	data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
	TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size");

	data->npages = mempages;
	TEST_ASSERT(data->npages > 1, "Can't test without any memory");
	data->nslots = nslots;
	data->pages_per_slot = data->npages / data->nslots;
	rempages = data->npages % data->nslots;
	if (!check_slot_pages(host_page_size, guest_page_size,
			      data->pages_per_slot, rempages)) {
		*maxslots = get_max_slots(data, host_page_size);
		return false;
	}

	data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
	TEST_ASSERT(data->hva_slots, "malloc() fail");

	pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
		data->nslots, data->pages_per_slot, rempages);

	clock_gettime(CLOCK_MONOTONIC, &tstart);
	for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
		uint64_t npages;

		npages = data->pages_per_slot;
		if (slot == data->nslots)
			npages += rempages;

		vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
					    guest_addr, slot, npages,
					    0);
		guest_addr += npages * guest_page_size;
	}
	*slot_runtime = timespec_elapsed(tstart);

	for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
		uint64_t npages;
		uint64_t gpa;

		npages = data->pages_per_slot;
		if (slot == data->nslots)
			npages += rempages;

		gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot);
		TEST_ASSERT(gpa == guest_addr,
			    "vm_phy_pages_alloc() failed");

		data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
		memset(data->hva_slots[slot - 1], 0, npages * guest_page_size);

		guest_addr += npages * guest_page_size;
	}

	virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages);

	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
	sync->guest_page_size = data->vm->page_size;
	atomic_init(&sync->start_flag, false);
	atomic_init(&sync->exit_flag, false);
	atomic_init(&sync->sync_flag, false);

	data->mmio_ok = false;

	return true;
}

static void launch_vm(struct vm_data *data)
{
	pr_info_v("Launching the test VM\n");

	pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);

	/* Ensure the guest thread is spun up. */
	wait_for_vcpu();
}

static void free_vm(struct vm_data *data)
{
	kvm_vm_free(data->vm);
	free(data->hva_slots);
	free(data);
}

static void wait_guest_exit(struct vm_data *data)
{
	pthread_join(data->vcpu_thread, NULL);
}

static void let_guest_run(struct sync_area *sync)
{
	atomic_store_explicit(&sync->start_flag, true, memory_order_release);
}

static void guest_spin_until_start(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

	while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
		;
}

static void make_guest_exit(struct sync_area *sync)
{
	atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
}

static bool _guest_should_exit(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

	return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
}

#define guest_should_exit() unlikely(_guest_should_exit())

/*
 * noinline so we can easily see how much time the host spends waiting
 * for the guest.
 * For the same reason use alarm() instead of polling clock_gettime()
 * to implement a wait timeout.
 */
static noinline void host_perform_sync(struct sync_area *sync)
{
	alarm(10);

	atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
	while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
		;

	alarm(0);
}

static bool guest_perform_sync(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
	bool expected;

	do {
		if (guest_should_exit())
			return false;

		expected = true;
	} while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
							&expected, false,
							memory_order_acq_rel,
							memory_order_relaxed));

	return true;
}
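/*
 * A sync round thus consists of the host setting sync_flag and spinning
 * until the guest's compare-exchange clears it, while the guest keeps
 * retrying (checking for an exit request in between) until it observes
 * the flag set.
 */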

static void guest_code_test_memslot_move(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
	uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
	uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (!guest_should_exit()) {
		uintptr_t ptr;

		for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
		     ptr += page_size)
			*(uint64_t *)ptr = MEM_TEST_VAL_1;

		/*
		 * No host sync here since the MMIO exits are so expensive
		 * that the host would spend most of its time waiting for
		 * the guest and so, instead of measuring memslot move
		 * performance, we would measure the performance and
		 * likelihood of MMIO exits.
		 */
	}

	GUEST_DONE();
}

static void guest_code_test_memslot_map(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
	uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (1) {
		uintptr_t ptr;

		for (ptr = MEM_TEST_GPA;
		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
		     ptr += page_size)
			*(uint64_t *)ptr = MEM_TEST_VAL_1;

		if (!guest_perform_sync())
			break;

		for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE;
		     ptr += page_size)
			*(uint64_t *)ptr = MEM_TEST_VAL_2;

		if (!guest_perform_sync())
			break;
	}

	GUEST_DONE();
}

static void guest_code_test_memslot_unmap(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (1) {
		uintptr_t ptr = MEM_TEST_GPA;

		/*
		 * We can afford to access (map) just a small number of pages
		 * per host sync as otherwise the host would spend
		 * a significant amount of its time waiting for the guest
		 * (instead of doing unmap operations), which would
		 * effectively turn this test into a map performance test.
		 *
		 * Just access a single page to be on the safe side.
		 */
		*(uint64_t *)ptr = MEM_TEST_VAL_1;

		if (!guest_perform_sync())
			break;

		ptr += MEM_TEST_UNMAP_SIZE / 2;
		*(uint64_t *)ptr = MEM_TEST_VAL_2;

		if (!guest_perform_sync())
			break;
	}

	GUEST_DONE();
}

static void guest_code_test_memslot_rw(void)
{
	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
	uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);

	GUEST_SYNC(0);

	guest_spin_until_start();

	while (1) {
		uintptr_t ptr;

		for (ptr = MEM_TEST_GPA;
		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size)
			*(uint64_t *)ptr = MEM_TEST_VAL_1;

		if (!guest_perform_sync())
			break;

		for (ptr = MEM_TEST_GPA + page_size / 2;
		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
			uint64_t val = *(uint64_t *)ptr;

			GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
			*(uint64_t *)ptr = 0;
		}

		if (!guest_perform_sync())
			break;
	}

	GUEST_DONE();
}
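/*
 * The RW test pairs with test_memslot_rw_loop() on the host side: the guest
 * writes MEM_TEST_VAL_1 at page starts and expects MEM_TEST_VAL_2 at page
 * midpoints, while the host does the reverse between syncs.
 */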

static bool test_memslot_move_prepare(struct vm_data *data,
				      struct sync_area *sync,
				      uint64_t *maxslots, bool isactive)
{
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t movesrcgpa, movetestgpa;

#ifdef __x86_64__
	if (disable_slot_zap_quirk)
		vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
#endif

	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);

	if (isactive) {
		uint64_t lastpages;

		vm_gpa2hva(data, movesrcgpa, &lastpages);
		if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) {
			*maxslots = 0;
			return false;
		}
	}

	movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
	sync->move_area_ptr = (void *)movetestgpa;

	if (isactive) {
		data->mmio_ok = true;
		data->mmio_gpa_min = movesrcgpa;
		data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
	}

	return true;
}
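/*
 * In the active case the test area straddles the moved slot's start, so
 * while that slot is being moved half of the guest writes hit an unbacked
 * GPA range and show up as MMIO exits; mmio_gpa_min/max above tell
 * vcpu_worker() which exits to tolerate.
 */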

static bool test_memslot_move_prepare_active(struct vm_data *data,
					     struct sync_area *sync,
					     uint64_t *maxslots)
{
	return test_memslot_move_prepare(data, sync, maxslots, true);
}

static bool test_memslot_move_prepare_inactive(struct vm_data *data,
					       struct sync_area *sync,
					       uint64_t *maxslots)
{
	return test_memslot_move_prepare(data, sync, maxslots, false);
}

static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
{
	uint64_t movesrcgpa;

	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
	vm_mem_region_move(data->vm, data->nslots - 1 + 1,
			   MEM_TEST_MOVE_GPA_DEST);
	vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
}
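/*
 * Note that each benchmark iteration performs two memslot moves of the last
 * added memslot: out to MEM_TEST_MOVE_GPA_DEST and back to its original GPA.
 */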

static void test_memslot_do_unmap(struct vm_data *data,
				  uint64_t offsp, uint64_t count)
{
	uint64_t gpa, ctr;
	uint32_t guest_page_size = data->vm->page_size;

	for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) {
		uint64_t npages;
		void *hva;
		int ret;

		hva = vm_gpa2hva(data, gpa, &npages);
		TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
		npages = min(npages, count - ctr);
		ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED);
		TEST_ASSERT(!ret,
			    "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
			    hva, gpa);
		ctr += npages;
		gpa += npages * guest_page_size;
	}
	TEST_ASSERT(ctr == count,
		    "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
}
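/*
 * madvise(MADV_DONTNEED) drops the anonymous backing pages, so the next
 * guest access to the range has to be faulted in and mapped by KVM again;
 * that re-mapping cost is what the guest-side "map" half of the tests
 * ends up exercising.
 */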

static void test_memslot_map_unmap_check(struct vm_data *data,
					 uint64_t offsp, uint64_t valexp)
{
	uint64_t gpa;
	uint64_t *val;
	uint32_t guest_page_size = data->vm->page_size;

	if (!map_unmap_verify)
		return;

	gpa = MEM_TEST_GPA + offsp * guest_page_size;
	val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
	TEST_ASSERT(*val == valexp,
		    "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
		    *val, valexp, gpa);
	*val = 0;
}

static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
{
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size;

	/*
	 * Unmap the second half of the test area while the guest writes to
	 * (maps) the first half.
	 */
	test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2);

	/*
	 * Wait for the guest to finish writing the first half of the test
	 * area, verify the written value on the first and the last page of
	 * this area and then unmap it.
	 * Meanwhile, the guest is writing to (mapping) the second half of
	 * the test area.
	 */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
	test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1);
	test_memslot_do_unmap(data, 0, guest_pages / 2);

	/*
	 * Wait for the guest to finish writing the second half of the test
	 * area and verify the written value on the first and the last page
	 * of this area.
	 * The area will be unmapped at the beginning of the next loop
	 * iteration.
	 * Meanwhile, the guest is writing to (mapping) the first half of
	 * the test area.
	 */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
	test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2);
}

static void test_memslot_unmap_loop_common(struct vm_data *data,
					   struct sync_area *sync,
					   uint64_t chunk)
{
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size;
	uint64_t ctr;

	/*
	 * Wait for the guest to finish mapping page(s) in the first half
	 * of the test area, verify the written value and then perform unmap
	 * of this area.
	 * Meanwhile, the guest is writing to (mapping) page(s) in the second
	 * half of the test area.
	 */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
	for (ctr = 0; ctr < guest_pages / 2; ctr += chunk)
		test_memslot_do_unmap(data, ctr, chunk);

	/* Likewise, but for the opposite host / guest areas */
	host_perform_sync(sync);
	test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
	for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk)
		test_memslot_do_unmap(data, ctr, chunk);
}

static void test_memslot_unmap_loop(struct vm_data *data,
				    struct sync_area *sync)
{
	uint32_t host_page_size = getpagesize();
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t guest_chunk_pages = guest_page_size >= host_page_size ?
					1 : host_page_size / guest_page_size;

	test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
}

static void test_memslot_unmap_loop_chunked(struct vm_data *data,
					    struct sync_area *sync)
{
	uint32_t guest_page_size = data->vm->page_size;
	uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size;

	test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
}
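/*
 * The chunked variant unmaps MEM_TEST_UNMAP_CHUNK_SIZE (2 MiB) at a time
 * instead of single (host) pages, approximating huge-page-sized
 * invalidations.
 */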

static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
{
	uint64_t gptr;
	uint32_t guest_page_size = data->vm->page_size;

	for (gptr = MEM_TEST_GPA + guest_page_size / 2;
	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size)
		*(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;

	host_perform_sync(sync);

	for (gptr = MEM_TEST_GPA;
	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) {
		uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
		uint64_t val = *vptr;

		TEST_ASSERT(val == MEM_TEST_VAL_1,
			    "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
			    val, gptr);
		*vptr = 0;
	}

	host_perform_sync(sync);
}

struct test_data {
	const char *name;
	uint64_t mem_size;
	void (*guest_code)(void);
	bool (*prepare)(struct vm_data *data, struct sync_area *sync,
			uint64_t *maxslots);
	void (*loop)(struct vm_data *data, struct sync_area *sync);
};

static bool test_execute(int nslots, uint64_t *maxslots,
			 unsigned int maxtime,
			 const struct test_data *tdata,
			 uint64_t *nloops,
			 struct timespec *slot_runtime,
			 struct timespec *guest_runtime)
{
	uint64_t mem_size = tdata->mem_size ? : MEM_SIZE;
	struct vm_data *data;
	struct sync_area *sync;
	struct timespec tstart;
	bool ret = true;

	data = alloc_vm();
	if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
			mem_size, slot_runtime)) {
		ret = false;
		goto exit_free;
	}

	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
	if (tdata->prepare &&
	    !tdata->prepare(data, sync, maxslots)) {
		ret = false;
		goto exit_free;
	}

	launch_vm(data);

	clock_gettime(CLOCK_MONOTONIC, &tstart);
	let_guest_run(sync);

	while (1) {
		*guest_runtime = timespec_elapsed(tstart);
		if (guest_runtime->tv_sec >= maxtime)
			break;

		tdata->loop(data, sync);

		(*nloops)++;
	}

	make_guest_exit(sync);
	wait_guest_exit(data);

exit_free:
	free_vm(data);

	return ret;
}

static const struct test_data tests[] = {
	{
		.name = "map",
		.mem_size = MEM_SIZE_MAP,
		.guest_code = guest_code_test_memslot_map,
		.loop = test_memslot_map_loop,
	},
	{
		.name = "unmap",
		.mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
		.guest_code = guest_code_test_memslot_unmap,
		.loop = test_memslot_unmap_loop,
	},
	{
		.name = "unmap chunked",
		.mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
		.guest_code = guest_code_test_memslot_unmap,
		.loop = test_memslot_unmap_loop_chunked,
	},
	{
		.name = "move active area",
		.guest_code = guest_code_test_memslot_move,
		.prepare = test_memslot_move_prepare_active,
		.loop = test_memslot_move_loop,
	},
	{
		.name = "move inactive area",
		.guest_code = guest_code_test_memslot_move,
		.prepare = test_memslot_move_prepare_inactive,
		.loop = test_memslot_move_loop,
	},
	{
		.name = "RW",
		.guest_code = guest_code_test_memslot_rw,
		.loop = test_memslot_rw_loop
	},
};

#define NTESTS ARRAY_SIZE(tests)

struct test_args {
	int tfirst;
	int tlast;
	int nslots;
	int seconds;
	int runs;
};

static void help(char *name, struct test_args *targs)
{
	int ctr;

	pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
		name);
	pr_info(" -h: print this help screen.\n");
	pr_info(" -v: enable verbose mode (not for benchmarking).\n");
	pr_info(" -d: enable extra debug checks.\n");
	pr_info(" -q: Disable memslot zap quirk during memslot move.\n");
	pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
		targs->nslots);
	pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
		targs->tfirst, NTESTS - 1);
	pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
		targs->tlast, NTESTS - 1);
	pr_info(" -l: specify the test length in seconds (currently: %i)\n",
		targs->seconds);
	pr_info(" -r: specify the number of runs per test (currently: %i)\n",
		targs->runs);

	pr_info("\nAvailable tests:\n");
	for (ctr = 0; ctr < NTESTS; ctr++)
		pr_info("%d: %s\n", ctr, tests[ctr].name);
}

static bool check_memory_sizes(void)
{
	uint32_t host_page_size = getpagesize();
	uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;

	if (host_page_size > SZ_64K || guest_page_size > SZ_64K) {
		pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n",
			host_page_size, guest_page_size);
		return false;
	}

	if (MEM_SIZE % guest_page_size ||
	    MEM_TEST_SIZE % guest_page_size) {
		pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n");
		return false;
	}

	if (MEM_SIZE_MAP % guest_page_size		||
	    MEM_TEST_MAP_SIZE % guest_page_size		||
	    (MEM_TEST_MAP_SIZE / guest_page_size) <= 2	||
	    (MEM_TEST_MAP_SIZE / guest_page_size) % 2) {
		pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n");
		return false;
	}

	if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE		||
	    MEM_TEST_UNMAP_SIZE % guest_page_size	||
	    (MEM_TEST_UNMAP_SIZE / guest_page_size) %
	    (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) {
		pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n");
		return false;
	}

	return true;
}

static bool parse_args(int argc, char *argv[],
		       struct test_args *targs)
{
	uint32_t max_mem_slots;
	int opt;

	while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) {
		switch (opt) {
		case 'h':
		default:
			help(argv[0], targs);
			return false;
		case 'v':
			verbose = true;
			break;
		case 'd':
			map_unmap_verify = true;
			break;
#ifdef __x86_64__
		case 'q':
			disable_slot_zap_quirk = true;
			TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
				     KVM_X86_QUIRK_SLOT_ZAP_ALL);
			break;
#endif
		case 's':
			targs->nslots = atoi_paranoid(optarg);
			if (targs->nslots <= 1 && targs->nslots != -1) {
				pr_info("Slot count cap must be larger than 1 or -1 for no cap\n");
				return false;
			}
			break;
		case 'f':
			targs->tfirst = atoi_non_negative("First test", optarg);
			break;
		case 'e':
			targs->tlast = atoi_non_negative("Last test", optarg);
			if (targs->tlast >= NTESTS) {
				pr_info("Last test to run has to be non-negative and less than %zu\n",
					NTESTS);
				return false;
			}
			break;
		case 'l':
			targs->seconds = atoi_non_negative("Test length", optarg);
			break;
		case 'r':
			targs->runs = atoi_positive("Runs per test", optarg);
			break;
		}
	}

	if (optind < argc) {
		help(argv[0], targs);
		return false;
	}

	if (targs->tfirst > targs->tlast) {
		pr_info("First test to run cannot be greater than the last test to run\n");
		return false;
	}

	max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
	if (max_mem_slots <= 1) {
		pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n");
		return false;
	}

	/* Memory slot 0 is reserved */
	if (targs->nslots == -1)
		targs->nslots = max_mem_slots - 1;
	else
		targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1;

	pr_info_v("Allowed Number of memory slots: %"PRIu32"\n",
		  targs->nslots + 1);

	return true;
}

struct test_result {
	struct timespec slot_runtime, guest_runtime, iter_runtime;
	int64_t slottimens, runtimens;
	uint64_t nloops;
};

static bool test_loop(const struct test_data *data,
		      const struct test_args *targs,
		      struct test_result *rbestslottime,
		      struct test_result *rbestruntime)
{
	uint64_t maxslots;
	struct test_result result = {};

	if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
			  &result.nloops,
			  &result.slot_runtime, &result.guest_runtime)) {
		if (maxslots)
			pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
				maxslots);
		else
			pr_info("Memslot count may be too high for this test, try adjusting the cap\n");

		return false;
	}

	pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
		result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
		result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
	if (!result.nloops) {
		pr_info("No full loops done - too short test time or system too loaded?\n");
		return true;
	}

	result.iter_runtime = timespec_div(result.guest_runtime,
					   result.nloops);
	pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
		result.nloops,
		result.iter_runtime.tv_sec,
		result.iter_runtime.tv_nsec);
	result.slottimens = timespec_to_ns(result.slot_runtime);
	result.runtimens = timespec_to_ns(result.iter_runtime);

	/*
	 * Only rank the slot setup time for tests using the whole test memory
	 * area so they are comparable
	 */
	if (!data->mem_size &&
	    (!rbestslottime->slottimens ||
	     result.slottimens < rbestslottime->slottimens))
		*rbestslottime = result;
	if (!rbestruntime->runtimens ||
	    result.runtimens < rbestruntime->runtimens)
		*rbestruntime = result;

	return true;
}

int main(int argc, char *argv[])
{
	struct test_args targs = {
		.tfirst = 0,
		.tlast = NTESTS - 1,
		.nslots = -1,
		.seconds = 5,
		.runs = 1,
	};
	struct test_result rbestslottime = {};
	int tctr;

	if (!check_memory_sizes())
		return -1;

	if (!parse_args(argc, argv, &targs))
		return -1;

	for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
		const struct test_data *data = &tests[tctr];
		unsigned int runctr;
		struct test_result rbestruntime = {};

		if (tctr > targs.tfirst)
			pr_info("\n");

		pr_info("Testing %s performance with %i runs, %d seconds each\n",
			data->name, targs.runs, targs.seconds);

		for (runctr = 0; runctr < targs.runs; runctr++)
			if (!test_loop(data, &targs,
				       &rbestslottime, &rbestruntime))
				break;

		if (rbestruntime.runtimens)
			pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
				rbestruntime.iter_runtime.tv_sec,
				rbestruntime.iter_runtime.tv_nsec,
				rbestruntime.nloops);
	}

	if (rbestslottime.slottimens)
		pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
			rbestslottime.slot_runtime.tv_sec,
			rbestslottime.slot_runtime.tv_nsec);

	return 0;
}