Linux Audio

Check our new training course

Loading...
Note: File does not exist in v4.17.
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * COW (Copy On Write) tests.
   4 *
   5 * Copyright 2022, Red Hat, Inc.
   6 *
   7 * Author(s): David Hildenbrand <david@redhat.com>
   8 */
   9#define _GNU_SOURCE
  10#include <stdlib.h>
  11#include <string.h>
  12#include <stdbool.h>
  13#include <stdint.h>
  14#include <unistd.h>
  15#include <errno.h>
  16#include <fcntl.h>
  17#include <dirent.h>
  18#include <assert.h>
  19#include <sys/mman.h>
  20#include <sys/ioctl.h>
  21#include <sys/wait.h>
  22#include <linux/memfd.h>
  23
  24#include "local_config.h"
  25#ifdef LOCAL_CONFIG_HAVE_LIBURING
  26#include <liburing.h>
  27#endif /* LOCAL_CONFIG_HAVE_LIBURING */
  28
  29#include "../../../../mm/gup_test.h"
  30#include "../kselftest.h"
  31#include "vm_util.h"
  32
/* System base-page size; also the granularity of pagemap checks below. */
static size_t pagesize;
/* Presumably an fd for /proc/self/pagemap, opened elsewhere — used by pagemap_is_swapped()/pagemap_is_populated(). */
static int pagemap_fd;
/* PMD THP size detected from sysfs hpage_pmd_size; 0 if THP is unavailable. */
static size_t thpsize;
/* Number of valid entries in hugetlbsizes[], filled by detect_hugetlbsizes(). */
static int nr_hugetlbsizes;
/* Detected hugetlb page sizes in bytes (from /sys/kernel/mm/hugepages/). */
static size_t hugetlbsizes[10];
/* Presumably an fd for the gup_test debugfs device, opened elsewhere — used by the PIN_LONGTERM_TEST_* ioctls. */
static int gup_fd;
/* True if the sysfs use_zero_page knob reports the huge zeropage enabled. */
static bool has_huge_zeropage;
  40
  41static void detect_thpsize(void)
  42{
  43	int fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size",
  44		      O_RDONLY);
  45	size_t size = 0;
  46	char buf[15];
  47	int ret;
  48
  49	if (fd < 0)
  50		return;
  51
  52	ret = pread(fd, buf, sizeof(buf), 0);
  53	if (ret > 0 && ret < sizeof(buf)) {
  54		buf[ret] = 0;
  55
  56		size = strtoul(buf, NULL, 10);
  57		if (size < pagesize)
  58			size = 0;
  59		if (size > 0) {
  60			thpsize = size;
  61			ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
  62				       thpsize / 1024);
  63		}
  64	}
  65
  66	close(fd);
  67}
  68
  69static void detect_huge_zeropage(void)
  70{
  71	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
  72		      O_RDONLY);
  73	size_t enabled = 0;
  74	char buf[15];
  75	int ret;
  76
  77	if (fd < 0)
  78		return;
  79
  80	ret = pread(fd, buf, sizeof(buf), 0);
  81	if (ret > 0 && ret < sizeof(buf)) {
  82		buf[ret] = 0;
  83
  84		enabled = strtoul(buf, NULL, 10);
  85		if (enabled == 1) {
  86			has_huge_zeropage = true;
  87			ksft_print_msg("[INFO] huge zeropage is enabled\n");
  88		}
  89	}
  90
  91	close(fd);
  92}
  93
  94static void detect_hugetlbsizes(void)
  95{
  96	DIR *dir = opendir("/sys/kernel/mm/hugepages/");
  97
  98	if (!dir)
  99		return;
 100
 101	while (nr_hugetlbsizes < ARRAY_SIZE(hugetlbsizes)) {
 102		struct dirent *entry = readdir(dir);
 103		size_t kb;
 104
 105		if (!entry)
 106			break;
 107		if (entry->d_type != DT_DIR)
 108			continue;
 109		if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
 110			continue;
 111		hugetlbsizes[nr_hugetlbsizes] = kb * 1024;
 112		nr_hugetlbsizes++;
 113		ksft_print_msg("[INFO] detected hugetlb size: %zu KiB\n",
 114			       kb);
 115	}
 116	closedir(dir);
 117}
 118
 119static bool range_is_swapped(void *addr, size_t size)
 120{
 121	for (; size; addr += pagesize, size -= pagesize)
 122		if (!pagemap_is_swapped(pagemap_fd, addr))
 123			return false;
 124	return true;
 125}
 126
 127struct comm_pipes {
 128	int child_ready[2];
 129	int parent_ready[2];
 130};
 131
 132static int setup_comm_pipes(struct comm_pipes *comm_pipes)
 133{
 134	if (pipe(comm_pipes->child_ready) < 0)
 135		return -errno;
 136	if (pipe(comm_pipes->parent_ready) < 0) {
 137		close(comm_pipes->child_ready[0]);
 138		close(comm_pipes->child_ready[1]);
 139		return -errno;
 140	}
 141
 142	return 0;
 143}
 144
 145static void close_comm_pipes(struct comm_pipes *comm_pipes)
 146{
 147	close(comm_pipes->child_ready[0]);
 148	close(comm_pipes->child_ready[1]);
 149	close(comm_pipes->parent_ready[0]);
 150	close(comm_pipes->parent_ready[1]);
 151}
 152
 153static int child_memcmp_fn(char *mem, size_t size,
 154			   struct comm_pipes *comm_pipes)
 155{
 156	char *old = malloc(size);
 157	char buf;
 158
 159	/* Backup the original content. */
 160	memcpy(old, mem, size);
 161
 162	/* Wait until the parent modified the page. */
 163	write(comm_pipes->child_ready[1], "0", 1);
 164	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
 165		;
 166
 167	/* See if we still read the old values. */
 168	return memcmp(old, mem, size);
 169}
 170
 171static int child_vmsplice_memcmp_fn(char *mem, size_t size,
 172				    struct comm_pipes *comm_pipes)
 173{
 174	struct iovec iov = {
 175		.iov_base = mem,
 176		.iov_len = size,
 177	};
 178	ssize_t cur, total, transferred;
 179	char *old, *new;
 180	int fds[2];
 181	char buf;
 182
 183	old = malloc(size);
 184	new = malloc(size);
 185
 186	/* Backup the original content. */
 187	memcpy(old, mem, size);
 188
 189	if (pipe(fds) < 0)
 190		return -errno;
 191
 192	/* Trigger a read-only pin. */
 193	transferred = vmsplice(fds[1], &iov, 1, 0);
 194	if (transferred < 0)
 195		return -errno;
 196	if (transferred == 0)
 197		return -EINVAL;
 198
 199	/* Unmap it from our page tables. */
 200	if (munmap(mem, size) < 0)
 201		return -errno;
 202
 203	/* Wait until the parent modified it. */
 204	write(comm_pipes->child_ready[1], "0", 1);
 205	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
 206		;
 207
 208	/* See if we still read the old values via the pipe. */
 209	for (total = 0; total < transferred; total += cur) {
 210		cur = read(fds[0], new + total, transferred - total);
 211		if (cur < 0)
 212			return -errno;
 213	}
 214
 215	return memcmp(old, new, transferred);
 216}
 217
/* Child-side verification callback; its return value becomes the exit status. */
typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);

/*
 * Fork a child, modify the pages in the parent, and let the child (via
 * @fn) verify it still observes the pre-fork content.  If COW is broken,
 * the parent's write leaks into the child and the test fails.
 *
 * @do_mprotect: additionally cycle PROT_READ -> PROT_READ|PROT_WRITE in
 *	the parent before writing, to exercise mprotect() optimizations
 *	that might map pages writable without a write fault.
 */
static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
				  child_fn fn)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_test_result_fail("fork() failed\n");
		goto close_comm_pipes;
	} else if (!ret) {
		/* Child: report the verification result via the exit status. */
		exit(fn(mem, size, &comm_pipes));
	}

	/* Wait until the child snapshotted the original content. */
	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			/* Unblock the child so we can reap it. */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	/* The child exits with 0 iff it still saw the old content. */
	ksft_test_result(!ret, "No leak from parent into child\n");
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}
 273
/* Basic COW: write in the parent, memcmp in the child. */
static void test_cow_in_parent(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
}

/* Same, but exercising mprotect() write-fault-avoidance optimizations first. */
static void test_cow_in_parent_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
}

/* COW with a R/O vmsplice() pin held by the child (CVE-2020-29374 style). */
static void test_vmsplice_in_child(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
}

/* Same, but exercising mprotect() write-fault-avoidance optimizations first. */
static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
}
 293
 294static void do_test_vmsplice_in_parent(char *mem, size_t size,
 295				       bool before_fork)
 296{
 297	struct iovec iov = {
 298		.iov_base = mem,
 299		.iov_len = size,
 300	};
 301	ssize_t cur, total, transferred;
 302	struct comm_pipes comm_pipes;
 303	char *old, *new;
 304	int ret, fds[2];
 305	char buf;
 306
 307	old = malloc(size);
 308	new = malloc(size);
 309
 310	memcpy(old, mem, size);
 311
 312	ret = setup_comm_pipes(&comm_pipes);
 313	if (ret) {
 314		ksft_test_result_fail("pipe() failed\n");
 315		goto free;
 316	}
 317
 318	if (pipe(fds) < 0) {
 319		ksft_test_result_fail("pipe() failed\n");
 320		goto close_comm_pipes;
 321	}
 322
 323	if (before_fork) {
 324		transferred = vmsplice(fds[1], &iov, 1, 0);
 325		if (transferred <= 0) {
 326			ksft_test_result_fail("vmsplice() failed\n");
 327			goto close_pipe;
 328		}
 329	}
 330
 331	ret = fork();
 332	if (ret < 0) {
 333		ksft_test_result_fail("fork() failed\n");
 334		goto close_pipe;
 335	} else if (!ret) {
 336		write(comm_pipes.child_ready[1], "0", 1);
 337		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
 338			;
 339		/* Modify page content in the child. */
 340		memset(mem, 0xff, size);
 341		exit(0);
 342	}
 343
 344	if (!before_fork) {
 345		transferred = vmsplice(fds[1], &iov, 1, 0);
 346		if (transferred <= 0) {
 347			ksft_test_result_fail("vmsplice() failed\n");
 348			wait(&ret);
 349			goto close_pipe;
 350		}
 351	}
 352
 353	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
 354		;
 355	if (munmap(mem, size) < 0) {
 356		ksft_test_result_fail("munmap() failed\n");
 357		goto close_pipe;
 358	}
 359	write(comm_pipes.parent_ready[1], "0", 1);
 360
 361	/* Wait until the child is done writing. */
 362	wait(&ret);
 363	if (!WIFEXITED(ret)) {
 364		ksft_test_result_fail("wait() failed\n");
 365		goto close_pipe;
 366	}
 367
 368	/* See if we still read the old values. */
 369	for (total = 0; total < transferred; total += cur) {
 370		cur = read(fds[0], new + total, transferred - total);
 371		if (cur < 0) {
 372			ksft_test_result_fail("read() failed\n");
 373			goto close_pipe;
 374		}
 375	}
 376
 377	ksft_test_result(!memcmp(old, new, transferred),
 378			 "No leak from child into parent\n");
 379close_pipe:
 380	close(fds[0]);
 381	close(fds[1]);
 382close_comm_pipes:
 383	close_comm_pipes(&comm_pipes);
 384free:
 385	free(old);
 386	free(new);
 387}
 388
/* Pin via vmsplice() before fork(); unmap in the parent afterwards. */
static void test_vmsplice_before_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, true);
}

/* Pin via vmsplice() only after fork(); unmap in the parent afterwards. */
static void test_vmsplice_after_fork(char *mem, size_t size)
{
	do_test_vmsplice_in_parent(mem, size, false);
}
 398
 399#ifdef LOCAL_CONFIG_HAVE_LIBURING
/*
 * Register the range as an io_uring fixed buffer (FOLL_WRITE | FOLL_PIN |
 * FOLL_LONGTERM), then either fork() a child (@use_fork) or remap the
 * pages R/O, modify the memory, and write it out through the pin via
 * IORING_OP_WRITE_FIXED.  The file content read back must match what we
 * wrote, proving the longterm R/W pin stayed attached to our pages.
 */
static void do_test_iouring(char *mem, size_t size, bool use_fork)
{
	struct comm_pipes comm_pipes;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	ssize_t cur, total;
	struct iovec iov;
	/*
	 * NOTE(review): buf is only ever used as a 1-byte sink via &buf in
	 * the handshake read()s below; a plain char would be clearer.
	 */
	char *buf, *tmp;
	int ret, fd;
	FILE *file;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		return;
	}

	/* Scratch file to write the pinned buffer out to. */
	file = tmpfile();
	if (!file) {
		ksft_test_result_fail("tmpfile() failed\n");
		goto close_comm_pipes;
	}
	fd = fileno(file);
	assert(fd);

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		goto close_file;
	}

	/* Skip on errors, as we might just lack kernel support. */
	ret = io_uring_queue_init(1, &ring, 0);
	if (ret < 0) {
		ksft_test_result_skip("io_uring_queue_init() failed\n");
		goto free_tmp;
	}

	/*
	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
	 * | FOLL_LONGTERM the range.
	 *
	 * Skip on errors, as we might just lack kernel support or might not
	 * have sufficient MEMLOCK permissions.
	 */
	iov.iov_base = mem;
	iov.iov_len = size;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		ksft_test_result_skip("io_uring_register_buffers() failed\n");
		goto queue_exit;
	}

	if (use_fork) {
		/*
		 * fork() and keep the child alive until we're done. Note that
		 * we expect the pinned page to not get shared with the child.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto unregister_buffers;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;
	} else {
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto unregister_buffers;
		}
	}

	/*
	 * Modify the page and write page content as observed by the fixed
	 * buffer pin to the file so we can verify it.
	 */
	memset(mem, 0xff, size);
	sqe = io_uring_get_sqe(&ring);
	if (!sqe) {
		ksft_test_result_fail("io_uring_get_sqe() failed\n");
		goto quit_child;
	}
	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);

	ret = io_uring_submit(&ring);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_submit() failed\n");
		goto quit_child;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		ksft_test_result_fail("io_uring_wait_cqe() failed\n");
		goto quit_child;
	}

	/* A short write would indicate the pin did not cover the full range. */
	if (cqe->res != size) {
		ksft_test_result_fail("write_fixed failed\n");
		goto quit_child;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* Read back the file content to the temporary buffer. */
	total = 0;
	while (total < size) {
		cur = pread(fd, tmp + total, size - total, total);
		if (cur < 0) {
			ksft_test_result_fail("pread() failed\n");
			goto quit_child;
		}
		total += cur;
	}

	/* Finally, check if we read what we expected. */
	ksft_test_result(!memcmp(mem, tmp, size),
			 "Longterm R/W pin is reliable\n");

quit_child:
	if (use_fork) {
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
	}
unregister_buffers:
	io_uring_unregister_buffers(&ring);
queue_exit:
	io_uring_queue_exit(&ring);
free_tmp:
	free(tmp);
close_file:
	fclose(file);
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}
 550
/* Fixed-buffer pin with the pages remapped R/O in between. */
static void test_iouring_ro(char *mem, size_t size)
{
	do_test_iouring(mem, size, false);
}

/* Fixed-buffer pin with a fork()ed child alive during the write. */
static void test_iouring_fork(char *mem, size_t size)
{
	do_test_iouring(mem, size, true);
}
 560
 561#endif /* LOCAL_CONFIG_HAVE_LIBURING */
 562
/* Variants of taking a longterm R/O pin, see do_test_ro_pin(). */
enum ro_pin_test {
	/* Pin the pages as-is (exclusive, just written by us). */
	RO_PIN_TEST,
	/* Pin while the pages are COW-shared with a live child. */
	RO_PIN_TEST_SHARED,
	/* Pin after a child shared the pages and already quit. */
	RO_PIN_TEST_PREVIOUSLY_SHARED,
	/* Pin exclusive pages that were remapped R/O via mprotect(). */
	RO_PIN_TEST_RO_EXCLUSIVE,
};
 569
/*
 * Take a longterm R/O pin via the gup_test device (in the state selected
 * by @test), then modify the pages via the page table and verify that the
 * modification is visible through the pin: a R/O pin must not get
 * detached from the page by COW/unsharing.
 *
 * @fast: use GUP-fast (PIN_LONGTERM_TEST_FLAG_USE_FAST) instead of the
 *	regular GUP path.
 */
static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_test_result_skip("gup_test not available\n");
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		ksft_test_result_fail("pipe() failed\n");
		goto free_tmp;
	}

	/* Bring the pages into the sharing state the variant asks for. */
	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_test_result_fail("fork() failed\n");
			goto close_comm_pipes;
		} else if (!ret) {
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it quit.
			 * The pages should now be mapped R/O into our page
			 * tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_test_result_fail("mprotect() failed\n");
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		/* EINVAL presumably means the kernel lacks support -> skip. */
		if (errno == EINVAL)
			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
		else
			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
	tmp_val = (__u64)(uintptr_t)tmp;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
	if (ret)
		ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
	else
		ksft_test_result(!memcmp(mem, tmp, size),
				 "Longterm R/O pin is reliable\n");

	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	if (ret)
		ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
wait:
	/* RO_PIN_TEST_SHARED still has a live child waiting on us. */
	switch (test) {
	case RO_PIN_TEST_SHARED:
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
		if (!WIFEXITED(ret))
			ksft_print_msg("[INFO] wait() failed\n");
		break;
	default:
		break;
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free_tmp:
	free(tmp);
}
 699
/* R/O pin variants: each pairs one sharing state with GUP or GUP-fast. */
static void test_ro_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

static void test_ro_fast_pin_on_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}
 729
/* A test to run on a prepared memory range. */
typedef void (*test_fn)(char *mem, size_t size);

/*
 * Run @fn on a single anonymous base page (THP disallowed via
 * MADV_NOHUGEPAGE).  With @swapout, the page is swapped out first; we
 * skip if that did not work (e.g., no swap configured).
 */
static void do_run_with_base_page(test_fn fn, bool swapout)
{
	char *mem;
	int ret;

	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}

	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
	/* Ignore if not around on a kernel. */
	if (ret && errno != EINVAL) {
		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
		goto munmap;
	}

	/* Populate a base page. */
	memset(mem, 0, pagesize);

	if (swapout) {
		/* MADV_PAGEOUT is best effort: double-check via the pagemap. */
		madvise(mem, pagesize, MADV_PAGEOUT);
		if (!pagemap_is_swapped(pagemap_fd, mem)) {
			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
			goto munmap;
		}
	}

	fn(mem, pagesize);
munmap:
	munmap(mem, pagesize);
}
 766
/* Run @fn on a plain anonymous base page. */
static void run_with_base_page(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with base page\n", desc);
	do_run_with_base_page(fn, false);
}

/* Run @fn on a swapped-out anonymous base page. */
static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
	do_run_with_base_page(fn, true);
}
 778
/* THP states to prepare before running a test, see do_run_with_thp(). */
enum thp_run {
	THP_RUN_PMD,			/* PMD-mapped THP, as populated */
	THP_RUN_PMD_SWAPOUT,		/* ... additionally swapped out */
	THP_RUN_PTE,			/* THP remapped via PTEs */
	THP_RUN_PTE_SWAPOUT,		/* ... additionally swapped out */
	THP_RUN_SINGLE_PTE,		/* single remaining PTE of a THP */
	THP_RUN_SINGLE_PTE_SWAPOUT,	/* ... additionally swapped out */
	THP_RUN_PARTIAL_MREMAP,		/* half the THP mremap()'ed away */
	THP_RUN_PARTIAL_SHARED,		/* only part was ever COW-shared */
};
 789
 790static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
 791{
 792	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
 793	size_t size, mmap_size, mremap_size;
 794	int ret;
 795
 796	/* For alignment purposes, we need twice the thp size. */
 797	mmap_size = 2 * thpsize;
 798	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 799			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 800	if (mmap_mem == MAP_FAILED) {
 801		ksft_test_result_fail("mmap() failed\n");
 802		return;
 803	}
 804
 805	/* We need a THP-aligned memory area. */
 806	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
 807
 808	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
 809	if (ret) {
 810		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
 811		goto munmap;
 812	}
 813
 814	/*
 815	 * Try to populate a THP. Touch the first sub-page and test if we get
 816	 * another sub-page populated automatically.
 817	 */
 818	mem[0] = 0;
 819	if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) {
 820		ksft_test_result_skip("Did not get a THP populated\n");
 821		goto munmap;
 822	}
 823	memset(mem, 0, thpsize);
 824
 825	size = thpsize;
 826	switch (thp_run) {
 827	case THP_RUN_PMD:
 828	case THP_RUN_PMD_SWAPOUT:
 829		break;
 830	case THP_RUN_PTE:
 831	case THP_RUN_PTE_SWAPOUT:
 832		/*
 833		 * Trigger PTE-mapping the THP by temporarily mapping a single
 834		 * subpage R/O.
 835		 */
 836		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
 837		if (ret) {
 838			ksft_test_result_fail("mprotect() failed\n");
 839			goto munmap;
 840		}
 841		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
 842		if (ret) {
 843			ksft_test_result_fail("mprotect() failed\n");
 844			goto munmap;
 845		}
 846		break;
 847	case THP_RUN_SINGLE_PTE:
 848	case THP_RUN_SINGLE_PTE_SWAPOUT:
 849		/*
 850		 * Discard all but a single subpage of that PTE-mapped THP. What
 851		 * remains is a single PTE mapping a single subpage.
 852		 */
 853		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
 854		if (ret) {
 855			ksft_test_result_fail("MADV_DONTNEED failed\n");
 856			goto munmap;
 857		}
 858		size = pagesize;
 859		break;
 860	case THP_RUN_PARTIAL_MREMAP:
 861		/*
 862		 * Remap half of the THP. We need some new memory location
 863		 * for that.
 864		 */
 865		mremap_size = thpsize / 2;
 866		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
 867				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 868		if (mem == MAP_FAILED) {
 869			ksft_test_result_fail("mmap() failed\n");
 870			goto munmap;
 871		}
 872		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
 873			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
 874		if (tmp != mremap_mem) {
 875			ksft_test_result_fail("mremap() failed\n");
 876			goto munmap;
 877		}
 878		size = mremap_size;
 879		break;
 880	case THP_RUN_PARTIAL_SHARED:
 881		/*
 882		 * Share the first page of the THP with a child and quit the
 883		 * child. This will result in some parts of the THP never
 884		 * have been shared.
 885		 */
 886		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
 887		if (ret) {
 888			ksft_test_result_fail("MADV_DONTFORK failed\n");
 889			goto munmap;
 890		}
 891		ret = fork();
 892		if (ret < 0) {
 893			ksft_test_result_fail("fork() failed\n");
 894			goto munmap;
 895		} else if (!ret) {
 896			exit(0);
 897		}
 898		wait(&ret);
 899		/* Allow for sharing all pages again. */
 900		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
 901		if (ret) {
 902			ksft_test_result_fail("MADV_DOFORK failed\n");
 903			goto munmap;
 904		}
 905		break;
 906	default:
 907		assert(false);
 908	}
 909
 910	switch (thp_run) {
 911	case THP_RUN_PMD_SWAPOUT:
 912	case THP_RUN_PTE_SWAPOUT:
 913	case THP_RUN_SINGLE_PTE_SWAPOUT:
 914		madvise(mem, size, MADV_PAGEOUT);
 915		if (!range_is_swapped(mem, size)) {
 916			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
 917			goto munmap;
 918		}
 919		break;
 920	default:
 921		break;
 922	}
 923
 924	fn(mem, size);
 925munmap:
 926	munmap(mmap_mem, mmap_size);
 927	if (mremap_mem != MAP_FAILED)
 928		munmap(mremap_mem, mremap_size);
 929}
 930
/* One wrapper per THP state; each prints the [RUN] banner first. */
static void run_with_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD);
}

static void run_with_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc)
{
	ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED);
}
 978
/*
 * Run @fn on a freshly-populated private hugetlb page of @hugetlbsize
 * bytes.  Skips (rather than fails) when not enough free huge pages of
 * that size are available.
 */
static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
		       hugetlbsize / 1024);

	/* Encode log2(hugetlbsize) so the kernel picks the right pool. */
	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		return;
	}

	/* Populate an huge page. */
	memset(mem, 0, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_test_result_skip("need more free huge pages\n");
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize);
munmap:
	munmap(mem, hugetlbsize);
}
1013
/* A named COW test that gets run against multiple memory backings. */
struct test_case {
	const char *desc;	/* human-readable description */
	test_fn fn;		/* the test to run */
};
1018
/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork().  Each entry is run against all
 * backings by run_anon_test_case().
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we miss to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we miss to break COW, the child observes modifications by the parent.
	 * This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we miss to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we miss to break COW, the parent observes modifications by
	 * the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},

#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};
1140
1141static void run_anon_test_case(struct test_case const *test_case)
1142{
1143	int i;
1144
1145	run_with_base_page(test_case->fn, test_case->desc);
1146	run_with_base_page_swap(test_case->fn, test_case->desc);
1147	if (thpsize) {
1148		run_with_thp(test_case->fn, test_case->desc);
1149		run_with_thp_swap(test_case->fn, test_case->desc);
1150		run_with_pte_mapped_thp(test_case->fn, test_case->desc);
1151		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc);
1152		run_with_single_pte_of_thp(test_case->fn, test_case->desc);
1153		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc);
1154		run_with_partial_mremap_thp(test_case->fn, test_case->desc);
1155		run_with_partial_shared_thp(test_case->fn, test_case->desc);
1156	}
1157	for (i = 0; i < nr_hugetlbsizes; i++)
1158		run_with_hugetlb(test_case->fn, test_case->desc,
1159				 hugetlbsizes[i]);
1160}
1161
1162static void run_anon_test_cases(void)
1163{
1164	int i;
1165
1166	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");
1167
1168	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
1169		run_anon_test_case(&anon_test_cases[i]);
1170}
1171
1172static int tests_per_anon_test_case(void)
1173{
1174	int tests = 2 + nr_hugetlbsizes;
1175
1176	if (thpsize)
1177		tests += 8;
1178	return tests;
1179}
1180
/*
 * Signature of non-anonymous test functions: @mem is a private R/W mapping
 * and @smem a second, R/O mapping of the same underlying page(s) of @size
 * bytes.
 */
typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);
1182
/*
 * Modify the page(s) via the private mapping and verify that the
 * modification is not visible through the other mapping of the same
 * page(s) -- i.e., COW was properly broken.
 */
static void test_cow(char *mem, const char *smem, size_t size)
{
	char *old = malloc(size);

	/* Dereferencing a failed allocation below would be UB. */
	if (!old) {
		ksft_test_result_fail("malloc() failed\n");
		return;
	}

	/* Backup the original content. */
	memcpy(old, smem, size);

	/* Modify the page. */
	memset(mem, 0xff, size);

	/* See if we still read the old values via the other mapping. */
	ksft_test_result(!memcmp(smem, old, size),
			 "Other mapping not modified\n");
	free(old);
}
1198
/* Take a R/O longterm GUP pin on @mem (ordinary GUP path); @smem is unused. */
static void test_ro_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
}
1203
/* Take a R/O longterm GUP pin on @mem using GUP-fast; @smem is unused. */
static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
}
1208
1209static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
1210{
1211	char *mem, *smem, tmp;
1212
1213	ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);
1214
1215	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
1216		   MAP_PRIVATE | MAP_ANON, -1, 0);
1217	if (mem == MAP_FAILED) {
1218		ksft_test_result_fail("mmap() failed\n");
1219		return;
1220	}
1221
1222	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
1223	if (mem == MAP_FAILED) {
1224		ksft_test_result_fail("mmap() failed\n");
1225		goto munmap;
1226	}
1227
1228	/* Read from the page to populate the shared zeropage. */
1229	tmp = *mem + *smem;
1230	asm volatile("" : "+r" (tmp));
1231
1232	fn(mem, smem, pagesize);
1233munmap:
1234	munmap(mem, pagesize);
1235	if (smem != MAP_FAILED)
1236		munmap(smem, pagesize);
1237}
1238
/*
 * Run @fn with both mappings backed by the huge (PMD-sized) shared
 * zeropage. Both anonymous mappings are over-allocated by one THP size so
 * a THP-aligned sub-range can be carved out of each.
 */
static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
{
	char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
	size_t mmap_size;
	int ret;

	ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);

	if (!has_huge_zeropage) {
		ksft_test_result_skip("Huge zeropage not enabled\n");
		return;
	}

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		return;
	}
	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_smem == MAP_FAILED) {
		ksft_test_result_fail("mmap() failed\n");
		goto munmap;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
	smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	ret |= madvise(smem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
		goto munmap;
	}

	/*
	 * Read from the memory to populate the huge shared zeropage. Read from
	 * the first sub-page and test if we get another sub-page populated
	 * automatically.
	 */
	tmp = *mem + *smem;
	asm volatile("" : "+r" (tmp));
	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
		ksft_test_result_skip("Did not get THPs populated\n");
		goto munmap;
	}

	fn(mem, smem, thpsize);
munmap:
	/* mmap_smem may still be MAP_FAILED when jumping here early. */
	munmap(mmap_mem, mmap_size);
	if (mmap_smem != MAP_FAILED)
		munmap(mmap_smem, mmap_size);
}
1297
1298static void run_with_memfd(non_anon_test_fn fn, const char *desc)
1299{
1300	char *mem, *smem, tmp;
1301	int fd;
1302
1303	ksft_print_msg("[RUN] %s ... with memfd\n", desc);
1304
1305	fd = memfd_create("test", 0);
1306	if (fd < 0) {
1307		ksft_test_result_fail("memfd_create() failed\n");
1308		return;
1309	}
1310
1311	/* File consists of a single page filled with zeroes. */
1312	if (fallocate(fd, 0, 0, pagesize)) {
1313		ksft_test_result_fail("fallocate() failed\n");
1314		goto close;
1315	}
1316
1317	/* Create a private mapping of the memfd. */
1318	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1319	if (mem == MAP_FAILED) {
1320		ksft_test_result_fail("mmap() failed\n");
1321		goto close;
1322	}
1323	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
1324	if (mem == MAP_FAILED) {
1325		ksft_test_result_fail("mmap() failed\n");
1326		goto munmap;
1327	}
1328
1329	/* Fault the page in. */
1330	tmp = *mem + *smem;
1331	asm volatile("" : "+r" (tmp));
1332
1333	fn(mem, smem, pagesize);
1334munmap:
1335	munmap(mem, pagesize);
1336	if (smem != MAP_FAILED)
1337		munmap(smem, pagesize);
1338close:
1339	close(fd);
1340}
1341
1342static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
1343{
1344	char *mem, *smem, tmp;
1345	FILE *file;
1346	int fd;
1347
1348	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
1349
1350	file = tmpfile();
1351	if (!file) {
1352		ksft_test_result_fail("tmpfile() failed\n");
1353		return;
1354	}
1355
1356	fd = fileno(file);
1357	if (fd < 0) {
1358		ksft_test_result_skip("fileno() failed\n");
1359		return;
1360	}
1361
1362	/* File consists of a single page filled with zeroes. */
1363	if (fallocate(fd, 0, 0, pagesize)) {
1364		ksft_test_result_fail("fallocate() failed\n");
1365		goto close;
1366	}
1367
1368	/* Create a private mapping of the memfd. */
1369	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1370	if (mem == MAP_FAILED) {
1371		ksft_test_result_fail("mmap() failed\n");
1372		goto close;
1373	}
1374	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
1375	if (mem == MAP_FAILED) {
1376		ksft_test_result_fail("mmap() failed\n");
1377		goto munmap;
1378	}
1379
1380	/* Fault the page in. */
1381	tmp = *mem + *smem;
1382	asm volatile("" : "+r" (tmp));
1383
1384	fn(mem, smem, pagesize);
1385munmap:
1386	munmap(mem, pagesize);
1387	if (smem != MAP_FAILED)
1388		munmap(smem, pagesize);
1389close:
1390	fclose(file);
1391}
1392
1393static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
1394				   size_t hugetlbsize)
1395{
1396	int flags = MFD_HUGETLB;
1397	char *mem, *smem, tmp;
1398	int fd;
1399
1400	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
1401		       hugetlbsize / 1024);
1402
1403	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
1404
1405	fd = memfd_create("test", flags);
1406	if (fd < 0) {
1407		ksft_test_result_skip("memfd_create() failed\n");
1408		return;
1409	}
1410
1411	/* File consists of a single page filled with zeroes. */
1412	if (fallocate(fd, 0, 0, hugetlbsize)) {
1413		ksft_test_result_skip("need more free huge pages\n");
1414		goto close;
1415	}
1416
1417	/* Create a private mapping of the memfd. */
1418	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
1419		   0);
1420	if (mem == MAP_FAILED) {
1421		ksft_test_result_skip("need more free huge pages\n");
1422		goto close;
1423	}
1424	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
1425	if (mem == MAP_FAILED) {
1426		ksft_test_result_fail("mmap() failed\n");
1427		goto munmap;
1428	}
1429
1430	/* Fault the page in. */
1431	tmp = *mem + *smem;
1432	asm volatile("" : "+r" (tmp));
1433
1434	fn(mem, smem, hugetlbsize);
1435munmap:
1436	munmap(mem, hugetlbsize);
1437	if (mem != MAP_FAILED)
1438		munmap(smem, hugetlbsize);
1439close:
1440	close(fd);
1441}
1442
/* A non-anonymous test case: human-readable description plus callback. */
struct non_anon_test_case {
	const char *desc;	/* Printed in the per-configuration [RUN] line. */
	non_anon_test_fn fn;	/* Invoked as fn(mem, smem, size). */
};
1447
1448/*
1449 * Test cases that target any pages in private mappings that are not anonymous:
1450 * pages that may get shared via COW ndependent of fork(). This includes
1451 * the shared zeropage(s), pagecache pages, ...
1452 */
1453static const struct non_anon_test_case non_anon_test_cases[] = {
1454	/*
1455	 * Basic COW test without any GUP. If we miss to break COW, changes are
1456	 * visible via other private/shared mappings.
1457	 */
1458	{
1459		"Basic COW",
1460		test_cow,
1461	},
1462	/*
1463	 * Take a R/O longterm pin. When modifying the page via the page table,
1464	 * the page content change must be visible via the pin.
1465	 */
1466	{
1467		"R/O longterm GUP pin",
1468		test_ro_pin,
1469	},
1470	/* Same as above, but using GUP-fast. */
1471	{
1472		"R/O longterm GUP-fast pin",
1473		test_ro_fast_pin,
1474	},
1475};
1476
1477static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
1478{
1479	int i;
1480
1481	run_with_zeropage(test_case->fn, test_case->desc);
1482	run_with_memfd(test_case->fn, test_case->desc);
1483	run_with_tmpfile(test_case->fn, test_case->desc);
1484	if (thpsize)
1485		run_with_huge_zeropage(test_case->fn, test_case->desc);
1486	for (i = 0; i < nr_hugetlbsizes; i++)
1487		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
1488				       hugetlbsizes[i]);
1489}
1490
1491static void run_non_anon_test_cases(void)
1492{
1493	int i;
1494
1495	ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");
1496
1497	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
1498		run_non_anon_test_case(&non_anon_test_cases[i]);
1499}
1500
1501static int tests_per_non_anon_test_case(void)
1502{
1503	int tests = 3 + nr_hugetlbsizes;
1504
1505	if (thpsize)
1506		tests += 1;
1507	return tests;
1508}
1509
int main(int argc, char **argv)
{
	int err;

	pagesize = getpagesize();
	/* Capability detection must run before computing the plan below. */
	detect_thpsize();
	detect_hugetlbsizes();
	detect_huge_zeropage();

	ksft_print_header();
	/* The plan must match the exact number of results the runners emit. */
	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	/*
	 * NOTE(review): gup_fd is deliberately not checked here; presumably
	 * the GUP-based tests skip when it is < 0 -- confirm against callers.
	 */
	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_non_anon_test_cases();

	err = ksft_get_fail_cnt();
	if (err)
		ksft_exit_fail_msg("%d out of %d tests failed\n",
				   err, ksft_test_num());
	return ksft_exit_pass();
}