Linux Audio

Check our new training course

Loading...
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0
   2#define _GNU_SOURCE
   3#define __EXPORTED_HEADERS__
   4
   5#include <errno.h>
   6#include <inttypes.h>
   7#include <limits.h>
   8#include <linux/falloc.h>
   9#include <fcntl.h>
  10#include <linux/memfd.h>
  11#include <sched.h>
  12#include <stdio.h>
  13#include <stdlib.h>
  14#include <signal.h>
  15#include <string.h>
  16#include <sys/mman.h>
  17#include <sys/stat.h>
  18#include <sys/syscall.h>
  19#include <sys/wait.h>
  20#include <unistd.h>
  21#include <ctype.h>
  22
  23#include "common.h"
  24
  25#define MEMFD_STR	"memfd:"
  26#define MEMFD_HUGE_STR	"memfd-hugetlb:"
  27#define SHARED_FT_STR	"(shared file-table)"
  28
  29#define MFD_DEF_SIZE 8192
  30#define STACK_SIZE 65536
  31
  32#define F_SEAL_EXEC	0x0020
  33
  34#define F_WX_SEALS (F_SEAL_SHRINK | \
  35		    F_SEAL_GROW | \
  36		    F_SEAL_WRITE | \
  37		    F_SEAL_FUTURE_WRITE | \
  38		    F_SEAL_EXEC)
  39
  40#define MFD_NOEXEC_SEAL	0x0008U
  41
  42/*
  43 * Default is not to test hugetlbfs
  44 */
  45static size_t mfd_def_size = MFD_DEF_SIZE;
  46static const char *memfd_str = MEMFD_STR;
 
 
  47
  48static ssize_t fd2name(int fd, char *buf, size_t bufsize)
  49{
  50	char buf1[PATH_MAX];
  51	int size;
  52	ssize_t nbytes;
  53
  54	size = snprintf(buf1, PATH_MAX, "/proc/self/fd/%d", fd);
  55	if (size < 0) {
  56		printf("snprintf(%d) failed on %m\n", fd);
  57		abort();
  58	}
  59
  60	/*
  61	 * reserver one byte for string termination.
  62	 */
  63	nbytes = readlink(buf1, buf, bufsize-1);
  64	if (nbytes == -1) {
  65		printf("readlink(%s) failed %m\n", buf1);
  66		abort();
  67	}
  68	buf[nbytes] = '\0';
  69	return nbytes;
  70}
  71
  72static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
  73{
  74	int r, fd;
  75
  76	fd = sys_memfd_create(name, flags);
  77	if (fd < 0) {
  78		printf("memfd_create(\"%s\", %u) failed: %m\n",
  79		       name, flags);
  80		abort();
  81	}
  82
  83	r = ftruncate(fd, sz);
  84	if (r < 0) {
  85		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
  86		abort();
  87	}
  88
  89	return fd;
  90}
  91
  92static void sysctl_assert_write(const char *val)
  93{
  94	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
  95
  96	if (fd < 0) {
  97		printf("open sysctl failed: %m\n");
  98		abort();
  99	}
 100
 101	if (write(fd, val, strlen(val)) < 0) {
 102		printf("write sysctl %s failed: %m\n", val);
 103		abort();
 104	}
 105}
 106
 107static void sysctl_fail_write(const char *val)
 108{
 109	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
 110
 111	if (fd < 0) {
 112		printf("open sysctl failed: %m\n");
 113		abort();
 114	}
 115
 116	if (write(fd, val, strlen(val)) >= 0) {
 117		printf("write sysctl %s succeeded, but failure expected\n",
 118				val);
 119		abort();
 120	}
 121}
 122
 123static void sysctl_assert_equal(const char *val)
 124{
 125	char *p, buf[128] = {};
 126	int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);
 127
 128	if (fd < 0) {
 129		printf("open sysctl failed: %m\n");
 130		abort();
 131	}
 132
 133	if (read(fd, buf, sizeof(buf)) < 0) {
 134		printf("read sysctl failed: %m\n");
 135		abort();
 136	}
 137
 138	/* Strip trailing whitespace. */
 139	p = buf;
 140	while (!isspace(*p))
 141		p++;
 142	*p = '\0';
 143
 144	if (strcmp(buf, val) != 0) {
 145		printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
 146		abort();
 147	}
 148}
 149
 150static int mfd_assert_reopen_fd(int fd_in)
 151{
 152	int fd;
 153	char path[100];
 154
 155	sprintf(path, "/proc/self/fd/%d", fd_in);
 156
 157	fd = open(path, O_RDWR);
 158	if (fd < 0) {
 159		printf("re-open of existing fd %d failed\n", fd_in);
 160		abort();
 161	}
 162
 163	return fd;
 164}
 165
 166static void mfd_fail_new(const char *name, unsigned int flags)
 167{
 168	int r;
 169
 170	r = sys_memfd_create(name, flags);
 171	if (r >= 0) {
 172		printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
 173		       name, flags);
 174		close(r);
 175		abort();
 176	}
 177}
 178
 179static unsigned int mfd_assert_get_seals(int fd)
 180{
 181	int r;
 182
 183	r = fcntl(fd, F_GET_SEALS);
 184	if (r < 0) {
 185		printf("GET_SEALS(%d) failed: %m\n", fd);
 186		abort();
 187	}
 188
 189	return (unsigned int)r;
 190}
 191
 192static void mfd_assert_has_seals(int fd, unsigned int seals)
 193{
 194	char buf[PATH_MAX];
 
 195	unsigned int s;
 196	fd2name(fd, buf, PATH_MAX);
 197
 198	s = mfd_assert_get_seals(fd);
 199	if (s != seals) {
 200		printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
 201		abort();
 202	}
 203}
 204
 205static void mfd_assert_add_seals(int fd, unsigned int seals)
 206{
 207	int r;
 208	unsigned int s;
 209
 210	s = mfd_assert_get_seals(fd);
 211	r = fcntl(fd, F_ADD_SEALS, seals);
 212	if (r < 0) {
 213		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, s, seals);
 214		abort();
 215	}
 216}
 217
 218static void mfd_fail_add_seals(int fd, unsigned int seals)
 219{
 220	int r;
 221	unsigned int s;
 222
 223	r = fcntl(fd, F_GET_SEALS);
 224	if (r < 0)
 225		s = 0;
 226	else
 227		s = (unsigned int)r;
 228
 229	r = fcntl(fd, F_ADD_SEALS, seals);
 230	if (r >= 0) {
 231		printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
 232				fd, s, seals);
 233		abort();
 234	}
 235}
 236
 237static void mfd_assert_size(int fd, size_t size)
 238{
 239	struct stat st;
 240	int r;
 241
 242	r = fstat(fd, &st);
 243	if (r < 0) {
 244		printf("fstat(%d) failed: %m\n", fd);
 245		abort();
 246	} else if (st.st_size != size) {
 247		printf("wrong file size %lld, but expected %lld\n",
 248		       (long long)st.st_size, (long long)size);
 249		abort();
 250	}
 251}
 252
 253static int mfd_assert_dup(int fd)
 254{
 255	int r;
 256
 257	r = dup(fd);
 258	if (r < 0) {
 259		printf("dup(%d) failed: %m\n", fd);
 260		abort();
 261	}
 262
 263	return r;
 264}
 265
 266static void *mfd_assert_mmap_shared(int fd)
 267{
 268	void *p;
 269
 270	p = mmap(NULL,
 271		 mfd_def_size,
 272		 PROT_READ | PROT_WRITE,
 273		 MAP_SHARED,
 274		 fd,
 275		 0);
 276	if (p == MAP_FAILED) {
 277		printf("mmap() failed: %m\n");
 278		abort();
 279	}
 280
 281	return p;
 282}
 283
 284static void *mfd_assert_mmap_private(int fd)
 285{
 286	void *p;
 287
 288	p = mmap(NULL,
 289		 mfd_def_size,
 290		 PROT_READ,
 291		 MAP_PRIVATE,
 292		 fd,
 293		 0);
 294	if (p == MAP_FAILED) {
 295		printf("mmap() failed: %m\n");
 296		abort();
 297	}
 298
 299	return p;
 300}
 301
 302static int mfd_assert_open(int fd, int flags, mode_t mode)
 303{
 304	char buf[512];
 305	int r;
 306
 307	sprintf(buf, "/proc/self/fd/%d", fd);
 308	r = open(buf, flags, mode);
 309	if (r < 0) {
 310		printf("open(%s) failed: %m\n", buf);
 311		abort();
 312	}
 313
 314	return r;
 315}
 316
 317static void mfd_fail_open(int fd, int flags, mode_t mode)
 318{
 319	char buf[512];
 320	int r;
 321
 322	sprintf(buf, "/proc/self/fd/%d", fd);
 323	r = open(buf, flags, mode);
 324	if (r >= 0) {
 325		printf("open(%s) didn't fail as expected\n", buf);
 326		abort();
 327	}
 328}
 329
 330static void mfd_assert_read(int fd)
 331{
 332	char buf[16];
 333	void *p;
 334	ssize_t l;
 335
 336	l = read(fd, buf, sizeof(buf));
 337	if (l != sizeof(buf)) {
 338		printf("read() failed: %m\n");
 339		abort();
 340	}
 341
 342	/* verify PROT_READ *is* allowed */
 343	p = mmap(NULL,
 344		 mfd_def_size,
 345		 PROT_READ,
 346		 MAP_PRIVATE,
 347		 fd,
 348		 0);
 349	if (p == MAP_FAILED) {
 350		printf("mmap() failed: %m\n");
 351		abort();
 352	}
 353	munmap(p, mfd_def_size);
 354
 355	/* verify MAP_PRIVATE is *always* allowed (even writable) */
 356	p = mmap(NULL,
 357		 mfd_def_size,
 358		 PROT_READ | PROT_WRITE,
 359		 MAP_PRIVATE,
 360		 fd,
 361		 0);
 362	if (p == MAP_FAILED) {
 363		printf("mmap() failed: %m\n");
 364		abort();
 365	}
 366	munmap(p, mfd_def_size);
 367}
 368
 369/* Test that PROT_READ + MAP_SHARED mappings work. */
 370static void mfd_assert_read_shared(int fd)
 371{
 372	void *p;
 373
 374	/* verify PROT_READ and MAP_SHARED *is* allowed */
 375	p = mmap(NULL,
 376		 mfd_def_size,
 377		 PROT_READ,
 378		 MAP_SHARED,
 379		 fd,
 380		 0);
 381	if (p == MAP_FAILED) {
 382		printf("mmap() failed: %m\n");
 383		abort();
 384	}
 385	munmap(p, mfd_def_size);
 386}
 387
 388static void mfd_assert_fork_private_write(int fd)
 389{
 390	int *p;
 391	pid_t pid;
 392
 393	p = mmap(NULL,
 394		 mfd_def_size,
 395		 PROT_READ | PROT_WRITE,
 396		 MAP_PRIVATE,
 397		 fd,
 398		 0);
 399	if (p == MAP_FAILED) {
 400		printf("mmap() failed: %m\n");
 401		abort();
 402	}
 403
 404	p[0] = 22;
 405
 406	pid = fork();
 407	if (pid == 0) {
 408		p[0] = 33;
 409		exit(0);
 410	} else {
 411		waitpid(pid, NULL, 0);
 412
 413		if (p[0] != 22) {
 414			printf("MAP_PRIVATE copy-on-write failed: %m\n");
 415			abort();
 416		}
 417	}
 418
 419	munmap(p, mfd_def_size);
 420}
 421
 422static void mfd_assert_write(int fd)
 423{
 424	ssize_t l;
 425	void *p;
 426	int r;
 427
 428	/*
 429	 * huegtlbfs does not support write, but we want to
 430	 * verify everything else here.
 431	 */
 432	if (!hugetlbfs_test) {
 433		/* verify write() succeeds */
 434		l = write(fd, "\0\0\0\0", 4);
 435		if (l != 4) {
 436			printf("write() failed: %m\n");
 437			abort();
 438		}
 439	}
 440
 441	/* verify PROT_READ | PROT_WRITE is allowed */
 442	p = mmap(NULL,
 443		 mfd_def_size,
 444		 PROT_READ | PROT_WRITE,
 445		 MAP_SHARED,
 446		 fd,
 447		 0);
 448	if (p == MAP_FAILED) {
 449		printf("mmap() failed: %m\n");
 450		abort();
 451	}
 452	*(char *)p = 0;
 453	munmap(p, mfd_def_size);
 454
 455	/* verify PROT_WRITE is allowed */
 456	p = mmap(NULL,
 457		 mfd_def_size,
 458		 PROT_WRITE,
 459		 MAP_SHARED,
 460		 fd,
 461		 0);
 462	if (p == MAP_FAILED) {
 463		printf("mmap() failed: %m\n");
 464		abort();
 465	}
 466	*(char *)p = 0;
 467	munmap(p, mfd_def_size);
 468
 469	/* verify PROT_READ with MAP_SHARED is allowed and a following
 470	 * mprotect(PROT_WRITE) allows writing */
 471	p = mmap(NULL,
 472		 mfd_def_size,
 473		 PROT_READ,
 474		 MAP_SHARED,
 475		 fd,
 476		 0);
 477	if (p == MAP_FAILED) {
 478		printf("mmap() failed: %m\n");
 479		abort();
 480	}
 481
 482	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
 483	if (r < 0) {
 484		printf("mprotect() failed: %m\n");
 485		abort();
 486	}
 487
 488	*(char *)p = 0;
 489	munmap(p, mfd_def_size);
 490
 491	/* verify PUNCH_HOLE works */
 492	r = fallocate(fd,
 493		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 494		      0,
 495		      mfd_def_size);
 496	if (r < 0) {
 497		printf("fallocate(PUNCH_HOLE) failed: %m\n");
 498		abort();
 499	}
 500}
 501
 502static void mfd_fail_write(int fd)
 503{
 504	ssize_t l;
 505	void *p;
 506	int r;
 507
 508	/* verify write() fails */
 509	l = write(fd, "data", 4);
 510	if (l != -EPERM) {
 511		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
 512		abort();
 513	}
 514
 515	/* verify PROT_READ | PROT_WRITE is not allowed */
 516	p = mmap(NULL,
 517		 mfd_def_size,
 518		 PROT_READ | PROT_WRITE,
 519		 MAP_SHARED,
 520		 fd,
 521		 0);
 522	if (p != MAP_FAILED) {
 523		printf("mmap() didn't fail as expected\n");
 524		abort();
 525	}
 526
 527	/* verify PROT_WRITE is not allowed */
 528	p = mmap(NULL,
 529		 mfd_def_size,
 530		 PROT_WRITE,
 531		 MAP_SHARED,
 532		 fd,
 533		 0);
 534	if (p != MAP_FAILED) {
 535		printf("mmap() didn't fail as expected\n");
 536		abort();
 537	}
 538
 539	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
 540	 * allowed. Note that for r/w the kernel already prevents the mmap. */
 541	p = mmap(NULL,
 542		 mfd_def_size,
 543		 PROT_READ,
 544		 MAP_SHARED,
 545		 fd,
 546		 0);
 547	if (p != MAP_FAILED) {
 548		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
 549		if (r >= 0) {
 550			printf("mmap()+mprotect() didn't fail as expected\n");
 551			abort();
 552		}
 553		munmap(p, mfd_def_size);
 554	}
 555
 556	/* verify PUNCH_HOLE fails */
 557	r = fallocate(fd,
 558		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 559		      0,
 560		      mfd_def_size);
 561	if (r >= 0) {
 562		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
 563		abort();
 564	}
 565}
 566
 567static void mfd_assert_shrink(int fd)
 568{
 569	int r, fd2;
 570
 571	r = ftruncate(fd, mfd_def_size / 2);
 572	if (r < 0) {
 573		printf("ftruncate(SHRINK) failed: %m\n");
 574		abort();
 575	}
 576
 577	mfd_assert_size(fd, mfd_def_size / 2);
 578
 579	fd2 = mfd_assert_open(fd,
 580			      O_RDWR | O_CREAT | O_TRUNC,
 581			      S_IRUSR | S_IWUSR);
 582	close(fd2);
 583
 584	mfd_assert_size(fd, 0);
 585}
 586
 587static void mfd_fail_shrink(int fd)
 588{
 589	int r;
 590
 591	r = ftruncate(fd, mfd_def_size / 2);
 592	if (r >= 0) {
 593		printf("ftruncate(SHRINK) didn't fail as expected\n");
 594		abort();
 595	}
 596
 597	mfd_fail_open(fd,
 598		      O_RDWR | O_CREAT | O_TRUNC,
 599		      S_IRUSR | S_IWUSR);
 600}
 601
 602static void mfd_assert_grow(int fd)
 603{
 604	int r;
 605
 606	r = ftruncate(fd, mfd_def_size * 2);
 607	if (r < 0) {
 608		printf("ftruncate(GROW) failed: %m\n");
 609		abort();
 610	}
 611
 612	mfd_assert_size(fd, mfd_def_size * 2);
 613
 614	r = fallocate(fd,
 615		      0,
 616		      0,
 617		      mfd_def_size * 4);
 618	if (r < 0) {
 619		printf("fallocate(ALLOC) failed: %m\n");
 620		abort();
 621	}
 622
 623	mfd_assert_size(fd, mfd_def_size * 4);
 624}
 625
 626static void mfd_fail_grow(int fd)
 627{
 628	int r;
 629
 630	r = ftruncate(fd, mfd_def_size * 2);
 631	if (r >= 0) {
 632		printf("ftruncate(GROW) didn't fail as expected\n");
 633		abort();
 634	}
 635
 636	r = fallocate(fd,
 637		      0,
 638		      0,
 639		      mfd_def_size * 4);
 640	if (r >= 0) {
 641		printf("fallocate(ALLOC) didn't fail as expected\n");
 642		abort();
 643	}
 644}
 645
 646static void mfd_assert_grow_write(int fd)
 647{
 648	static char *buf;
 649	ssize_t l;
 650
 651	/* hugetlbfs does not support write */
 652	if (hugetlbfs_test)
 653		return;
 654
 655	buf = malloc(mfd_def_size * 8);
 656	if (!buf) {
 657		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
 658		abort();
 659	}
 660
 661	l = pwrite(fd, buf, mfd_def_size * 8, 0);
 662	if (l != (mfd_def_size * 8)) {
 663		printf("pwrite() failed: %m\n");
 664		abort();
 665	}
 666
 667	mfd_assert_size(fd, mfd_def_size * 8);
 668}
 669
 670static void mfd_fail_grow_write(int fd)
 671{
 672	static char *buf;
 673	ssize_t l;
 674
 675	/* hugetlbfs does not support write */
 676	if (hugetlbfs_test)
 677		return;
 678
 679	buf = malloc(mfd_def_size * 8);
 680	if (!buf) {
 681		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
 682		abort();
 683	}
 684
 685	l = pwrite(fd, buf, mfd_def_size * 8, 0);
 686	if (l == (mfd_def_size * 8)) {
 687		printf("pwrite() didn't fail as expected\n");
 688		abort();
 689	}
 690}
 691
 692static void mfd_assert_mode(int fd, int mode)
 693{
 694	struct stat st;
 695	char buf[PATH_MAX];
 
 696
 697	fd2name(fd, buf, PATH_MAX);
 698
 699	if (fstat(fd, &st) < 0) {
 700		printf("fstat(%s) failed: %m\n", buf);
 701		abort();
 702	}
 703
 704	if ((st.st_mode & 07777) != mode) {
 705		printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
 706		       buf, (int)st.st_mode & 07777, mode);
 707		abort();
 708	}
 709}
 710
 711static void mfd_assert_chmod(int fd, int mode)
 712{
 713	char buf[PATH_MAX];
 
 714
 715	fd2name(fd, buf, PATH_MAX);
 716
 717	if (fchmod(fd, mode) < 0) {
 718		printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
 719		abort();
 720	}
 721
 722	mfd_assert_mode(fd, mode);
 723}
 724
 725static void mfd_fail_chmod(int fd, int mode)
 726{
 727	struct stat st;
 728	char buf[PATH_MAX];
 
 729
 730	fd2name(fd, buf, PATH_MAX);
 731
 732	if (fstat(fd, &st) < 0) {
 733		printf("fstat(%s) failed: %m\n", buf);
 734		abort();
 735	}
 736
 737	if (fchmod(fd, mode) == 0) {
 738		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
 739		       buf, mode);
 740		abort();
 741	}
 742
 743	/* verify that file mode bits did not change */
 744	mfd_assert_mode(fd, st.st_mode & 07777);
 745}
 746
 747static int idle_thread_fn(void *arg)
 748{
 749	sigset_t set;
 750	int sig;
 751
 752	/* dummy waiter; SIGTERM terminates us anyway */
 753	sigemptyset(&set);
 754	sigaddset(&set, SIGTERM);
 755	sigwait(&set, &sig);
 756
 757	return 0;
 758}
 759
 760static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
 761{
 762	uint8_t *stack;
 763	pid_t pid;
 764
 765	stack = malloc(STACK_SIZE);
 766	if (!stack) {
 767		printf("malloc(STACK_SIZE) failed: %m\n");
 768		abort();
 769	}
 770
 771	pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
 772	if (pid < 0) {
 773		printf("clone() failed: %m\n");
 774		abort();
 775	}
 776
 777	return pid;
 778}
 779
 780static void join_thread(pid_t pid)
 781{
 782	int wstatus;
 783
 784	if (waitpid(pid, &wstatus, 0) < 0) {
 785		printf("newpid thread: waitpid() failed: %m\n");
 786		abort();
 787	}
 788
 789	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
 790		printf("newpid thread: exited with non-zero error code %d\n",
 791		       WEXITSTATUS(wstatus));
 792		abort();
 793	}
 794
 795	if (WIFSIGNALED(wstatus)) {
 796		printf("newpid thread: killed by signal %d\n",
 797		       WTERMSIG(wstatus));
 798		abort();
 799	}
 800}
 801
 802static pid_t spawn_idle_thread(unsigned int flags)
 803{
 804	return spawn_thread(flags, idle_thread_fn, NULL);
 805}
 806
 807static void join_idle_thread(pid_t pid)
 808{
 809	kill(pid, SIGTERM);
 810	waitpid(pid, NULL, 0);
 811}
 812
 813/*
 814 * Test memfd_create() syscall
 815 * Verify syscall-argument validation, including name checks, flag validation
 816 * and more.
 817 */
 818static void test_create(void)
 819{
 820	char buf[2048];
 821	int fd;
 822
 823	printf("%s CREATE\n", memfd_str);
 824
 825	/* test NULL name */
 826	mfd_fail_new(NULL, 0);
 827
 828	/* test over-long name (not zero-terminated) */
 829	memset(buf, 0xff, sizeof(buf));
 830	mfd_fail_new(buf, 0);
 831
 832	/* test over-long zero-terminated name */
 833	memset(buf, 0xff, sizeof(buf));
 834	buf[sizeof(buf) - 1] = 0;
 835	mfd_fail_new(buf, 0);
 836
 837	/* verify "" is a valid name */
 838	fd = mfd_assert_new("", 0, 0);
 839	close(fd);
 840
 841	/* verify invalid O_* open flags */
 842	mfd_fail_new("", 0x0100);
 843	mfd_fail_new("", ~MFD_CLOEXEC);
 844	mfd_fail_new("", ~MFD_ALLOW_SEALING);
 845	mfd_fail_new("", ~0);
 846	mfd_fail_new("", 0x80000000U);
 847
 848	/* verify EXEC and NOEXEC_SEAL can't both be set */
 849	mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
 850
 851	/* verify MFD_CLOEXEC is allowed */
 852	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
 853	close(fd);
 854
 855	/* verify MFD_ALLOW_SEALING is allowed */
 856	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
 857	close(fd);
 858
 859	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
 860	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
 861	close(fd);
 862}
 863
 864/*
 865 * Test basic sealing
 866 * A very basic sealing test to see whether setting/retrieving seals works.
 867 */
 868static void test_basic(void)
 869{
 870	int fd;
 871
 872	printf("%s BASIC\n", memfd_str);
 873
 874	fd = mfd_assert_new("kern_memfd_basic",
 875			    mfd_def_size,
 876			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 877
 878	/* add basic seals */
 879	mfd_assert_has_seals(fd, 0);
 880	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
 881				 F_SEAL_WRITE);
 882	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
 883				 F_SEAL_WRITE);
 884
 885	/* add them again */
 886	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
 887				 F_SEAL_WRITE);
 888	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
 889				 F_SEAL_WRITE);
 890
 891	/* add more seals and seal against sealing */
 892	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
 893	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
 894				 F_SEAL_GROW |
 895				 F_SEAL_WRITE |
 896				 F_SEAL_SEAL);
 897
 898	/* verify that sealing no longer works */
 899	mfd_fail_add_seals(fd, F_SEAL_GROW);
 900	mfd_fail_add_seals(fd, 0);
 901
 902	close(fd);
 903
 904	/* verify sealing does not work without MFD_ALLOW_SEALING */
 905	fd = mfd_assert_new("kern_memfd_basic",
 906			    mfd_def_size,
 907			    MFD_CLOEXEC);
 908	mfd_assert_has_seals(fd, F_SEAL_SEAL);
 909	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
 910			       F_SEAL_GROW |
 911			       F_SEAL_WRITE);
 912	mfd_assert_has_seals(fd, F_SEAL_SEAL);
 913	close(fd);
 914}
 915
 916/*
 917 * Test SEAL_WRITE
 918 * Test whether SEAL_WRITE actually prevents modifications.
 919 */
 920static void test_seal_write(void)
 921{
 922	int fd;
 923
 924	printf("%s SEAL-WRITE\n", memfd_str);
 925
 926	fd = mfd_assert_new("kern_memfd_seal_write",
 927			    mfd_def_size,
 928			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 929	mfd_assert_has_seals(fd, 0);
 930	mfd_assert_add_seals(fd, F_SEAL_WRITE);
 931	mfd_assert_has_seals(fd, F_SEAL_WRITE);
 932
 933	mfd_assert_read(fd);
 934	mfd_fail_write(fd);
 935	mfd_assert_shrink(fd);
 936	mfd_assert_grow(fd);
 937	mfd_fail_grow_write(fd);
 938
 939	close(fd);
 940}
 941
 942/*
 943 * Test SEAL_FUTURE_WRITE
 944 * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
 945 */
 946static void test_seal_future_write(void)
 947{
 948	int fd, fd2;
 949	void *p;
 950
 951	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
 952
 953	fd = mfd_assert_new("kern_memfd_seal_future_write",
 954			    mfd_def_size,
 955			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 956
 957	p = mfd_assert_mmap_shared(fd);
 958
 959	mfd_assert_has_seals(fd, 0);
 960
 961	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
 962	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
 963
 964	/* read should pass, writes should fail */
 965	mfd_assert_read(fd);
 966	mfd_assert_read_shared(fd);
 967	mfd_fail_write(fd);
 968
 969	fd2 = mfd_assert_reopen_fd(fd);
 970	/* read should pass, writes should still fail */
 971	mfd_assert_read(fd2);
 972	mfd_assert_read_shared(fd2);
 973	mfd_fail_write(fd2);
 974
 975	mfd_assert_fork_private_write(fd);
 976
 977	munmap(p, mfd_def_size);
 978	close(fd2);
 979	close(fd);
 980}
 981
 982/*
 983 * Test SEAL_SHRINK
 984 * Test whether SEAL_SHRINK actually prevents shrinking
 985 */
 986static void test_seal_shrink(void)
 987{
 988	int fd;
 989
 990	printf("%s SEAL-SHRINK\n", memfd_str);
 991
 992	fd = mfd_assert_new("kern_memfd_seal_shrink",
 993			    mfd_def_size,
 994			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 995	mfd_assert_has_seals(fd, 0);
 996	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
 997	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
 998
 999	mfd_assert_read(fd);
1000	mfd_assert_write(fd);
1001	mfd_fail_shrink(fd);
1002	mfd_assert_grow(fd);
1003	mfd_assert_grow_write(fd);
1004
1005	close(fd);
1006}
1007
1008/*
1009 * Test SEAL_GROW
1010 * Test whether SEAL_GROW actually prevents growing
1011 */
1012static void test_seal_grow(void)
1013{
1014	int fd;
1015
1016	printf("%s SEAL-GROW\n", memfd_str);
1017
1018	fd = mfd_assert_new("kern_memfd_seal_grow",
1019			    mfd_def_size,
1020			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1021	mfd_assert_has_seals(fd, 0);
1022	mfd_assert_add_seals(fd, F_SEAL_GROW);
1023	mfd_assert_has_seals(fd, F_SEAL_GROW);
1024
1025	mfd_assert_read(fd);
1026	mfd_assert_write(fd);
1027	mfd_assert_shrink(fd);
1028	mfd_fail_grow(fd);
1029	mfd_fail_grow_write(fd);
1030
1031	close(fd);
1032}
1033
1034/*
1035 * Test SEAL_SHRINK | SEAL_GROW
1036 * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
1037 */
1038static void test_seal_resize(void)
1039{
1040	int fd;
1041
1042	printf("%s SEAL-RESIZE\n", memfd_str);
1043
1044	fd = mfd_assert_new("kern_memfd_seal_resize",
1045			    mfd_def_size,
1046			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1047	mfd_assert_has_seals(fd, 0);
1048	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1049	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1050
1051	mfd_assert_read(fd);
1052	mfd_assert_write(fd);
1053	mfd_fail_shrink(fd);
1054	mfd_fail_grow(fd);
1055	mfd_fail_grow_write(fd);
1056
1057	close(fd);
1058}
1059
1060/*
1061 * Test SEAL_EXEC
1062 * Test fd is created with exec and allow sealing.
1063 * chmod() cannot change x bits after sealing.
1064 */
1065static void test_exec_seal(void)
1066{
1067	int fd;
1068
1069	printf("%s SEAL-EXEC\n", memfd_str);
1070
1071	printf("%s	Apply SEAL_EXEC\n", memfd_str);
1072	fd = mfd_assert_new("kern_memfd_seal_exec",
1073			    mfd_def_size,
1074			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1075
1076	mfd_assert_mode(fd, 0777);
1077	mfd_assert_chmod(fd, 0644);
1078
1079	mfd_assert_has_seals(fd, 0);
1080	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1081	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1082
1083	mfd_assert_chmod(fd, 0600);
1084	mfd_fail_chmod(fd, 0777);
1085	mfd_fail_chmod(fd, 0670);
1086	mfd_fail_chmod(fd, 0605);
1087	mfd_fail_chmod(fd, 0700);
1088	mfd_fail_chmod(fd, 0100);
1089	mfd_assert_chmod(fd, 0666);
1090	mfd_assert_write(fd);
1091	close(fd);
1092
1093	printf("%s	Apply ALL_SEALS\n", memfd_str);
1094	fd = mfd_assert_new("kern_memfd_seal_exec",
1095			    mfd_def_size,
1096			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1097
1098	mfd_assert_mode(fd, 0777);
1099	mfd_assert_chmod(fd, 0700);
1100
1101	mfd_assert_has_seals(fd, 0);
1102	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1103	mfd_assert_has_seals(fd, F_WX_SEALS);
1104
1105	mfd_fail_chmod(fd, 0711);
1106	mfd_fail_chmod(fd, 0600);
1107	mfd_fail_write(fd);
1108	close(fd);
1109}
1110
1111/*
1112 * Test EXEC_NO_SEAL
1113 * Test fd is created with exec and not allow sealing.
1114 */
1115static void test_exec_no_seal(void)
1116{
1117	int fd;
1118
1119	printf("%s EXEC_NO_SEAL\n", memfd_str);
1120
1121	/* Create with EXEC but without ALLOW_SEALING */
1122	fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1123			    mfd_def_size,
1124			    MFD_CLOEXEC | MFD_EXEC);
1125	mfd_assert_mode(fd, 0777);
1126	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1127	mfd_assert_chmod(fd, 0666);
1128	close(fd);
1129}
1130
1131/*
1132 * Test memfd_create with MFD_NOEXEC flag
1133 */
1134static void test_noexec_seal(void)
1135{
1136	int fd;
1137
1138	printf("%s NOEXEC_SEAL\n", memfd_str);
1139
1140	/* Create with NOEXEC and ALLOW_SEALING */
1141	fd = mfd_assert_new("kern_memfd_noexec",
1142			    mfd_def_size,
1143			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1144	mfd_assert_mode(fd, 0666);
1145	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1146	mfd_fail_chmod(fd, 0777);
1147	close(fd);
1148
1149	/* Create with NOEXEC but without ALLOW_SEALING */
1150	fd = mfd_assert_new("kern_memfd_noexec",
1151			    mfd_def_size,
1152			    MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1153	mfd_assert_mode(fd, 0666);
1154	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1155	mfd_fail_chmod(fd, 0777);
1156	close(fd);
1157}
1158
1159static void test_sysctl_sysctl0(void)
1160{
1161	int fd;
1162
1163	sysctl_assert_equal("0");
1164
1165	fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
1166			    mfd_def_size,
1167			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1168	mfd_assert_mode(fd, 0777);
1169	mfd_assert_has_seals(fd, 0);
1170	mfd_assert_chmod(fd, 0644);
1171	close(fd);
1172}
1173
/* Write "0" to the memfd_noexec sysctl, then run the checks for that value. */
static void test_sysctl_set_sysctl0(void)
{
	sysctl_assert_write("0");

	test_sysctl_sysctl0();
}
1179
1180static void test_sysctl_sysctl1(void)
1181{
1182	int fd;
1183
1184	sysctl_assert_equal("1");
1185
1186	fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
1187			    mfd_def_size,
1188			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1189	mfd_assert_mode(fd, 0666);
1190	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1191	mfd_fail_chmod(fd, 0777);
1192	close(fd);
1193
1194	fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
1195			    mfd_def_size,
1196			    MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1197	mfd_assert_mode(fd, 0777);
1198	mfd_assert_has_seals(fd, 0);
1199	mfd_assert_chmod(fd, 0644);
1200	close(fd);
1201
1202	fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
1203			    mfd_def_size,
1204			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1205	mfd_assert_mode(fd, 0666);
1206	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1207	mfd_fail_chmod(fd, 0777);
1208	close(fd);
1209}
1210
/* Write "1" to the memfd_noexec sysctl, then run the checks for that value. */
static void test_sysctl_set_sysctl1(void)
{
	sysctl_assert_write("1");

	test_sysctl_sysctl1();
}
1216
1217static void test_sysctl_sysctl2(void)
1218{
1219	int fd;
1220
1221	sysctl_assert_equal("2");
1222
1223	fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
1224			    mfd_def_size,
1225			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1226	mfd_assert_mode(fd, 0666);
1227	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1228	mfd_fail_chmod(fd, 0777);
1229	close(fd);
1230
1231	mfd_fail_new("kern_memfd_sysctl_2_exec",
1232		     MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1233
1234	fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
1235			    mfd_def_size,
1236			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1237	mfd_assert_mode(fd, 0666);
1238	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1239	mfd_fail_chmod(fd, 0777);
1240	close(fd);
1241}
1242
/* Write "2" to the memfd_noexec sysctl, then run the checks for that value. */
static void test_sysctl_set_sysctl2(void)
{
	sysctl_assert_write("2");

	test_sysctl_sysctl2();
}
1248
1249static int sysctl_simple_child(void *arg)
1250{
 
 
 
1251	printf("%s sysctl 0\n", memfd_str);
1252	test_sysctl_set_sysctl0();
1253
1254	printf("%s sysctl 1\n", memfd_str);
1255	test_sysctl_set_sysctl1();
1256
1257	printf("%s sysctl 0\n", memfd_str);
1258	test_sysctl_set_sysctl0();
1259
1260	printf("%s sysctl 2\n", memfd_str);
1261	test_sysctl_set_sysctl2();
1262
1263	printf("%s sysctl 1\n", memfd_str);
1264	test_sysctl_set_sysctl1();
1265
1266	printf("%s sysctl 0\n", memfd_str);
1267	test_sysctl_set_sysctl0();
1268
1269	return 0;
1270}
1271
1272/*
1273 * Test sysctl
1274 * A very basic test to make sure the core sysctl semantics work.
1275 */
1276static void test_sysctl_simple(void)
1277{
1278	int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1279
1280	join_thread(pid);
1281}
1282
/*
 * clone() entry point: treat @arg as a void (*)(void) function, call it,
 * and return 0.
 */
static int sysctl_nested(void *arg)
{
	void (*check)(void) = arg;

	check();

	return 0;
}
1290
/*
 * clone() entry point: send SIGSTOP to ourselves first, so the function
 * passed in @arg only runs after somebody has sent SIGCONT, then hand over
 * to sysctl_nested().
 */
static int sysctl_nested_wait(void *arg)
{
	int ret;

	/* Wait for a SIGCONT. */
	kill(getpid(), SIGSTOP);

	ret = sysctl_nested(arg);
	return ret;
}
1297
/*
 * Expect a write of "0" to the sysctl to be refused, then confirm the
 * behaviour still matches the value "1".
 */
static void test_sysctl_sysctl1_failset(void)
{
	sysctl_fail_write("0");

	test_sysctl_sysctl1();
}
1303
/*
 * Expect writes of "1" and then "0" to the sysctl to be refused, confirming
 * after each attempt that the behaviour still matches the value "2".
 */
static void test_sysctl_sysctl2_failset(void)
{
	/* 2 -> 1 must be rejected */
	sysctl_fail_write("1");
	test_sysctl_sysctl2();

	/* 2 -> 0 must be rejected */
	sysctl_fail_write("0");
	test_sysctl_sysctl2();
}
1312
1313static int sysctl_nested_child(void *arg)
1314{
 
1315	int pid;
1316
1317	printf("%s nested sysctl 0\n", memfd_str);
1318	sysctl_assert_write("0");
1319	/* A further nested pidns works the same. */
1320	pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1321	join_thread(pid);
1322
1323	printf("%s nested sysctl 1\n", memfd_str);
1324	sysctl_assert_write("1");
1325	/* Child inherits our setting. */
1326	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
1327	join_thread(pid);
1328	/* Child cannot raise the setting. */
1329	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1330			   test_sysctl_sysctl1_failset);
1331	join_thread(pid);
1332	/* Child can lower the setting. */
1333	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1334			   test_sysctl_set_sysctl2);
1335	join_thread(pid);
1336	/* Child lowering the setting has no effect on our setting. */
1337	test_sysctl_sysctl1();
1338
1339	printf("%s nested sysctl 2\n", memfd_str);
1340	sysctl_assert_write("2");
1341	/* Child inherits our setting. */
1342	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
1343	join_thread(pid);
1344	/* Child cannot raise the setting. */
1345	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1346			   test_sysctl_sysctl2_failset);
1347	join_thread(pid);
1348
1349	/* Verify that the rules are actually inherited after fork. */
1350	printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
1351	sysctl_assert_write("0");
1352
1353	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1354			   test_sysctl_sysctl1_failset);
1355	sysctl_assert_write("1");
1356	kill(pid, SIGCONT);
1357	join_thread(pid);
1358
1359	printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
1360	sysctl_assert_write("0");
1361
1362	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1363			   test_sysctl_sysctl2_failset);
1364	sysctl_assert_write("2");
1365	kill(pid, SIGCONT);
1366	join_thread(pid);
1367
1368	/*
1369	 * Verify that the current effective setting is saved on fork, meaning
1370	 * that the parent lowering the sysctl doesn't affect already-forked
1371	 * children.
1372	 */
1373	printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
1374	sysctl_assert_write("2");
1375	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1376			   test_sysctl_sysctl2);
1377	sysctl_assert_write("1");
1378	kill(pid, SIGCONT);
1379	join_thread(pid);
1380
1381	printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
1382	sysctl_assert_write("2");
1383	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1384			   test_sysctl_sysctl2);
1385	sysctl_assert_write("0");
1386	kill(pid, SIGCONT);
1387	join_thread(pid);
1388
1389	printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
1390	sysctl_assert_write("1");
1391	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1392			   test_sysctl_sysctl1);
1393	sysctl_assert_write("0");
1394	kill(pid, SIGCONT);
1395	join_thread(pid);
1396
1397	return 0;
1398}
1399
1400/*
1401 * Test sysctl with nested pid namespaces
1402 * Make sure that the sysctl nesting semantics work correctly.
1403 */
1404static void test_sysctl_nested(void)
1405{
1406	int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
1407
1408	join_thread(pid);
1409}
1410
1411/*
1412 * Test sharing via dup()
1413 * Test that seals are shared between dupped FDs and they're all equal.
1414 */
1415static void test_share_dup(char *banner, char *b_suffix)
1416{
1417	int fd, fd2;
1418
1419	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1420
1421	fd = mfd_assert_new("kern_memfd_share_dup",
1422			    mfd_def_size,
1423			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1424	mfd_assert_has_seals(fd, 0);
1425
1426	fd2 = mfd_assert_dup(fd);
1427	mfd_assert_has_seals(fd2, 0);
1428
1429	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1430	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1431	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1432
1433	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1434	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1435	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1436
1437	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1438	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1439	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1440
1441	mfd_fail_add_seals(fd, F_SEAL_GROW);
1442	mfd_fail_add_seals(fd2, F_SEAL_GROW);
1443	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1444	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1445
1446	close(fd2);
1447
1448	mfd_fail_add_seals(fd, F_SEAL_GROW);
1449	close(fd);
1450}
1451
1452/*
1453 * Test sealing with active mmap()s
1454 * Modifying seals is only allowed if no other mmap() refs exist.
1455 */
1456static void test_share_mmap(char *banner, char *b_suffix)
1457{
1458	int fd;
1459	void *p;
1460
1461	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
1462
1463	fd = mfd_assert_new("kern_memfd_share_mmap",
1464			    mfd_def_size,
1465			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1466	mfd_assert_has_seals(fd, 0);
1467
1468	/* shared/writable ref prevents sealing WRITE, but allows others */
1469	p = mfd_assert_mmap_shared(fd);
1470	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1471	mfd_assert_has_seals(fd, 0);
1472	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1473	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1474	munmap(p, mfd_def_size);
1475
1476	/* readable ref allows sealing */
1477	p = mfd_assert_mmap_private(fd);
1478	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1479	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1480	munmap(p, mfd_def_size);
1481
1482	close(fd);
1483}
1484
1485/*
1486 * Test sealing with open(/proc/self/fd/%d)
1487 * Via /proc we can get access to a separate file-context for the same memfd.
1488 * This is *not* like dup(), but like a real separate open(). Make sure the
1489 * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
1490 */
1491static void test_share_open(char *banner, char *b_suffix)
1492{
1493	int fd, fd2;
1494
1495	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1496
1497	fd = mfd_assert_new("kern_memfd_share_open",
1498			    mfd_def_size,
1499			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1500	mfd_assert_has_seals(fd, 0);
1501
1502	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1503	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1504	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1505	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1506
1507	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1508	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1509	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1510
1511	close(fd);
1512	fd = mfd_assert_open(fd2, O_RDONLY, 0);
1513
1514	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1515	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1516	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1517
1518	close(fd2);
1519	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1520
1521	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
1522	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1523	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1524
1525	close(fd2);
1526	close(fd);
1527}
1528
1529/*
1530 * Test sharing via fork()
1531 * Test whether seal-modifications work as expected with forked childs.
1532 */
1533static void test_share_fork(char *banner, char *b_suffix)
1534{
1535	int fd;
1536	pid_t pid;
1537
1538	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1539
1540	fd = mfd_assert_new("kern_memfd_share_fork",
1541			    mfd_def_size,
1542			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1543	mfd_assert_has_seals(fd, 0);
1544
1545	pid = spawn_idle_thread(0);
1546	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1547	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1548
1549	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1550	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1551
1552	join_idle_thread(pid);
1553
1554	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1555	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1556
1557	close(fd);
1558}
1559
1560int main(int argc, char **argv)
1561{
1562	pid_t pid;
1563
1564	if (argc == 2) {
1565		if (!strcmp(argv[1], "hugetlbfs")) {
1566			unsigned long hpage_size = default_huge_page_size();
1567
1568			if (!hpage_size) {
1569				printf("Unable to determine huge page size\n");
1570				abort();
1571			}
1572
1573			hugetlbfs_test = 1;
1574			memfd_str = MEMFD_HUGE_STR;
1575			mfd_def_size = hpage_size * 2;
1576		} else {
1577			printf("Unknown option: %s\n", argv[1]);
1578			abort();
1579		}
1580	}
1581
1582	test_create();
1583	test_basic();
1584	test_exec_seal();
1585	test_exec_no_seal();
1586	test_noexec_seal();
1587
1588	test_seal_write();
1589	test_seal_future_write();
1590	test_seal_shrink();
1591	test_seal_grow();
1592	test_seal_resize();
1593
1594	test_sysctl_simple();
1595	test_sysctl_nested();
1596
1597	test_share_dup("SHARE-DUP", "");
1598	test_share_mmap("SHARE-MMAP", "");
1599	test_share_open("SHARE-OPEN", "");
1600	test_share_fork("SHARE-FORK", "");
1601
1602	/* Run test-suite in a multi-threaded environment with a shared
1603	 * file-table. */
1604	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
1605	test_share_dup("SHARE-DUP", SHARED_FT_STR);
1606	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
1607	test_share_open("SHARE-OPEN", SHARED_FT_STR);
1608	test_share_fork("SHARE-FORK", SHARED_FT_STR);
1609	join_idle_thread(pid);
1610
1611	printf("memfd: DONE\n");
1612
1613	return 0;
1614}
v6.8
   1// SPDX-License-Identifier: GPL-2.0
   2#define _GNU_SOURCE
   3#define __EXPORTED_HEADERS__
   4
   5#include <errno.h>
   6#include <inttypes.h>
   7#include <limits.h>
   8#include <linux/falloc.h>
   9#include <fcntl.h>
  10#include <linux/memfd.h>
  11#include <sched.h>
  12#include <stdio.h>
  13#include <stdlib.h>
  14#include <signal.h>
  15#include <string.h>
  16#include <sys/mman.h>
  17#include <sys/stat.h>
  18#include <sys/syscall.h>
  19#include <sys/wait.h>
  20#include <unistd.h>
  21#include <ctype.h>
  22
  23#include "common.h"
  24
  25#define MEMFD_STR	"memfd:"
  26#define MEMFD_HUGE_STR	"memfd-hugetlb:"
  27#define SHARED_FT_STR	"(shared file-table)"
  28
  29#define MFD_DEF_SIZE 8192
  30#define STACK_SIZE 65536
  31
  32#define F_SEAL_EXEC	0x0020
  33
  34#define F_WX_SEALS (F_SEAL_SHRINK | \
  35		    F_SEAL_GROW | \
  36		    F_SEAL_WRITE | \
  37		    F_SEAL_FUTURE_WRITE | \
  38		    F_SEAL_EXEC)
  39
  40#define MFD_NOEXEC_SEAL	0x0008U
  41
  42/*
  43 * Default is not to test hugetlbfs
  44 */
  45static size_t mfd_def_size = MFD_DEF_SIZE;
  46static const char *memfd_str = MEMFD_STR;
  47static int newpid_thread_fn2(void *arg);
  48static void join_newpid_thread(pid_t pid);
  49
  50static ssize_t fd2name(int fd, char *buf, size_t bufsize)
  51{
  52	char buf1[PATH_MAX];
  53	int size;
  54	ssize_t nbytes;
  55
  56	size = snprintf(buf1, PATH_MAX, "/proc/self/fd/%d", fd);
  57	if (size < 0) {
  58		printf("snprintf(%d) failed on %m\n", fd);
  59		abort();
  60	}
  61
  62	/*
  63	 * reserver one byte for string termination.
  64	 */
  65	nbytes = readlink(buf1, buf, bufsize-1);
  66	if (nbytes == -1) {
  67		printf("readlink(%s) failed %m\n", buf1);
  68		abort();
  69	}
  70	buf[nbytes] = '\0';
  71	return nbytes;
  72}
  73
  74static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
  75{
  76	int r, fd;
  77
  78	fd = sys_memfd_create(name, flags);
  79	if (fd < 0) {
  80		printf("memfd_create(\"%s\", %u) failed: %m\n",
  81		       name, flags);
  82		abort();
  83	}
  84
  85	r = ftruncate(fd, sz);
  86	if (r < 0) {
  87		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
  88		abort();
  89	}
  90
  91	return fd;
  92}
  93
  94static void sysctl_assert_write(const char *val)
  95{
  96	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
  97
  98	if (fd < 0) {
  99		printf("open sysctl failed: %m\n");
 100		abort();
 101	}
 102
 103	if (write(fd, val, strlen(val)) < 0) {
 104		printf("write sysctl %s failed: %m\n", val);
 105		abort();
 106	}
 107}
 108
 109static void sysctl_fail_write(const char *val)
 110{
 111	int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
 112
 113	if (fd < 0) {
 114		printf("open sysctl failed: %m\n");
 115		abort();
 116	}
 117
 118	if (write(fd, val, strlen(val)) >= 0) {
 119		printf("write sysctl %s succeeded, but failure expected\n",
 120				val);
 121		abort();
 122	}
 123}
 124
 125static void sysctl_assert_equal(const char *val)
 126{
 127	char *p, buf[128] = {};
 128	int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);
 129
 130	if (fd < 0) {
 131		printf("open sysctl failed: %m\n");
 132		abort();
 133	}
 134
 135	if (read(fd, buf, sizeof(buf)) < 0) {
 136		printf("read sysctl failed: %m\n");
 137		abort();
 138	}
 139
 140	/* Strip trailing whitespace. */
 141	p = buf;
 142	while (!isspace(*p))
 143		p++;
 144	*p = '\0';
 145
 146	if (strcmp(buf, val) != 0) {
 147		printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
 148		abort();
 149	}
 150}
 151
 152static int mfd_assert_reopen_fd(int fd_in)
 153{
 154	int fd;
 155	char path[100];
 156
 157	sprintf(path, "/proc/self/fd/%d", fd_in);
 158
 159	fd = open(path, O_RDWR);
 160	if (fd < 0) {
 161		printf("re-open of existing fd %d failed\n", fd_in);
 162		abort();
 163	}
 164
 165	return fd;
 166}
 167
 168static void mfd_fail_new(const char *name, unsigned int flags)
 169{
 170	int r;
 171
 172	r = sys_memfd_create(name, flags);
 173	if (r >= 0) {
 174		printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
 175		       name, flags);
 176		close(r);
 177		abort();
 178	}
 179}
 180
 181static unsigned int mfd_assert_get_seals(int fd)
 182{
 183	int r;
 184
 185	r = fcntl(fd, F_GET_SEALS);
 186	if (r < 0) {
 187		printf("GET_SEALS(%d) failed: %m\n", fd);
 188		abort();
 189	}
 190
 191	return (unsigned int)r;
 192}
 193
 194static void mfd_assert_has_seals(int fd, unsigned int seals)
 195{
 196	char buf[PATH_MAX];
 197	int nbytes;
 198	unsigned int s;
 199	fd2name(fd, buf, PATH_MAX);
 200
 201	s = mfd_assert_get_seals(fd);
 202	if (s != seals) {
 203		printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
 204		abort();
 205	}
 206}
 207
 208static void mfd_assert_add_seals(int fd, unsigned int seals)
 209{
 210	int r;
 211	unsigned int s;
 212
 213	s = mfd_assert_get_seals(fd);
 214	r = fcntl(fd, F_ADD_SEALS, seals);
 215	if (r < 0) {
 216		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, s, seals);
 217		abort();
 218	}
 219}
 220
 221static void mfd_fail_add_seals(int fd, unsigned int seals)
 222{
 223	int r;
 224	unsigned int s;
 225
 226	r = fcntl(fd, F_GET_SEALS);
 227	if (r < 0)
 228		s = 0;
 229	else
 230		s = (unsigned int)r;
 231
 232	r = fcntl(fd, F_ADD_SEALS, seals);
 233	if (r >= 0) {
 234		printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
 235				fd, s, seals);
 236		abort();
 237	}
 238}
 239
 240static void mfd_assert_size(int fd, size_t size)
 241{
 242	struct stat st;
 243	int r;
 244
 245	r = fstat(fd, &st);
 246	if (r < 0) {
 247		printf("fstat(%d) failed: %m\n", fd);
 248		abort();
 249	} else if (st.st_size != size) {
 250		printf("wrong file size %lld, but expected %lld\n",
 251		       (long long)st.st_size, (long long)size);
 252		abort();
 253	}
 254}
 255
 256static int mfd_assert_dup(int fd)
 257{
 258	int r;
 259
 260	r = dup(fd);
 261	if (r < 0) {
 262		printf("dup(%d) failed: %m\n", fd);
 263		abort();
 264	}
 265
 266	return r;
 267}
 268
 269static void *mfd_assert_mmap_shared(int fd)
 270{
 271	void *p;
 272
 273	p = mmap(NULL,
 274		 mfd_def_size,
 275		 PROT_READ | PROT_WRITE,
 276		 MAP_SHARED,
 277		 fd,
 278		 0);
 279	if (p == MAP_FAILED) {
 280		printf("mmap() failed: %m\n");
 281		abort();
 282	}
 283
 284	return p;
 285}
 286
 287static void *mfd_assert_mmap_private(int fd)
 288{
 289	void *p;
 290
 291	p = mmap(NULL,
 292		 mfd_def_size,
 293		 PROT_READ,
 294		 MAP_PRIVATE,
 295		 fd,
 296		 0);
 297	if (p == MAP_FAILED) {
 298		printf("mmap() failed: %m\n");
 299		abort();
 300	}
 301
 302	return p;
 303}
 304
 305static int mfd_assert_open(int fd, int flags, mode_t mode)
 306{
 307	char buf[512];
 308	int r;
 309
 310	sprintf(buf, "/proc/self/fd/%d", fd);
 311	r = open(buf, flags, mode);
 312	if (r < 0) {
 313		printf("open(%s) failed: %m\n", buf);
 314		abort();
 315	}
 316
 317	return r;
 318}
 319
 320static void mfd_fail_open(int fd, int flags, mode_t mode)
 321{
 322	char buf[512];
 323	int r;
 324
 325	sprintf(buf, "/proc/self/fd/%d", fd);
 326	r = open(buf, flags, mode);
 327	if (r >= 0) {
 328		printf("open(%s) didn't fail as expected\n", buf);
 329		abort();
 330	}
 331}
 332
 333static void mfd_assert_read(int fd)
 334{
 335	char buf[16];
 336	void *p;
 337	ssize_t l;
 338
 339	l = read(fd, buf, sizeof(buf));
 340	if (l != sizeof(buf)) {
 341		printf("read() failed: %m\n");
 342		abort();
 343	}
 344
 345	/* verify PROT_READ *is* allowed */
 346	p = mmap(NULL,
 347		 mfd_def_size,
 348		 PROT_READ,
 349		 MAP_PRIVATE,
 350		 fd,
 351		 0);
 352	if (p == MAP_FAILED) {
 353		printf("mmap() failed: %m\n");
 354		abort();
 355	}
 356	munmap(p, mfd_def_size);
 357
 358	/* verify MAP_PRIVATE is *always* allowed (even writable) */
 359	p = mmap(NULL,
 360		 mfd_def_size,
 361		 PROT_READ | PROT_WRITE,
 362		 MAP_PRIVATE,
 363		 fd,
 364		 0);
 365	if (p == MAP_FAILED) {
 366		printf("mmap() failed: %m\n");
 367		abort();
 368	}
 369	munmap(p, mfd_def_size);
 370}
 371
 372/* Test that PROT_READ + MAP_SHARED mappings work. */
 373static void mfd_assert_read_shared(int fd)
 374{
 375	void *p;
 376
 377	/* verify PROT_READ and MAP_SHARED *is* allowed */
 378	p = mmap(NULL,
 379		 mfd_def_size,
 380		 PROT_READ,
 381		 MAP_SHARED,
 382		 fd,
 383		 0);
 384	if (p == MAP_FAILED) {
 385		printf("mmap() failed: %m\n");
 386		abort();
 387	}
 388	munmap(p, mfd_def_size);
 389}
 390
 391static void mfd_assert_fork_private_write(int fd)
 392{
 393	int *p;
 394	pid_t pid;
 395
 396	p = mmap(NULL,
 397		 mfd_def_size,
 398		 PROT_READ | PROT_WRITE,
 399		 MAP_PRIVATE,
 400		 fd,
 401		 0);
 402	if (p == MAP_FAILED) {
 403		printf("mmap() failed: %m\n");
 404		abort();
 405	}
 406
 407	p[0] = 22;
 408
 409	pid = fork();
 410	if (pid == 0) {
 411		p[0] = 33;
 412		exit(0);
 413	} else {
 414		waitpid(pid, NULL, 0);
 415
 416		if (p[0] != 22) {
 417			printf("MAP_PRIVATE copy-on-write failed: %m\n");
 418			abort();
 419		}
 420	}
 421
 422	munmap(p, mfd_def_size);
 423}
 424
 425static void mfd_assert_write(int fd)
 426{
 427	ssize_t l;
 428	void *p;
 429	int r;
 430
 431	/*
 432	 * huegtlbfs does not support write, but we want to
 433	 * verify everything else here.
 434	 */
 435	if (!hugetlbfs_test) {
 436		/* verify write() succeeds */
 437		l = write(fd, "\0\0\0\0", 4);
 438		if (l != 4) {
 439			printf("write() failed: %m\n");
 440			abort();
 441		}
 442	}
 443
 444	/* verify PROT_READ | PROT_WRITE is allowed */
 445	p = mmap(NULL,
 446		 mfd_def_size,
 447		 PROT_READ | PROT_WRITE,
 448		 MAP_SHARED,
 449		 fd,
 450		 0);
 451	if (p == MAP_FAILED) {
 452		printf("mmap() failed: %m\n");
 453		abort();
 454	}
 455	*(char *)p = 0;
 456	munmap(p, mfd_def_size);
 457
 458	/* verify PROT_WRITE is allowed */
 459	p = mmap(NULL,
 460		 mfd_def_size,
 461		 PROT_WRITE,
 462		 MAP_SHARED,
 463		 fd,
 464		 0);
 465	if (p == MAP_FAILED) {
 466		printf("mmap() failed: %m\n");
 467		abort();
 468	}
 469	*(char *)p = 0;
 470	munmap(p, mfd_def_size);
 471
 472	/* verify PROT_READ with MAP_SHARED is allowed and a following
 473	 * mprotect(PROT_WRITE) allows writing */
 474	p = mmap(NULL,
 475		 mfd_def_size,
 476		 PROT_READ,
 477		 MAP_SHARED,
 478		 fd,
 479		 0);
 480	if (p == MAP_FAILED) {
 481		printf("mmap() failed: %m\n");
 482		abort();
 483	}
 484
 485	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
 486	if (r < 0) {
 487		printf("mprotect() failed: %m\n");
 488		abort();
 489	}
 490
 491	*(char *)p = 0;
 492	munmap(p, mfd_def_size);
 493
 494	/* verify PUNCH_HOLE works */
 495	r = fallocate(fd,
 496		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 497		      0,
 498		      mfd_def_size);
 499	if (r < 0) {
 500		printf("fallocate(PUNCH_HOLE) failed: %m\n");
 501		abort();
 502	}
 503}
 504
 505static void mfd_fail_write(int fd)
 506{
 507	ssize_t l;
 508	void *p;
 509	int r;
 510
 511	/* verify write() fails */
 512	l = write(fd, "data", 4);
 513	if (l != -EPERM) {
 514		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
 515		abort();
 516	}
 517
 518	/* verify PROT_READ | PROT_WRITE is not allowed */
 519	p = mmap(NULL,
 520		 mfd_def_size,
 521		 PROT_READ | PROT_WRITE,
 522		 MAP_SHARED,
 523		 fd,
 524		 0);
 525	if (p != MAP_FAILED) {
 526		printf("mmap() didn't fail as expected\n");
 527		abort();
 528	}
 529
 530	/* verify PROT_WRITE is not allowed */
 531	p = mmap(NULL,
 532		 mfd_def_size,
 533		 PROT_WRITE,
 534		 MAP_SHARED,
 535		 fd,
 536		 0);
 537	if (p != MAP_FAILED) {
 538		printf("mmap() didn't fail as expected\n");
 539		abort();
 540	}
 541
 542	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
 543	 * allowed. Note that for r/w the kernel already prevents the mmap. */
 544	p = mmap(NULL,
 545		 mfd_def_size,
 546		 PROT_READ,
 547		 MAP_SHARED,
 548		 fd,
 549		 0);
 550	if (p != MAP_FAILED) {
 551		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
 552		if (r >= 0) {
 553			printf("mmap()+mprotect() didn't fail as expected\n");
 554			abort();
 555		}
 556		munmap(p, mfd_def_size);
 557	}
 558
 559	/* verify PUNCH_HOLE fails */
 560	r = fallocate(fd,
 561		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 562		      0,
 563		      mfd_def_size);
 564	if (r >= 0) {
 565		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
 566		abort();
 567	}
 568}
 569
 570static void mfd_assert_shrink(int fd)
 571{
 572	int r, fd2;
 573
 574	r = ftruncate(fd, mfd_def_size / 2);
 575	if (r < 0) {
 576		printf("ftruncate(SHRINK) failed: %m\n");
 577		abort();
 578	}
 579
 580	mfd_assert_size(fd, mfd_def_size / 2);
 581
 582	fd2 = mfd_assert_open(fd,
 583			      O_RDWR | O_CREAT | O_TRUNC,
 584			      S_IRUSR | S_IWUSR);
 585	close(fd2);
 586
 587	mfd_assert_size(fd, 0);
 588}
 589
 590static void mfd_fail_shrink(int fd)
 591{
 592	int r;
 593
 594	r = ftruncate(fd, mfd_def_size / 2);
 595	if (r >= 0) {
 596		printf("ftruncate(SHRINK) didn't fail as expected\n");
 597		abort();
 598	}
 599
 600	mfd_fail_open(fd,
 601		      O_RDWR | O_CREAT | O_TRUNC,
 602		      S_IRUSR | S_IWUSR);
 603}
 604
 605static void mfd_assert_grow(int fd)
 606{
 607	int r;
 608
 609	r = ftruncate(fd, mfd_def_size * 2);
 610	if (r < 0) {
 611		printf("ftruncate(GROW) failed: %m\n");
 612		abort();
 613	}
 614
 615	mfd_assert_size(fd, mfd_def_size * 2);
 616
 617	r = fallocate(fd,
 618		      0,
 619		      0,
 620		      mfd_def_size * 4);
 621	if (r < 0) {
 622		printf("fallocate(ALLOC) failed: %m\n");
 623		abort();
 624	}
 625
 626	mfd_assert_size(fd, mfd_def_size * 4);
 627}
 628
 629static void mfd_fail_grow(int fd)
 630{
 631	int r;
 632
 633	r = ftruncate(fd, mfd_def_size * 2);
 634	if (r >= 0) {
 635		printf("ftruncate(GROW) didn't fail as expected\n");
 636		abort();
 637	}
 638
 639	r = fallocate(fd,
 640		      0,
 641		      0,
 642		      mfd_def_size * 4);
 643	if (r >= 0) {
 644		printf("fallocate(ALLOC) didn't fail as expected\n");
 645		abort();
 646	}
 647}
 648
 649static void mfd_assert_grow_write(int fd)
 650{
 651	static char *buf;
 652	ssize_t l;
 653
 654	/* hugetlbfs does not support write */
 655	if (hugetlbfs_test)
 656		return;
 657
 658	buf = malloc(mfd_def_size * 8);
 659	if (!buf) {
 660		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
 661		abort();
 662	}
 663
 664	l = pwrite(fd, buf, mfd_def_size * 8, 0);
 665	if (l != (mfd_def_size * 8)) {
 666		printf("pwrite() failed: %m\n");
 667		abort();
 668	}
 669
 670	mfd_assert_size(fd, mfd_def_size * 8);
 671}
 672
 673static void mfd_fail_grow_write(int fd)
 674{
 675	static char *buf;
 676	ssize_t l;
 677
 678	/* hugetlbfs does not support write */
 679	if (hugetlbfs_test)
 680		return;
 681
 682	buf = malloc(mfd_def_size * 8);
 683	if (!buf) {
 684		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
 685		abort();
 686	}
 687
 688	l = pwrite(fd, buf, mfd_def_size * 8, 0);
 689	if (l == (mfd_def_size * 8)) {
 690		printf("pwrite() didn't fail as expected\n");
 691		abort();
 692	}
 693}
 694
 695static void mfd_assert_mode(int fd, int mode)
 696{
 697	struct stat st;
 698	char buf[PATH_MAX];
 699	int nbytes;
 700
 701	fd2name(fd, buf, PATH_MAX);
 702
 703	if (fstat(fd, &st) < 0) {
 704		printf("fstat(%s) failed: %m\n", buf);
 705		abort();
 706	}
 707
 708	if ((st.st_mode & 07777) != mode) {
 709		printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
 710		       buf, (int)st.st_mode & 07777, mode);
 711		abort();
 712	}
 713}
 714
 715static void mfd_assert_chmod(int fd, int mode)
 716{
 717	char buf[PATH_MAX];
 718	int nbytes;
 719
 720	fd2name(fd, buf, PATH_MAX);
 721
 722	if (fchmod(fd, mode) < 0) {
 723		printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
 724		abort();
 725	}
 726
 727	mfd_assert_mode(fd, mode);
 728}
 729
 730static void mfd_fail_chmod(int fd, int mode)
 731{
 732	struct stat st;
 733	char buf[PATH_MAX];
 734	int nbytes;
 735
 736	fd2name(fd, buf, PATH_MAX);
 737
 738	if (fstat(fd, &st) < 0) {
 739		printf("fstat(%s) failed: %m\n", buf);
 740		abort();
 741	}
 742
 743	if (fchmod(fd, mode) == 0) {
 744		printf("fchmod(%s, 0%04o) didn't fail as expected\n",
 745		       buf, mode);
 746		abort();
 747	}
 748
 749	/* verify that file mode bits did not change */
 750	mfd_assert_mode(fd, st.st_mode & 07777);
 751}
 752
 753static int idle_thread_fn(void *arg)
 754{
 755	sigset_t set;
 756	int sig;
 757
 758	/* dummy waiter; SIGTERM terminates us anyway */
 759	sigemptyset(&set);
 760	sigaddset(&set, SIGTERM);
 761	sigwait(&set, &sig);
 762
 763	return 0;
 764}
 765
 766static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
 767{
 768	uint8_t *stack;
 769	pid_t pid;
 770
 771	stack = malloc(STACK_SIZE);
 772	if (!stack) {
 773		printf("malloc(STACK_SIZE) failed: %m\n");
 774		abort();
 775	}
 776
 777	pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
 778	if (pid < 0) {
 779		printf("clone() failed: %m\n");
 780		abort();
 781	}
 782
 783	return pid;
 784}
 785
 786static void join_thread(pid_t pid)
 787{
 788	int wstatus;
 789
 790	if (waitpid(pid, &wstatus, 0) < 0) {
 791		printf("newpid thread: waitpid() failed: %m\n");
 792		abort();
 793	}
 794
 795	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
 796		printf("newpid thread: exited with non-zero error code %d\n",
 797		       WEXITSTATUS(wstatus));
 798		abort();
 799	}
 800
 801	if (WIFSIGNALED(wstatus)) {
 802		printf("newpid thread: killed by signal %d\n",
 803		       WTERMSIG(wstatus));
 804		abort();
 805	}
 806}
 807
 808static pid_t spawn_idle_thread(unsigned int flags)
 809{
 810	return spawn_thread(flags, idle_thread_fn, NULL);
 811}
 812
 813static void join_idle_thread(pid_t pid)
 814{
 815	kill(pid, SIGTERM);
 816	waitpid(pid, NULL, 0);
 817}
 818
 819/*
 820 * Test memfd_create() syscall
 821 * Verify syscall-argument validation, including name checks, flag validation
 822 * and more.
 823 */
 824static void test_create(void)
 825{
 826	char buf[2048];
 827	int fd;
 828
 829	printf("%s CREATE\n", memfd_str);
 830
 831	/* test NULL name */
 832	mfd_fail_new(NULL, 0);
 833
 834	/* test over-long name (not zero-terminated) */
 835	memset(buf, 0xff, sizeof(buf));
 836	mfd_fail_new(buf, 0);
 837
 838	/* test over-long zero-terminated name */
 839	memset(buf, 0xff, sizeof(buf));
 840	buf[sizeof(buf) - 1] = 0;
 841	mfd_fail_new(buf, 0);
 842
 843	/* verify "" is a valid name */
 844	fd = mfd_assert_new("", 0, 0);
 845	close(fd);
 846
 847	/* verify invalid O_* open flags */
 848	mfd_fail_new("", 0x0100);
 849	mfd_fail_new("", ~MFD_CLOEXEC);
 850	mfd_fail_new("", ~MFD_ALLOW_SEALING);
 851	mfd_fail_new("", ~0);
 852	mfd_fail_new("", 0x80000000U);
 853
 854	/* verify EXEC and NOEXEC_SEAL can't both be set */
 855	mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
 856
 857	/* verify MFD_CLOEXEC is allowed */
 858	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
 859	close(fd);
 860
 861	/* verify MFD_ALLOW_SEALING is allowed */
 862	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
 863	close(fd);
 864
 865	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
 866	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
 867	close(fd);
 868}
 869
 870/*
 871 * Test basic sealing
 872 * A very basic sealing test to see whether setting/retrieving seals works.
 873 */
 874static void test_basic(void)
 875{
 876	int fd;
 877
 878	printf("%s BASIC\n", memfd_str);
 879
 880	fd = mfd_assert_new("kern_memfd_basic",
 881			    mfd_def_size,
 882			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 883
 884	/* add basic seals */
 885	mfd_assert_has_seals(fd, 0);
 886	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
 887				 F_SEAL_WRITE);
 888	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
 889				 F_SEAL_WRITE);
 890
 891	/* add them again */
 892	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
 893				 F_SEAL_WRITE);
 894	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
 895				 F_SEAL_WRITE);
 896
 897	/* add more seals and seal against sealing */
 898	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
 899	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
 900				 F_SEAL_GROW |
 901				 F_SEAL_WRITE |
 902				 F_SEAL_SEAL);
 903
 904	/* verify that sealing no longer works */
 905	mfd_fail_add_seals(fd, F_SEAL_GROW);
 906	mfd_fail_add_seals(fd, 0);
 907
 908	close(fd);
 909
 910	/* verify sealing does not work without MFD_ALLOW_SEALING */
 911	fd = mfd_assert_new("kern_memfd_basic",
 912			    mfd_def_size,
 913			    MFD_CLOEXEC);
 914	mfd_assert_has_seals(fd, F_SEAL_SEAL);
 915	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
 916			       F_SEAL_GROW |
 917			       F_SEAL_WRITE);
 918	mfd_assert_has_seals(fd, F_SEAL_SEAL);
 919	close(fd);
 920}
 921
 922/*
 923 * Test SEAL_WRITE
 924 * Test whether SEAL_WRITE actually prevents modifications.
 925 */
 926static void test_seal_write(void)
 927{
 928	int fd;
 929
 930	printf("%s SEAL-WRITE\n", memfd_str);
 931
 932	fd = mfd_assert_new("kern_memfd_seal_write",
 933			    mfd_def_size,
 934			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 935	mfd_assert_has_seals(fd, 0);
 936	mfd_assert_add_seals(fd, F_SEAL_WRITE);
 937	mfd_assert_has_seals(fd, F_SEAL_WRITE);
 938
 939	mfd_assert_read(fd);
 940	mfd_fail_write(fd);
 941	mfd_assert_shrink(fd);
 942	mfd_assert_grow(fd);
 943	mfd_fail_grow_write(fd);
 944
 945	close(fd);
 946}
 947
 948/*
 949 * Test SEAL_FUTURE_WRITE
 950 * Test whether SEAL_FUTURE_WRITE actually prevents modifications.
 951 */
 952static void test_seal_future_write(void)
 953{
 954	int fd, fd2;
 955	void *p;
 956
 957	printf("%s SEAL-FUTURE-WRITE\n", memfd_str);
 958
 959	fd = mfd_assert_new("kern_memfd_seal_future_write",
 960			    mfd_def_size,
 961			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 962
 963	p = mfd_assert_mmap_shared(fd);
 964
 965	mfd_assert_has_seals(fd, 0);
 966
 967	mfd_assert_add_seals(fd, F_SEAL_FUTURE_WRITE);
 968	mfd_assert_has_seals(fd, F_SEAL_FUTURE_WRITE);
 969
 970	/* read should pass, writes should fail */
 971	mfd_assert_read(fd);
 972	mfd_assert_read_shared(fd);
 973	mfd_fail_write(fd);
 974
 975	fd2 = mfd_assert_reopen_fd(fd);
 976	/* read should pass, writes should still fail */
 977	mfd_assert_read(fd2);
 978	mfd_assert_read_shared(fd2);
 979	mfd_fail_write(fd2);
 980
 981	mfd_assert_fork_private_write(fd);
 982
 983	munmap(p, mfd_def_size);
 984	close(fd2);
 985	close(fd);
 986}
 987
 988/*
 989 * Test SEAL_SHRINK
 990 * Test whether SEAL_SHRINK actually prevents shrinking
 991 */
 992static void test_seal_shrink(void)
 993{
 994	int fd;
 995
 996	printf("%s SEAL-SHRINK\n", memfd_str);
 997
 998	fd = mfd_assert_new("kern_memfd_seal_shrink",
 999			    mfd_def_size,
1000			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1001	mfd_assert_has_seals(fd, 0);
1002	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1003	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1004
1005	mfd_assert_read(fd);
1006	mfd_assert_write(fd);
1007	mfd_fail_shrink(fd);
1008	mfd_assert_grow(fd);
1009	mfd_assert_grow_write(fd);
1010
1011	close(fd);
1012}
1013
1014/*
1015 * Test SEAL_GROW
1016 * Test whether SEAL_GROW actually prevents growing
1017 */
1018static void test_seal_grow(void)
1019{
1020	int fd;
1021
1022	printf("%s SEAL-GROW\n", memfd_str);
1023
1024	fd = mfd_assert_new("kern_memfd_seal_grow",
1025			    mfd_def_size,
1026			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1027	mfd_assert_has_seals(fd, 0);
1028	mfd_assert_add_seals(fd, F_SEAL_GROW);
1029	mfd_assert_has_seals(fd, F_SEAL_GROW);
1030
1031	mfd_assert_read(fd);
1032	mfd_assert_write(fd);
1033	mfd_assert_shrink(fd);
1034	mfd_fail_grow(fd);
1035	mfd_fail_grow_write(fd);
1036
1037	close(fd);
1038}
1039
1040/*
1041 * Test SEAL_SHRINK | SEAL_GROW
1042 * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
1043 */
1044static void test_seal_resize(void)
1045{
1046	int fd;
1047
1048	printf("%s SEAL-RESIZE\n", memfd_str);
1049
1050	fd = mfd_assert_new("kern_memfd_seal_resize",
1051			    mfd_def_size,
1052			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1053	mfd_assert_has_seals(fd, 0);
1054	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1055	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
1056
1057	mfd_assert_read(fd);
1058	mfd_assert_write(fd);
1059	mfd_fail_shrink(fd);
1060	mfd_fail_grow(fd);
1061	mfd_fail_grow_write(fd);
1062
1063	close(fd);
1064}
1065
1066/*
1067 * Test SEAL_EXEC
1068 * Test fd is created with exec and allow sealing.
1069 * chmod() cannot change x bits after sealing.
1070 */
1071static void test_exec_seal(void)
1072{
1073	int fd;
1074
1075	printf("%s SEAL-EXEC\n", memfd_str);
1076
1077	printf("%s	Apply SEAL_EXEC\n", memfd_str);
1078	fd = mfd_assert_new("kern_memfd_seal_exec",
1079			    mfd_def_size,
1080			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1081
1082	mfd_assert_mode(fd, 0777);
1083	mfd_assert_chmod(fd, 0644);
1084
1085	mfd_assert_has_seals(fd, 0);
1086	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1087	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1088
1089	mfd_assert_chmod(fd, 0600);
1090	mfd_fail_chmod(fd, 0777);
1091	mfd_fail_chmod(fd, 0670);
1092	mfd_fail_chmod(fd, 0605);
1093	mfd_fail_chmod(fd, 0700);
1094	mfd_fail_chmod(fd, 0100);
1095	mfd_assert_chmod(fd, 0666);
1096	mfd_assert_write(fd);
1097	close(fd);
1098
1099	printf("%s	Apply ALL_SEALS\n", memfd_str);
1100	fd = mfd_assert_new("kern_memfd_seal_exec",
1101			    mfd_def_size,
1102			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
1103
1104	mfd_assert_mode(fd, 0777);
1105	mfd_assert_chmod(fd, 0700);
1106
1107	mfd_assert_has_seals(fd, 0);
1108	mfd_assert_add_seals(fd, F_SEAL_EXEC);
1109	mfd_assert_has_seals(fd, F_WX_SEALS);
1110
1111	mfd_fail_chmod(fd, 0711);
1112	mfd_fail_chmod(fd, 0600);
1113	mfd_fail_write(fd);
1114	close(fd);
1115}
1116
1117/*
1118 * Test EXEC_NO_SEAL
1119 * Test fd is created with exec and not allow sealing.
1120 */
1121static void test_exec_no_seal(void)
1122{
1123	int fd;
1124
1125	printf("%s EXEC_NO_SEAL\n", memfd_str);
1126
1127	/* Create with EXEC but without ALLOW_SEALING */
1128	fd = mfd_assert_new("kern_memfd_exec_no_sealing",
1129			    mfd_def_size,
1130			    MFD_CLOEXEC | MFD_EXEC);
1131	mfd_assert_mode(fd, 0777);
1132	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1133	mfd_assert_chmod(fd, 0666);
1134	close(fd);
1135}
1136
1137/*
1138 * Test memfd_create with MFD_NOEXEC flag
1139 */
1140static void test_noexec_seal(void)
1141{
1142	int fd;
1143
1144	printf("%s NOEXEC_SEAL\n", memfd_str);
1145
1146	/* Create with NOEXEC and ALLOW_SEALING */
1147	fd = mfd_assert_new("kern_memfd_noexec",
1148			    mfd_def_size,
1149			    MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
1150	mfd_assert_mode(fd, 0666);
1151	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1152	mfd_fail_chmod(fd, 0777);
1153	close(fd);
1154
1155	/* Create with NOEXEC but without ALLOW_SEALING */
1156	fd = mfd_assert_new("kern_memfd_noexec",
1157			    mfd_def_size,
1158			    MFD_CLOEXEC | MFD_NOEXEC_SEAL);
1159	mfd_assert_mode(fd, 0666);
1160	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1161	mfd_fail_chmod(fd, 0777);
1162	close(fd);
1163}
1164
1165static void test_sysctl_sysctl0(void)
1166{
1167	int fd;
1168
1169	sysctl_assert_equal("0");
1170
1171	fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
1172			    mfd_def_size,
1173			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1174	mfd_assert_mode(fd, 0777);
1175	mfd_assert_has_seals(fd, 0);
1176	mfd_assert_chmod(fd, 0644);
1177	close(fd);
1178}
1179
/* Write "0" to the sysctl, then run the checks that belong to value 0. */
static void test_sysctl_set_sysctl0(void)
{
	sysctl_assert_write("0");
	test_sysctl_sysctl0();
}
1185
1186static void test_sysctl_sysctl1(void)
1187{
1188	int fd;
1189
1190	sysctl_assert_equal("1");
1191
1192	fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
1193			    mfd_def_size,
1194			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1195	mfd_assert_mode(fd, 0666);
1196	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1197	mfd_fail_chmod(fd, 0777);
1198	close(fd);
1199
1200	fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
1201			    mfd_def_size,
1202			    MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1203	mfd_assert_mode(fd, 0777);
1204	mfd_assert_has_seals(fd, 0);
1205	mfd_assert_chmod(fd, 0644);
1206	close(fd);
1207
1208	fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
1209			    mfd_def_size,
1210			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1211	mfd_assert_mode(fd, 0666);
1212	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1213	mfd_fail_chmod(fd, 0777);
1214	close(fd);
1215}
1216
/* Write "1" to the sysctl, then run the checks that belong to value 1. */
static void test_sysctl_set_sysctl1(void)
{
	sysctl_assert_write("1");
	test_sysctl_sysctl1();
}
1222
1223static void test_sysctl_sysctl2(void)
1224{
1225	int fd;
1226
1227	sysctl_assert_equal("2");
1228
1229	fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
1230			    mfd_def_size,
1231			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1232	mfd_assert_mode(fd, 0666);
1233	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1234	mfd_fail_chmod(fd, 0777);
1235	close(fd);
1236
1237	mfd_fail_new("kern_memfd_sysctl_2_exec",
1238		     MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
1239
1240	fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
1241			    mfd_def_size,
1242			    MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
1243	mfd_assert_mode(fd, 0666);
1244	mfd_assert_has_seals(fd, F_SEAL_EXEC);
1245	mfd_fail_chmod(fd, 0777);
1246	close(fd);
1247}
1248
/* Write "2" to the sysctl, then run the checks that belong to value 2. */
static void test_sysctl_set_sysctl2(void)
{
	sysctl_assert_write("2");
	test_sysctl_sysctl2();
}
1254
1255static int sysctl_simple_child(void *arg)
1256{
1257	int fd;
1258	int pid;
1259
1260	printf("%s sysctl 0\n", memfd_str);
1261	test_sysctl_set_sysctl0();
1262
1263	printf("%s sysctl 1\n", memfd_str);
1264	test_sysctl_set_sysctl1();
1265
1266	printf("%s sysctl 0\n", memfd_str);
1267	test_sysctl_set_sysctl0();
1268
1269	printf("%s sysctl 2\n", memfd_str);
1270	test_sysctl_set_sysctl2();
1271
1272	printf("%s sysctl 1\n", memfd_str);
1273	test_sysctl_set_sysctl1();
1274
1275	printf("%s sysctl 0\n", memfd_str);
1276	test_sysctl_set_sysctl0();
1277
1278	return 0;
1279}
1280
1281/*
1282 * Test sysctl
1283 * A very basic test to make sure the core sysctl semantics work.
1284 */
1285static void test_sysctl_simple(void)
1286{
1287	int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1288
1289	join_thread(pid);
1290}
1291
/*
 * Thread entry-point adapter: @arg carries a pointer to a
 * void (*)(void) test function, which is invoked once.  Always returns 0.
 */
static int sysctl_nested(void *arg)
{
	void (*test_fn)(void);

	test_fn = arg;
	test_fn();

	return 0;
}
1299
/*
 * Like sysctl_nested(), but first sends SIGSTOP to the calling process so
 * that the test function in @arg only runs once a SIGCONT arrives.
 */
static int sysctl_nested_wait(void *arg)
{
	pid_t self = getpid();

	/* Park here until someone resumes us with SIGCONT. */
	kill(self, SIGSTOP);

	return sysctl_nested(arg);
}
1306
/*
 * With the sysctl at 1, writing "0" is expected to fail and the checks
 * for value 1 are expected to still pass afterwards.
 */
static void test_sysctl_sysctl1_failset(void)
{
	sysctl_fail_write("0");
	test_sysctl_sysctl1();
}
1312
/*
 * With the sysctl at 2, writing either smaller value ("1", then "0") is
 * expected to fail; the checks for value 2 are re-run after each attempt.
 */
static void test_sysctl_sysctl2_failset(void)
{
	/* First attempt: 2 -> 1 */
	sysctl_fail_write("1");
	test_sysctl_sysctl2();

	/* Second attempt: 2 -> 0 */
	sysctl_fail_write("0");
	test_sysctl_sysctl2();
}
1321
1322static int sysctl_nested_child(void *arg)
1323{
1324	int fd;
1325	int pid;
1326
1327	printf("%s nested sysctl 0\n", memfd_str);
1328	sysctl_assert_write("0");
1329	/* A further nested pidns works the same. */
1330	pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
1331	join_thread(pid);
1332
1333	printf("%s nested sysctl 1\n", memfd_str);
1334	sysctl_assert_write("1");
1335	/* Child inherits our setting. */
1336	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
1337	join_thread(pid);
1338	/* Child cannot raise the setting. */
1339	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1340			   test_sysctl_sysctl1_failset);
1341	join_thread(pid);
1342	/* Child can lower the setting. */
1343	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1344			   test_sysctl_set_sysctl2);
1345	join_thread(pid);
1346	/* Child lowering the setting has no effect on our setting. */
1347	test_sysctl_sysctl1();
1348
1349	printf("%s nested sysctl 2\n", memfd_str);
1350	sysctl_assert_write("2");
1351	/* Child inherits our setting. */
1352	pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
1353	join_thread(pid);
1354	/* Child cannot raise the setting. */
1355	pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
1356			   test_sysctl_sysctl2_failset);
1357	join_thread(pid);
1358
1359	/* Verify that the rules are actually inherited after fork. */
1360	printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
1361	sysctl_assert_write("0");
1362
1363	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1364			   test_sysctl_sysctl1_failset);
1365	sysctl_assert_write("1");
1366	kill(pid, SIGCONT);
1367	join_thread(pid);
1368
1369	printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
1370	sysctl_assert_write("0");
1371
1372	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1373			   test_sysctl_sysctl2_failset);
1374	sysctl_assert_write("2");
1375	kill(pid, SIGCONT);
1376	join_thread(pid);
1377
1378	/*
1379	 * Verify that the current effective setting is saved on fork, meaning
1380	 * that the parent lowering the sysctl doesn't affect already-forked
1381	 * children.
1382	 */
1383	printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
1384	sysctl_assert_write("2");
1385	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1386			   test_sysctl_sysctl2);
1387	sysctl_assert_write("1");
1388	kill(pid, SIGCONT);
1389	join_thread(pid);
1390
1391	printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
1392	sysctl_assert_write("2");
1393	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1394			   test_sysctl_sysctl2);
1395	sysctl_assert_write("0");
1396	kill(pid, SIGCONT);
1397	join_thread(pid);
1398
1399	printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
1400	sysctl_assert_write("1");
1401	pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
1402			   test_sysctl_sysctl1);
1403	sysctl_assert_write("0");
1404	kill(pid, SIGCONT);
1405	join_thread(pid);
1406
1407	return 0;
1408}
1409
1410/*
1411 * Test sysctl with nested pid namespaces
1412 * Make sure that the sysctl nesting semantics work correctly.
1413 */
1414static void test_sysctl_nested(void)
1415{
1416	int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
1417
1418	join_thread(pid);
1419}
1420
1421/*
1422 * Test sharing via dup()
1423 * Test that seals are shared between dupped FDs and they're all equal.
1424 */
1425static void test_share_dup(char *banner, char *b_suffix)
1426{
1427	int fd, fd2;
1428
1429	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1430
1431	fd = mfd_assert_new("kern_memfd_share_dup",
1432			    mfd_def_size,
1433			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1434	mfd_assert_has_seals(fd, 0);
1435
1436	fd2 = mfd_assert_dup(fd);
1437	mfd_assert_has_seals(fd2, 0);
1438
1439	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1440	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1441	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1442
1443	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1444	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1445	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1446
1447	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1448	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1449	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1450
1451	mfd_fail_add_seals(fd, F_SEAL_GROW);
1452	mfd_fail_add_seals(fd2, F_SEAL_GROW);
1453	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1454	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
1455
1456	close(fd2);
1457
1458	mfd_fail_add_seals(fd, F_SEAL_GROW);
1459	close(fd);
1460}
1461
1462/*
1463 * Test sealing with active mmap()s
1464 * Modifying seals is only allowed if no other mmap() refs exist.
1465 */
1466static void test_share_mmap(char *banner, char *b_suffix)
1467{
1468	int fd;
1469	void *p;
1470
1471	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
1472
1473	fd = mfd_assert_new("kern_memfd_share_mmap",
1474			    mfd_def_size,
1475			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1476	mfd_assert_has_seals(fd, 0);
1477
1478	/* shared/writable ref prevents sealing WRITE, but allows others */
1479	p = mfd_assert_mmap_shared(fd);
1480	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1481	mfd_assert_has_seals(fd, 0);
1482	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
1483	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
1484	munmap(p, mfd_def_size);
1485
1486	/* readable ref allows sealing */
1487	p = mfd_assert_mmap_private(fd);
1488	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1489	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1490	munmap(p, mfd_def_size);
1491
1492	close(fd);
1493}
1494
1495/*
1496 * Test sealing with open(/proc/self/fd/%d)
1497 * Via /proc we can get access to a separate file-context for the same memfd.
1498 * This is *not* like dup(), but like a real separate open(). Make sure the
1499 * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
1500 */
1501static void test_share_open(char *banner, char *b_suffix)
1502{
1503	int fd, fd2;
1504
1505	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1506
1507	fd = mfd_assert_new("kern_memfd_share_open",
1508			    mfd_def_size,
1509			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1510	mfd_assert_has_seals(fd, 0);
1511
1512	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1513	mfd_assert_add_seals(fd, F_SEAL_WRITE);
1514	mfd_assert_has_seals(fd, F_SEAL_WRITE);
1515	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
1516
1517	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
1518	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1519	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1520
1521	close(fd);
1522	fd = mfd_assert_open(fd2, O_RDONLY, 0);
1523
1524	mfd_fail_add_seals(fd, F_SEAL_SEAL);
1525	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
1526	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
1527
1528	close(fd2);
1529	fd2 = mfd_assert_open(fd, O_RDWR, 0);
1530
1531	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
1532	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1533	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
1534
1535	close(fd2);
1536	close(fd);
1537}
1538
1539/*
1540 * Test sharing via fork()
1541 * Test whether seal-modifications work as expected with forked childs.
1542 */
1543static void test_share_fork(char *banner, char *b_suffix)
1544{
1545	int fd;
1546	pid_t pid;
1547
1548	printf("%s %s %s\n", memfd_str, banner, b_suffix);
1549
1550	fd = mfd_assert_new("kern_memfd_share_fork",
1551			    mfd_def_size,
1552			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
1553	mfd_assert_has_seals(fd, 0);
1554
1555	pid = spawn_idle_thread(0);
1556	mfd_assert_add_seals(fd, F_SEAL_SEAL);
1557	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1558
1559	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1560	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1561
1562	join_idle_thread(pid);
1563
1564	mfd_fail_add_seals(fd, F_SEAL_WRITE);
1565	mfd_assert_has_seals(fd, F_SEAL_SEAL);
1566
1567	close(fd);
1568}
1569
1570int main(int argc, char **argv)
1571{
1572	pid_t pid;
1573
1574	if (argc == 2) {
1575		if (!strcmp(argv[1], "hugetlbfs")) {
1576			unsigned long hpage_size = default_huge_page_size();
1577
1578			if (!hpage_size) {
1579				printf("Unable to determine huge page size\n");
1580				abort();
1581			}
1582
1583			hugetlbfs_test = 1;
1584			memfd_str = MEMFD_HUGE_STR;
1585			mfd_def_size = hpage_size * 2;
1586		} else {
1587			printf("Unknown option: %s\n", argv[1]);
1588			abort();
1589		}
1590	}
1591
1592	test_create();
1593	test_basic();
1594	test_exec_seal();
1595	test_exec_no_seal();
1596	test_noexec_seal();
1597
1598	test_seal_write();
1599	test_seal_future_write();
1600	test_seal_shrink();
1601	test_seal_grow();
1602	test_seal_resize();
1603
1604	test_sysctl_simple();
1605	test_sysctl_nested();
1606
1607	test_share_dup("SHARE-DUP", "");
1608	test_share_mmap("SHARE-MMAP", "");
1609	test_share_open("SHARE-OPEN", "");
1610	test_share_fork("SHARE-FORK", "");
1611
1612	/* Run test-suite in a multi-threaded environment with a shared
1613	 * file-table. */
1614	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
1615	test_share_dup("SHARE-DUP", SHARED_FT_STR);
1616	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
1617	test_share_open("SHARE-OPEN", SHARED_FT_STR);
1618	test_share_fork("SHARE-FORK", SHARED_FT_STR);
1619	join_idle_thread(pid);
1620
1621	printf("memfd: DONE\n");
1622
1623	return 0;
1624}