Loading...
1/*
2 * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16/*
17 * Fork and exec tiny 1 page executable which precisely controls its VM.
18 * Test /proc/$PID/maps
19 * Test /proc/$PID/smaps
20 * Test /proc/$PID/smaps_rollup
21 * Test /proc/$PID/statm
22 *
23 * FIXME require CONFIG_TMPFS which can be disabled
24 * FIXME test other values from "smaps"
25 * FIXME support other archs
26 */
27#undef NDEBUG
28#include <assert.h>
29#include <errno.h>
30#include <sched.h>
31#include <signal.h>
32#include <stdbool.h>
33#include <stdint.h>
34#include <stdio.h>
35#include <string.h>
36#include <stdlib.h>
37#include <sys/mount.h>
38#include <sys/types.h>
39#include <sys/stat.h>
40#include <sys/wait.h>
41#include <fcntl.h>
42#include <unistd.h>
43#include <sys/syscall.h>
44#include <sys/uio.h>
45#include <linux/kdev_t.h>
46#include <sys/time.h>
47#include <sys/resource.h>
48#include <linux/fs.h>
49
50#include "../kselftest.h"
51
/*
 * Raw execveat(2) wrapper: forwards all arguments to syscall(2) with
 * SYS_execveat and hands back its return value unchanged.
 */
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
	long rv;

	rv = syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
	return rv;
}
56
57static void make_private_tmp(void)
58{
59 if (unshare(CLONE_NEWNS) == -1) {
60 if (errno == ENOSYS || errno == EPERM) {
61 exit(4);
62 }
63 exit(1);
64 }
65 if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
66 exit(1);
67 }
68 if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) {
69 exit(1);
70 }
71}
72
/* PID of the exec'ed test child; stays -1 until main() stores fork()'s result. */
static pid_t pid = -1;

/* atexit() handler: terminate the test child, if one was recorded. */
static void ate(void)
{
	if (pid <= 0)
		return;

	kill(pid, SIGTERM);
}
80
/*
 * ELF64 file header exactly as make_exe() writes it to disk.
 * Field order and widths define the on-disk layout; do not reorder.
 */
struct elf64_hdr {
	uint8_t  e_ident[16];			/* magic, class, encoding, version */
	uint16_t e_type, e_machine;
	uint32_t e_version;
	uint64_t e_entry, e_phoff, e_shoff;	/* entry point, header offsets */
	uint32_t e_flags;
	uint16_t e_ehsize, e_phentsize, e_phnum;
	uint16_t e_shentsize, e_shnum, e_shstrndx;
};
97
/*
 * ELF64 program header exactly as make_exe() writes it to disk.
 * Field order and widths define the on-disk layout; do not reorder.
 */
struct elf64_phdr {
	uint32_t p_type, p_flags;
	uint64_t p_offset;			/* file offset of the segment */
	uint64_t p_vaddr, p_paddr;
	uint64_t p_filesz, p_memsz;		/* size in file / in memory */
	uint64_t p_align;
};
108
109#ifdef __x86_64__
#define PAGE_SIZE 4096
#define VADDR (1UL << 32)	/* load address of the test executable */
#define MAPS_OFFSET 73		/* column where the name starts in a maps line */

/* Little-endian byte lists used to embed immediates in the opcode stream. */
#define le32_bytes(x) \
	(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff
#define le64_bytes(x) \
	le32_bytes(x), le32_bytes((x)>>32)

/*
 * Hand-assembled x86_64 instructions.
 * NOTE: from here on "syscall" names the opcode bytes, not the libc function.
 */
#define syscall 0x0f, 0x05
#define mov_rdi(x) 0x48, 0xbf, le64_bytes(x)
#define mov_rsi(x) 0x48, 0xbe, le64_bytes(x)
#define mov_eax(x) 0xb8, le32_bytes(x)

/*
 * Machine code of the test executable: unmap everything above its own
 * page, write one byte to fd 0 to ping the parent, then pause() forever.
 */
static const uint8_t payload[] = {
	/* Casually unmap stack, vDSO and everything else. */
	/* munmap */
	mov_rdi(VADDR + 4096),
	mov_rsi((1ULL << 47) - 4096 - VADDR - 4096),
	mov_eax(11),
	syscall,

	/* Ping parent. */
	/* write(0, &c, 1); */
	0x31, 0xff,					/* xor edi, edi */
	0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00,	/* lea rsi, [rip] */
	0xba, 0x01, 0x00, 0x00, 0x00,			/* mov edx, 1 */
	mov_eax(1),
	syscall,

	/* 1: pause(); */
	mov_eax(34),
	syscall,

	/* -9: back over "mov eax, 34" (5) + "syscall" (2) + this jmp (2). */
	0xeb, 0xf7,	/* jmp 1b */
};
150
151static int make_exe(const uint8_t *payload, size_t len)
152{
153 struct elf64_hdr h;
154 struct elf64_phdr ph;
155
156 struct iovec iov[3] = {
157 {&h, sizeof(struct elf64_hdr)},
158 {&ph, sizeof(struct elf64_phdr)},
159 {(void *)payload, len},
160 };
161 int fd, fd1;
162 char buf[64];
163
164 memset(&h, 0, sizeof(h));
165 h.e_ident[0] = 0x7f;
166 h.e_ident[1] = 'E';
167 h.e_ident[2] = 'L';
168 h.e_ident[3] = 'F';
169 h.e_ident[4] = 2;
170 h.e_ident[5] = 1;
171 h.e_ident[6] = 1;
172 h.e_ident[7] = 0;
173 h.e_type = 2;
174 h.e_machine = 0x3e;
175 h.e_version = 1;
176 h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr);
177 h.e_phoff = sizeof(struct elf64_hdr);
178 h.e_shoff = 0;
179 h.e_flags = 0;
180 h.e_ehsize = sizeof(struct elf64_hdr);
181 h.e_phentsize = sizeof(struct elf64_phdr);
182 h.e_phnum = 1;
183 h.e_shentsize = 0;
184 h.e_shnum = 0;
185 h.e_shstrndx = 0;
186
187 memset(&ph, 0, sizeof(ph));
188 ph.p_type = 1;
189 ph.p_flags = (1<<2)|1;
190 ph.p_offset = 0;
191 ph.p_vaddr = VADDR;
192 ph.p_paddr = 0;
193 ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
194 ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
195 ph.p_align = 4096;
196
197 fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
198 if (fd == -1) {
199 exit(1);
200 }
201
202 if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) {
203 exit(1);
204 }
205
206 /* Avoid ETXTBSY on exec. */
207 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
208 fd1 = open(buf, O_RDONLY|O_CLOEXEC);
209 close(fd);
210
211 return fd1;
212}
213#endif
214
/*
 * Expected trailing /proc/$PID/maps line, one per vsyscall mode.
 *
 * NOTE(review): main() pads its own expected line so that the name starts
 * at MAPS_OFFSET; these literals have a single space before "[vsyscall]".
 * Confirm the column padding was not lost from the literals.
 */
static const char str_vsyscall_0[] = "";
static const char str_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n";
static const char str_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n";

/*
 * Detected vsyscall mode:
 *   0: vsyscall VMA doesn't exist	vsyscall=none
 *   1: vsyscall VMA is --xp		vsyscall=xonly
 *   2: vsyscall VMA is r-xp		vsyscall=emulate
 */
static volatile int g_vsyscall;

/* One of the str_vsyscall_N strings above, selected in main(). */
static const char *str_vsyscall;
228
229#ifdef __x86_64__
230static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
231{
232 _exit(g_vsyscall);
233}
234
235/*
236 * vsyscall page can't be unmapped, probe it directly.
237 */
238static void vsyscall(void)
239{
240 pid_t pid;
241 int wstatus;
242
243 pid = fork();
244 if (pid < 0) {
245 fprintf(stderr, "fork, errno %d\n", errno);
246 exit(1);
247 }
248 if (pid == 0) {
249 struct rlimit rlim = {0, 0};
250 (void)setrlimit(RLIMIT_CORE, &rlim);
251
252 /* Hide "segfault at ffffffffff600000" messages. */
253 struct sigaction act;
254 memset(&act, 0, sizeof(struct sigaction));
255 act.sa_flags = SA_SIGINFO;
256 act.sa_sigaction = sigaction_SIGSEGV;
257 (void)sigaction(SIGSEGV, &act, NULL);
258
259 g_vsyscall = 0;
260 /* gettimeofday(NULL, NULL); */
261 uint64_t rax = 0xffffffffff600000;
262 asm volatile (
263 "call *%[rax]"
264 : [rax] "+a" (rax)
265 : "D" (NULL), "S" (NULL)
266 : "rcx", "r11"
267 );
268
269 g_vsyscall = 1;
270 *(volatile int *)0xffffffffff600000UL;
271
272 g_vsyscall = 2;
273 exit(g_vsyscall);
274 }
275 waitpid(pid, &wstatus, 0);
276 if (WIFEXITED(wstatus)) {
277 g_vsyscall = WEXITSTATUS(wstatus);
278 } else {
279 fprintf(stderr, "error: wstatus %08x\n", wstatus);
280 exit(1);
281 }
282}
283
284int main(void)
285{
286 int pipefd[2];
287 int exec_fd;
288
289 vsyscall();
290 switch (g_vsyscall) {
291 case 0:
292 str_vsyscall = str_vsyscall_0;
293 break;
294 case 1:
295 str_vsyscall = str_vsyscall_1;
296 break;
297 case 2:
298 str_vsyscall = str_vsyscall_2;
299 break;
300 default:
301 abort();
302 }
303
304 atexit(ate);
305
306 make_private_tmp();
307
308 /* Reserve fd 0 for 1-byte pipe ping from child. */
309 close(0);
310 if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) {
311 return 1;
312 }
313
314 exec_fd = make_exe(payload, sizeof(payload));
315
316 if (pipe(pipefd) == -1) {
317 return 1;
318 }
319 if (dup2(pipefd[1], 0) != 0) {
320 return 1;
321 }
322
323 pid = fork();
324 if (pid == -1) {
325 return 1;
326 }
327 if (pid == 0) {
328 sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH);
329 return 1;
330 }
331
332 char _;
333 if (read(pipefd[0], &_, 1) != 1) {
334 return 1;
335 }
336
337 struct stat st;
338 if (fstat(exec_fd, &st) == -1) {
339 return 1;
340 }
341
342 /* Generate "head -n1 /proc/$PID/maps" */
343 char buf0[256];
344 memset(buf0, ' ', sizeof(buf0));
345 int len = snprintf(buf0, sizeof(buf0),
346 "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu",
347 VADDR, VADDR + PAGE_SIZE,
348 MAJOR(st.st_dev), MINOR(st.st_dev),
349 (unsigned long long)st.st_ino);
350 buf0[len] = ' ';
351 snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET,
352 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino);
353
354 /* Test /proc/$PID/maps */
355 {
356 const size_t len = strlen(buf0) + strlen(str_vsyscall);
357 char buf[256];
358 ssize_t rv;
359 int fd;
360
361 snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
362 fd = open(buf, O_RDONLY);
363 if (fd == -1) {
364 return 1;
365 }
366 rv = read(fd, buf, sizeof(buf));
367 assert(rv == len);
368 assert(memcmp(buf, buf0, strlen(buf0)) == 0);
369 if (g_vsyscall > 0) {
370 assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0);
371 }
372 }
373
374 /* Test /proc/$PID/smaps */
375 {
376 char buf[4096];
377 ssize_t rv;
378 int fd;
379
380 snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
381 fd = open(buf, O_RDONLY);
382 if (fd == -1) {
383 return 1;
384 }
385 rv = read(fd, buf, sizeof(buf));
386 assert(0 <= rv && rv <= sizeof(buf));
387
388 assert(rv >= strlen(buf0));
389 assert(memcmp(buf, buf0, strlen(buf0)) == 0);
390
391#define RSS1 "Rss: 4 kB\n"
392#define RSS2 "Rss: 0 kB\n"
393#define PSS1 "Pss: 4 kB\n"
394#define PSS2 "Pss: 0 kB\n"
395 assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
396 memmem(buf, rv, RSS2, strlen(RSS2)));
397 assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
398 memmem(buf, rv, PSS2, strlen(PSS2)));
399
400 static const char *S[] = {
401 "Size: 4 kB\n",
402 "KernelPageSize: 4 kB\n",
403 "MMUPageSize: 4 kB\n",
404 "Anonymous: 0 kB\n",
405 "AnonHugePages: 0 kB\n",
406 "Shared_Hugetlb: 0 kB\n",
407 "Private_Hugetlb: 0 kB\n",
408 "Locked: 0 kB\n",
409 };
410 int i;
411
412 for (i = 0; i < ARRAY_SIZE(S); i++) {
413 assert(memmem(buf, rv, S[i], strlen(S[i])));
414 }
415
416 if (g_vsyscall > 0) {
417 assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall)));
418 }
419 }
420
421 /* Test /proc/$PID/smaps_rollup */
422 {
423 char bufr[256];
424 memset(bufr, ' ', sizeof(bufr));
425 len = snprintf(bufr, sizeof(bufr),
426 "%08lx-%08lx ---p 00000000 00:00 0",
427 VADDR, VADDR + PAGE_SIZE);
428 bufr[len] = ' ';
429 snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET,
430 "[rollup]\n");
431
432 char buf[1024];
433 ssize_t rv;
434 int fd;
435
436 snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
437 fd = open(buf, O_RDONLY);
438 if (fd == -1) {
439 return 1;
440 }
441 rv = read(fd, buf, sizeof(buf));
442 assert(0 <= rv && rv <= sizeof(buf));
443
444 assert(rv >= strlen(bufr));
445 assert(memcmp(buf, bufr, strlen(bufr)) == 0);
446
447 assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
448 memmem(buf, rv, RSS2, strlen(RSS2)));
449 assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
450 memmem(buf, rv, PSS2, strlen(PSS2)));
451
452 static const char *S[] = {
453 "Anonymous: 0 kB\n",
454 "AnonHugePages: 0 kB\n",
455 "Shared_Hugetlb: 0 kB\n",
456 "Private_Hugetlb: 0 kB\n",
457 "Locked: 0 kB\n",
458 };
459 int i;
460
461 for (i = 0; i < ARRAY_SIZE(S); i++) {
462 assert(memmem(buf, rv, S[i], strlen(S[i])));
463 }
464 }
465
466 /* Test /proc/$PID/statm */
467 {
468 char buf[64];
469 ssize_t rv;
470 int fd;
471
472 snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
473 fd = open(buf, O_RDONLY);
474 if (fd == -1) {
475 return 1;
476 }
477 rv = read(fd, buf, sizeof(buf));
478 assert(rv == 7 * 2);
479
480 assert(buf[0] == '1'); /* ->total_vm */
481 assert(buf[1] == ' ');
482 assert(buf[2] == '0' || buf[2] == '1'); /* rss */
483 assert(buf[3] == ' ');
484 assert(buf[4] == '0' || buf[2] == '1'); /* file rss */
485 assert(buf[5] == ' ');
486 assert(buf[6] == '1'); /* ELF executable segments */
487 assert(buf[7] == ' ');
488 assert(buf[8] == '0');
489 assert(buf[9] == ' ');
490 assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */
491 assert(buf[11] == ' ');
492 assert(buf[12] == '0');
493 assert(buf[13] == '\n');
494 }
495
496 /* Test PROCMAP_QUERY ioctl() for /proc/$PID/maps */
497 {
498 char path_buf[256], exp_path_buf[256];
499 struct procmap_query q;
500 int fd, err;
501
502 snprintf(path_buf, sizeof(path_buf), "/proc/%u/maps", pid);
503 fd = open(path_buf, O_RDONLY);
504 if (fd == -1)
505 return 1;
506
507 /* CASE 1: exact MATCH at VADDR */
508 memset(&q, 0, sizeof(q));
509 q.size = sizeof(q);
510 q.query_addr = VADDR;
511 q.query_flags = 0;
512 q.vma_name_addr = (__u64)(unsigned long)path_buf;
513 q.vma_name_size = sizeof(path_buf);
514
515 err = ioctl(fd, PROCMAP_QUERY, &q);
516 assert(err == 0);
517
518 assert(q.query_addr == VADDR);
519 assert(q.query_flags == 0);
520
521 assert(q.vma_flags == (PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_EXECUTABLE));
522 assert(q.vma_start == VADDR);
523 assert(q.vma_end == VADDR + PAGE_SIZE);
524 assert(q.vma_page_size == PAGE_SIZE);
525
526 assert(q.vma_offset == 0);
527 assert(q.inode == st.st_ino);
528 assert(q.dev_major == MAJOR(st.st_dev));
529 assert(q.dev_minor == MINOR(st.st_dev));
530
531 snprintf(exp_path_buf, sizeof(exp_path_buf),
532 "/tmp/#%llu (deleted)", (unsigned long long)st.st_ino);
533 assert(q.vma_name_size == strlen(exp_path_buf) + 1);
534 assert(strcmp(path_buf, exp_path_buf) == 0);
535
536 /* CASE 2: NO MATCH at VADDR-1 */
537 memset(&q, 0, sizeof(q));
538 q.size = sizeof(q);
539 q.query_addr = VADDR - 1;
540 q.query_flags = 0; /* exact match */
541
542 err = ioctl(fd, PROCMAP_QUERY, &q);
543 err = err < 0 ? -errno : 0;
544 assert(err == -ENOENT);
545
546 /* CASE 3: MATCH COVERING_OR_NEXT_VMA at VADDR - 1 */
547 memset(&q, 0, sizeof(q));
548 q.size = sizeof(q);
549 q.query_addr = VADDR - 1;
550 q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA;
551
552 err = ioctl(fd, PROCMAP_QUERY, &q);
553 assert(err == 0);
554
555 assert(q.query_addr == VADDR - 1);
556 assert(q.query_flags == PROCMAP_QUERY_COVERING_OR_NEXT_VMA);
557 assert(q.vma_start == VADDR);
558 assert(q.vma_end == VADDR + PAGE_SIZE);
559
560 /* CASE 4: NO MATCH at VADDR + PAGE_SIZE */
561 memset(&q, 0, sizeof(q));
562 q.size = sizeof(q);
563 q.query_addr = VADDR + PAGE_SIZE; /* point right after the VMA */
564 q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA;
565
566 err = ioctl(fd, PROCMAP_QUERY, &q);
567 err = err < 0 ? -errno : 0;
568 assert(err == -ENOENT);
569
570 /* CASE 5: NO MATCH WRITABLE at VADDR */
571 memset(&q, 0, sizeof(q));
572 q.size = sizeof(q);
573 q.query_addr = VADDR;
574 q.query_flags = PROCMAP_QUERY_VMA_WRITABLE;
575
576 err = ioctl(fd, PROCMAP_QUERY, &q);
577 err = err < 0 ? -errno : 0;
578 assert(err == -ENOENT);
579 }
580
581 return 0;
582}
583#else
/*
 * Fallback for architectures other than x86_64: nothing is tested.
 * The exit status 4 is presumably the kselftest "skip" code -- confirm
 * against kselftest.h.
 */
int main(void)
{
	enum { EXIT_STATUS_UNSUPPORTED = 4 };

	return EXIT_STATUS_UNSUPPORTED;
}
588#endif
1/*
2 * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16/*
17 * Fork and exec tiny 1 page executable which precisely controls its VM.
18 * Test /proc/$PID/maps
19 * Test /proc/$PID/smaps
20 * Test /proc/$PID/smaps_rollup
21 * Test /proc/$PID/statm
22 *
23 * FIXME require CONFIG_TMPFS which can be disabled
24 * FIXME test other values from "smaps"
25 * FIXME support other archs
26 */
27#undef NDEBUG
28#include <assert.h>
29#include <errno.h>
30#include <sched.h>
31#include <signal.h>
32#include <stdbool.h>
33#include <stdint.h>
34#include <stdio.h>
35#include <string.h>
36#include <stdlib.h>
37#include <sys/mount.h>
38#include <sys/types.h>
39#include <sys/stat.h>
40#include <sys/wait.h>
41#include <fcntl.h>
42#include <unistd.h>
43#include <sys/syscall.h>
44#include <sys/uio.h>
45#include <linux/kdev_t.h>
46#include <sys/time.h>
47#include <sys/resource.h>
48
49#include "../kselftest.h"
50
/*
 * Raw execveat(2) wrapper: forwards all arguments to syscall(2) with
 * SYS_execveat and hands back its return value unchanged.
 */
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
	long rv;

	rv = syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
	return rv;
}
55
56static void make_private_tmp(void)
57{
58 if (unshare(CLONE_NEWNS) == -1) {
59 if (errno == ENOSYS || errno == EPERM) {
60 exit(4);
61 }
62 exit(1);
63 }
64 if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
65 exit(1);
66 }
67 if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) {
68 exit(1);
69 }
70}
71
/* PID of the exec'ed test child; stays -1 until main() stores fork()'s result. */
static pid_t pid = -1;

/* atexit() handler: terminate the test child, if one was recorded. */
static void ate(void)
{
	if (pid <= 0)
		return;

	kill(pid, SIGTERM);
}
79
/*
 * ELF64 file header exactly as make_exe() writes it to disk.
 * Field order and widths define the on-disk layout; do not reorder.
 */
struct elf64_hdr {
	uint8_t  e_ident[16];			/* magic, class, encoding, version */
	uint16_t e_type, e_machine;
	uint32_t e_version;
	uint64_t e_entry, e_phoff, e_shoff;	/* entry point, header offsets */
	uint32_t e_flags;
	uint16_t e_ehsize, e_phentsize, e_phnum;
	uint16_t e_shentsize, e_shnum, e_shstrndx;
};
96
/*
 * ELF64 program header exactly as make_exe() writes it to disk.
 * Field order and widths define the on-disk layout; do not reorder.
 */
struct elf64_phdr {
	uint32_t p_type, p_flags;
	uint64_t p_offset;			/* file offset of the segment */
	uint64_t p_vaddr, p_paddr;
	uint64_t p_filesz, p_memsz;		/* size in file / in memory */
	uint64_t p_align;
};
107
108#ifdef __x86_64__
#define PAGE_SIZE 4096
#define VADDR (1UL << 32)	/* load address of the test executable */
#define MAPS_OFFSET 73		/* column where the name starts in a maps line */

/* Little-endian byte lists used to embed immediates in the opcode stream. */
#define le32_bytes(x) \
	(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff
#define le64_bytes(x) \
	le32_bytes(x), le32_bytes((x)>>32)

/*
 * Hand-assembled x86_64 instructions.
 * NOTE: from here on "syscall" names the opcode bytes, not the libc function.
 */
#define syscall 0x0f, 0x05
#define mov_rdi(x) 0x48, 0xbf, le64_bytes(x)
#define mov_rsi(x) 0x48, 0xbe, le64_bytes(x)
#define mov_eax(x) 0xb8, le32_bytes(x)

/*
 * Machine code of the test executable: unmap everything above its own
 * page, write one byte to fd 0 to ping the parent, then pause() forever.
 */
static const uint8_t payload[] = {
	/* Casually unmap stack, vDSO and everything else. */
	/* munmap */
	mov_rdi(VADDR + 4096),
	mov_rsi((1ULL << 47) - 4096 - VADDR - 4096),
	mov_eax(11),
	syscall,

	/* Ping parent. */
	/* write(0, &c, 1); */
	0x31, 0xff,					/* xor edi, edi */
	0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00,	/* lea rsi, [rip] */
	0xba, 0x01, 0x00, 0x00, 0x00,			/* mov edx, 1 */
	mov_eax(1),
	syscall,

	/* 1: pause(); */
	mov_eax(34),
	syscall,

	/* -9: back over "mov eax, 34" (5) + "syscall" (2) + this jmp (2). */
	0xeb, 0xf7,	/* jmp 1b */
};
149
150static int make_exe(const uint8_t *payload, size_t len)
151{
152 struct elf64_hdr h;
153 struct elf64_phdr ph;
154
155 struct iovec iov[3] = {
156 {&h, sizeof(struct elf64_hdr)},
157 {&ph, sizeof(struct elf64_phdr)},
158 {(void *)payload, len},
159 };
160 int fd, fd1;
161 char buf[64];
162
163 memset(&h, 0, sizeof(h));
164 h.e_ident[0] = 0x7f;
165 h.e_ident[1] = 'E';
166 h.e_ident[2] = 'L';
167 h.e_ident[3] = 'F';
168 h.e_ident[4] = 2;
169 h.e_ident[5] = 1;
170 h.e_ident[6] = 1;
171 h.e_ident[7] = 0;
172 h.e_type = 2;
173 h.e_machine = 0x3e;
174 h.e_version = 1;
175 h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr);
176 h.e_phoff = sizeof(struct elf64_hdr);
177 h.e_shoff = 0;
178 h.e_flags = 0;
179 h.e_ehsize = sizeof(struct elf64_hdr);
180 h.e_phentsize = sizeof(struct elf64_phdr);
181 h.e_phnum = 1;
182 h.e_shentsize = 0;
183 h.e_shnum = 0;
184 h.e_shstrndx = 0;
185
186 memset(&ph, 0, sizeof(ph));
187 ph.p_type = 1;
188 ph.p_flags = (1<<2)|1;
189 ph.p_offset = 0;
190 ph.p_vaddr = VADDR;
191 ph.p_paddr = 0;
192 ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
193 ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
194 ph.p_align = 4096;
195
196 fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
197 if (fd == -1) {
198 exit(1);
199 }
200
201 if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) {
202 exit(1);
203 }
204
205 /* Avoid ETXTBSY on exec. */
206 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
207 fd1 = open(buf, O_RDONLY|O_CLOEXEC);
208 close(fd);
209
210 return fd1;
211}
212#endif
213
/*
 * Expected trailing /proc/$PID/maps line, one per vsyscall mode.
 *
 * NOTE(review): main() pads its own expected line so that the name starts
 * at MAPS_OFFSET; these literals have a single space before "[vsyscall]".
 * Confirm the column padding was not lost from the literals.
 */
static const char str_vsyscall_0[] = "";
static const char str_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n";
static const char str_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n";

/*
 * Detected vsyscall mode:
 *   0: vsyscall VMA doesn't exist	vsyscall=none
 *   1: vsyscall VMA is --xp		vsyscall=xonly
 *   2: vsyscall VMA is r-xp		vsyscall=emulate
 */
static volatile int g_vsyscall;

/* One of the str_vsyscall_N strings above, selected in main(). */
static const char *str_vsyscall;
227
228#ifdef __x86_64__
229static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
230{
231 _exit(g_vsyscall);
232}
233
234/*
235 * vsyscall page can't be unmapped, probe it directly.
236 */
237static void vsyscall(void)
238{
239 pid_t pid;
240 int wstatus;
241
242 pid = fork();
243 if (pid < 0) {
244 fprintf(stderr, "fork, errno %d\n", errno);
245 exit(1);
246 }
247 if (pid == 0) {
248 struct rlimit rlim = {0, 0};
249 (void)setrlimit(RLIMIT_CORE, &rlim);
250
251 /* Hide "segfault at ffffffffff600000" messages. */
252 struct sigaction act;
253 memset(&act, 0, sizeof(struct sigaction));
254 act.sa_flags = SA_SIGINFO;
255 act.sa_sigaction = sigaction_SIGSEGV;
256 (void)sigaction(SIGSEGV, &act, NULL);
257
258 g_vsyscall = 0;
259 /* gettimeofday(NULL, NULL); */
260 uint64_t rax = 0xffffffffff600000;
261 asm volatile (
262 "call *%[rax]"
263 : [rax] "+a" (rax)
264 : "D" (NULL), "S" (NULL)
265 : "rcx", "r11"
266 );
267
268 g_vsyscall = 1;
269 *(volatile int *)0xffffffffff600000UL;
270
271 g_vsyscall = 2;
272 exit(g_vsyscall);
273 }
274 waitpid(pid, &wstatus, 0);
275 if (WIFEXITED(wstatus)) {
276 g_vsyscall = WEXITSTATUS(wstatus);
277 } else {
278 fprintf(stderr, "error: wstatus %08x\n", wstatus);
279 exit(1);
280 }
281}
282
283int main(void)
284{
285 int pipefd[2];
286 int exec_fd;
287
288 vsyscall();
289 switch (g_vsyscall) {
290 case 0:
291 str_vsyscall = str_vsyscall_0;
292 break;
293 case 1:
294 str_vsyscall = str_vsyscall_1;
295 break;
296 case 2:
297 str_vsyscall = str_vsyscall_2;
298 break;
299 default:
300 abort();
301 }
302
303 atexit(ate);
304
305 make_private_tmp();
306
307 /* Reserve fd 0 for 1-byte pipe ping from child. */
308 close(0);
309 if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) {
310 return 1;
311 }
312
313 exec_fd = make_exe(payload, sizeof(payload));
314
315 if (pipe(pipefd) == -1) {
316 return 1;
317 }
318 if (dup2(pipefd[1], 0) != 0) {
319 return 1;
320 }
321
322 pid = fork();
323 if (pid == -1) {
324 return 1;
325 }
326 if (pid == 0) {
327 sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH);
328 return 1;
329 }
330
331 char _;
332 if (read(pipefd[0], &_, 1) != 1) {
333 return 1;
334 }
335
336 struct stat st;
337 if (fstat(exec_fd, &st) == -1) {
338 return 1;
339 }
340
341 /* Generate "head -n1 /proc/$PID/maps" */
342 char buf0[256];
343 memset(buf0, ' ', sizeof(buf0));
344 int len = snprintf(buf0, sizeof(buf0),
345 "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu",
346 VADDR, VADDR + PAGE_SIZE,
347 MAJOR(st.st_dev), MINOR(st.st_dev),
348 (unsigned long long)st.st_ino);
349 buf0[len] = ' ';
350 snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET,
351 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino);
352
353 /* Test /proc/$PID/maps */
354 {
355 const size_t len = strlen(buf0) + strlen(str_vsyscall);
356 char buf[256];
357 ssize_t rv;
358 int fd;
359
360 snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
361 fd = open(buf, O_RDONLY);
362 if (fd == -1) {
363 return 1;
364 }
365 rv = read(fd, buf, sizeof(buf));
366 assert(rv == len);
367 assert(memcmp(buf, buf0, strlen(buf0)) == 0);
368 if (g_vsyscall > 0) {
369 assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0);
370 }
371 }
372
373 /* Test /proc/$PID/smaps */
374 {
375 char buf[4096];
376 ssize_t rv;
377 int fd;
378
379 snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
380 fd = open(buf, O_RDONLY);
381 if (fd == -1) {
382 return 1;
383 }
384 rv = read(fd, buf, sizeof(buf));
385 assert(0 <= rv && rv <= sizeof(buf));
386
387 assert(rv >= strlen(buf0));
388 assert(memcmp(buf, buf0, strlen(buf0)) == 0);
389
390#define RSS1 "Rss: 4 kB\n"
391#define RSS2 "Rss: 0 kB\n"
392#define PSS1 "Pss: 4 kB\n"
393#define PSS2 "Pss: 0 kB\n"
394 assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
395 memmem(buf, rv, RSS2, strlen(RSS2)));
396 assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
397 memmem(buf, rv, PSS2, strlen(PSS2)));
398
399 static const char *S[] = {
400 "Size: 4 kB\n",
401 "KernelPageSize: 4 kB\n",
402 "MMUPageSize: 4 kB\n",
403 "Anonymous: 0 kB\n",
404 "AnonHugePages: 0 kB\n",
405 "Shared_Hugetlb: 0 kB\n",
406 "Private_Hugetlb: 0 kB\n",
407 "Locked: 0 kB\n",
408 };
409 int i;
410
411 for (i = 0; i < ARRAY_SIZE(S); i++) {
412 assert(memmem(buf, rv, S[i], strlen(S[i])));
413 }
414
415 if (g_vsyscall > 0) {
416 assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall)));
417 }
418 }
419
420 /* Test /proc/$PID/smaps_rollup */
421 {
422 char bufr[256];
423 memset(bufr, ' ', sizeof(bufr));
424 len = snprintf(bufr, sizeof(bufr),
425 "%08lx-%08lx ---p 00000000 00:00 0",
426 VADDR, VADDR + PAGE_SIZE);
427 bufr[len] = ' ';
428 snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET,
429 "[rollup]\n");
430
431 char buf[1024];
432 ssize_t rv;
433 int fd;
434
435 snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
436 fd = open(buf, O_RDONLY);
437 if (fd == -1) {
438 return 1;
439 }
440 rv = read(fd, buf, sizeof(buf));
441 assert(0 <= rv && rv <= sizeof(buf));
442
443 assert(rv >= strlen(bufr));
444 assert(memcmp(buf, bufr, strlen(bufr)) == 0);
445
446 assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
447 memmem(buf, rv, RSS2, strlen(RSS2)));
448 assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
449 memmem(buf, rv, PSS2, strlen(PSS2)));
450
451 static const char *S[] = {
452 "Anonymous: 0 kB\n",
453 "AnonHugePages: 0 kB\n",
454 "Shared_Hugetlb: 0 kB\n",
455 "Private_Hugetlb: 0 kB\n",
456 "Locked: 0 kB\n",
457 };
458 int i;
459
460 for (i = 0; i < ARRAY_SIZE(S); i++) {
461 assert(memmem(buf, rv, S[i], strlen(S[i])));
462 }
463 }
464
465 /* Test /proc/$PID/statm */
466 {
467 char buf[64];
468 ssize_t rv;
469 int fd;
470
471 snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
472 fd = open(buf, O_RDONLY);
473 if (fd == -1) {
474 return 1;
475 }
476 rv = read(fd, buf, sizeof(buf));
477 assert(rv == 7 * 2);
478
479 assert(buf[0] == '1'); /* ->total_vm */
480 assert(buf[1] == ' ');
481 assert(buf[2] == '0' || buf[2] == '1'); /* rss */
482 assert(buf[3] == ' ');
483 assert(buf[4] == '0' || buf[2] == '1'); /* file rss */
484 assert(buf[5] == ' ');
485 assert(buf[6] == '1'); /* ELF executable segments */
486 assert(buf[7] == ' ');
487 assert(buf[8] == '0');
488 assert(buf[9] == ' ');
489 assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */
490 assert(buf[11] == ' ');
491 assert(buf[12] == '0');
492 assert(buf[13] == '\n');
493 }
494
495 return 0;
496}
497#else
/*
 * Fallback for architectures other than x86_64: nothing is tested.
 * The exit status 4 is presumably the kselftest "skip" code -- confirm
 * against kselftest.h.
 */
int main(void)
{
	enum { EXIT_STATUS_UNSUPPORTED = 4 };

	return EXIT_STATUS_UNSUPPORTED;
}
502#endif