Loading...
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * KVM userfaultfd util
4 * Adapted from demand_paging_test.c
5 *
6 * Copyright (C) 2018, Red Hat, Inc.
7 * Copyright (C) 2019-2022 Google LLC
8 */
9
10#define _GNU_SOURCE /* for pipe2 */
11
12#include <inttypes.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <time.h>
16#include <poll.h>
17#include <pthread.h>
18#include <linux/userfaultfd.h>
19#include <sys/syscall.h>
20
21#include "kvm_util.h"
22#include "test_util.h"
23#include "memstress.h"
24#include "userfaultfd_util.h"
25
26#ifdef __NR_userfaultfd
27
28static void *uffd_handler_thread_fn(void *arg)
29{
30 struct uffd_desc *uffd_desc = (struct uffd_desc *)arg;
31 int uffd = uffd_desc->uffd;
32 int pipefd = uffd_desc->pipefds[0];
33 useconds_t delay = uffd_desc->delay;
34 int64_t pages = 0;
35 struct timespec start;
36 struct timespec ts_diff;
37
38 clock_gettime(CLOCK_MONOTONIC, &start);
39 while (1) {
40 struct uffd_msg msg;
41 struct pollfd pollfd[2];
42 char tmp_chr;
43 int r;
44
45 pollfd[0].fd = uffd;
46 pollfd[0].events = POLLIN;
47 pollfd[1].fd = pipefd;
48 pollfd[1].events = POLLIN;
49
50 r = poll(pollfd, 2, -1);
51 switch (r) {
52 case -1:
53 pr_info("poll err");
54 continue;
55 case 0:
56 continue;
57 case 1:
58 break;
59 default:
60 pr_info("Polling uffd returned %d", r);
61 return NULL;
62 }
63
64 if (pollfd[0].revents & POLLERR) {
65 pr_info("uffd revents has POLLERR");
66 return NULL;
67 }
68
69 if (pollfd[1].revents & POLLIN) {
70 r = read(pollfd[1].fd, &tmp_chr, 1);
71 TEST_ASSERT(r == 1,
72 "Error reading pipefd in UFFD thread\n");
73 return NULL;
74 }
75
76 if (!(pollfd[0].revents & POLLIN))
77 continue;
78
79 r = read(uffd, &msg, sizeof(msg));
80 if (r == -1) {
81 if (errno == EAGAIN)
82 continue;
83 pr_info("Read of uffd got errno %d\n", errno);
84 return NULL;
85 }
86
87 if (r != sizeof(msg)) {
88 pr_info("Read on uffd returned unexpected size: %d bytes", r);
89 return NULL;
90 }
91
92 if (!(msg.event & UFFD_EVENT_PAGEFAULT))
93 continue;
94
95 if (delay)
96 usleep(delay);
97 r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg);
98 if (r < 0)
99 return NULL;
100 pages++;
101 }
102
103 ts_diff = timespec_elapsed(start);
104 PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
105 pages, ts_diff.tv_sec, ts_diff.tv_nsec,
106 pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
107
108 return NULL;
109}
110
111struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
112 void *hva, uint64_t len,
113 uffd_handler_t handler)
114{
115 struct uffd_desc *uffd_desc;
116 bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
117 int uffd;
118 struct uffdio_api uffdio_api;
119 struct uffdio_register uffdio_register;
120 uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
121 int ret;
122
123 PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
124 is_minor ? "MINOR" : "MISSING",
125 is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
126
127 uffd_desc = malloc(sizeof(struct uffd_desc));
128 TEST_ASSERT(uffd_desc, "malloc failed");
129
130 /* In order to get minor faults, prefault via the alias. */
131 if (is_minor)
132 expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
133
134 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
135 TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
136
137 uffdio_api.api = UFFD_API;
138 uffdio_api.features = 0;
139 TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
140 "ioctl UFFDIO_API failed: %" PRIu64,
141 (uint64_t)uffdio_api.api);
142
143 uffdio_register.range.start = (uint64_t)hva;
144 uffdio_register.range.len = len;
145 uffdio_register.mode = uffd_mode;
146 TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
147 "ioctl UFFDIO_REGISTER failed");
148 TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
149 expected_ioctls, "missing userfaultfd ioctls");
150
151 ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
152 TEST_ASSERT(!ret, "Failed to set up pipefd");
153
154 uffd_desc->uffd_mode = uffd_mode;
155 uffd_desc->uffd = uffd;
156 uffd_desc->delay = delay;
157 uffd_desc->handler = handler;
158 pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
159 uffd_desc);
160
161 PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
162 hva, hva + len);
163
164 return uffd_desc;
165}
166
167void uffd_stop_demand_paging(struct uffd_desc *uffd)
168{
169 char c = 0;
170 int ret;
171
172 ret = write(uffd->pipefds[1], &c, 1);
173 TEST_ASSERT(ret == 1, "Unable to write to pipefd");
174
175 ret = pthread_join(uffd->thread, NULL);
176 TEST_ASSERT(ret == 0, "Pthread_join failed.");
177
178 close(uffd->uffd);
179
180 close(uffd->pipefds[1]);
181 close(uffd->pipefds[0]);
182
183 free(uffd);
184}
185
186#endif /* __NR_userfaultfd */
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * KVM userfaultfd util
4 * Adapted from demand_paging_test.c
5 *
6 * Copyright (C) 2018, Red Hat, Inc.
7 * Copyright (C) 2019-2022 Google LLC
8 */
9#include <inttypes.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <time.h>
13#include <poll.h>
14#include <pthread.h>
15#include <linux/userfaultfd.h>
16#include <sys/epoll.h>
17#include <sys/syscall.h>
18
19#include "kvm_util.h"
20#include "test_util.h"
21#include "memstress.h"
22#include "userfaultfd_util.h"
23
24#ifdef __NR_userfaultfd
25
26static void *uffd_handler_thread_fn(void *arg)
27{
28 struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg;
29 int uffd = reader_args->uffd;
30 int64_t pages = 0;
31 struct timespec start;
32 struct timespec ts_diff;
33 struct epoll_event evt;
34 int epollfd;
35
36 epollfd = epoll_create(1);
37 TEST_ASSERT(epollfd >= 0, "Failed to create epollfd.");
38
39 evt.events = EPOLLIN | EPOLLEXCLUSIVE;
40 evt.data.u32 = 0;
41 TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt),
42 "Failed to add uffd to epollfd");
43
44 evt.events = EPOLLIN;
45 evt.data.u32 = 1;
46 TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt),
47 "Failed to add pipe to epollfd");
48
49 clock_gettime(CLOCK_MONOTONIC, &start);
50 while (1) {
51 struct uffd_msg msg;
52 int r;
53
54 r = epoll_wait(epollfd, &evt, 1, -1);
55 TEST_ASSERT(r == 1,
56 "Unexpected number of events (%d) from epoll, errno = %d",
57 r, errno);
58
59 if (evt.data.u32 == 1) {
60 char tmp_chr;
61
62 TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
63 "Reader thread received EPOLLERR or EPOLLHUP on pipe.");
64 r = read(reader_args->pipe, &tmp_chr, 1);
65 TEST_ASSERT(r == 1,
66 "Error reading pipefd in uffd reader thread");
67 break;
68 }
69
70 TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
71 "Reader thread received EPOLLERR or EPOLLHUP on uffd.");
72
73 r = read(uffd, &msg, sizeof(msg));
74 if (r == -1) {
75 TEST_ASSERT(errno == EAGAIN,
76 "Error reading from UFFD: errno = %d", errno);
77 continue;
78 }
79
80 TEST_ASSERT(r == sizeof(msg),
81 "Read on uffd returned unexpected number of bytes (%d)", r);
82
83 if (!(msg.event & UFFD_EVENT_PAGEFAULT))
84 continue;
85
86 if (reader_args->delay)
87 usleep(reader_args->delay);
88 r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
89 TEST_ASSERT(r >= 0,
90 "Reader thread handler fn returned negative value %d", r);
91 pages++;
92 }
93
94 ts_diff = timespec_elapsed(start);
95 PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
96 pages, ts_diff.tv_sec, ts_diff.tv_nsec,
97 pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
98
99 return NULL;
100}
101
102struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
103 void *hva, uint64_t len,
104 uint64_t num_readers,
105 uffd_handler_t handler)
106{
107 struct uffd_desc *uffd_desc;
108 bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
109 int uffd;
110 struct uffdio_api uffdio_api;
111 struct uffdio_register uffdio_register;
112 uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
113 int ret, i;
114
115 PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
116 is_minor ? "MINOR" : "MISSING",
117 is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
118
119 uffd_desc = malloc(sizeof(struct uffd_desc));
120 TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
121
122 uffd_desc->pipefds = calloc(sizeof(int), num_readers);
123 TEST_ASSERT(uffd_desc->pipefds, "Failed to alloc pipes");
124
125 uffd_desc->readers = calloc(sizeof(pthread_t), num_readers);
126 TEST_ASSERT(uffd_desc->readers, "Failed to alloc reader threads");
127
128 uffd_desc->reader_args = calloc(sizeof(struct uffd_reader_args), num_readers);
129 TEST_ASSERT(uffd_desc->reader_args, "Failed to alloc reader_args");
130
131 uffd_desc->num_readers = num_readers;
132
133 /* In order to get minor faults, prefault via the alias. */
134 if (is_minor)
135 expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
136
137 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
138 TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
139
140 uffdio_api.api = UFFD_API;
141 uffdio_api.features = 0;
142 TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
143 "ioctl UFFDIO_API failed: %" PRIu64,
144 (uint64_t)uffdio_api.api);
145
146 uffdio_register.range.start = (uint64_t)hva;
147 uffdio_register.range.len = len;
148 uffdio_register.mode = uffd_mode;
149 TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
150 "ioctl UFFDIO_REGISTER failed");
151 TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
152 expected_ioctls, "missing userfaultfd ioctls");
153
154 uffd_desc->uffd = uffd;
155 for (i = 0; i < uffd_desc->num_readers; ++i) {
156 int pipes[2];
157
158 ret = pipe2((int *) &pipes, O_CLOEXEC | O_NONBLOCK);
159 TEST_ASSERT(!ret, "Failed to set up pipefd %i for uffd_desc %p",
160 i, uffd_desc);
161
162 uffd_desc->pipefds[i] = pipes[1];
163
164 uffd_desc->reader_args[i].uffd_mode = uffd_mode;
165 uffd_desc->reader_args[i].uffd = uffd;
166 uffd_desc->reader_args[i].delay = delay;
167 uffd_desc->reader_args[i].handler = handler;
168 uffd_desc->reader_args[i].pipe = pipes[0];
169
170 pthread_create(&uffd_desc->readers[i], NULL, uffd_handler_thread_fn,
171 &uffd_desc->reader_args[i]);
172
173 PER_VCPU_DEBUG("Created uffd thread %i for HVA range [%p, %p)\n",
174 i, hva, hva + len);
175 }
176
177 return uffd_desc;
178}
179
180void uffd_stop_demand_paging(struct uffd_desc *uffd)
181{
182 char c = 0;
183 int i;
184
185 for (i = 0; i < uffd->num_readers; ++i)
186 TEST_ASSERT(write(uffd->pipefds[i], &c, 1) == 1,
187 "Unable to write to pipefd %i for uffd_desc %p", i, uffd);
188
189 for (i = 0; i < uffd->num_readers; ++i)
190 TEST_ASSERT(!pthread_join(uffd->readers[i], NULL),
191 "Pthread_join failed on reader %i for uffd_desc %p", i, uffd);
192
193 close(uffd->uffd);
194
195 for (i = 0; i < uffd->num_readers; ++i) {
196 close(uffd->pipefds[i]);
197 close(uffd->reader_args[i].pipe);
198 }
199
200 free(uffd->pipefds);
201 free(uffd->readers);
202 free(uffd->reader_args);
203 free(uffd);
204}
205
206#endif /* __NR_userfaultfd */