1/*
2 * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#include <unistd.h>
7#include <sched.h>
8#include <signal.h>
9#include <errno.h>
10#include <sys/time.h>
11#include <asm/unistd.h>
12#include "aio.h"
13#include "init.h"
14#include "kern_util.h"
15#include "os.h"
16
/*
 * One queued I/O request, as passed from submit_aio_24() to
 * not_aio_thread() over the request pipe.
 */
struct aio_thread_req {
	enum aio_type type;		/* AIO_READ, AIO_WRITE or AIO_MMAP */
	int io_fd;			/* descriptor to perform the I/O on */
	unsigned long long offset;	/* file offset to seek to first */
	char *buf;			/* data buffer (unused for AIO_MMAP) */
	int len;			/* number of bytes to transfer */
	struct aio_context *aio;	/* completion record; holds reply_fd */
};
25
26#if defined(HAVE_AIO_ABI)
27#include <linux/aio_abi.h>
28
29/*
30 * If we have the headers, we are going to build with AIO enabled.
31 * If we don't have aio in libc, we define the necessary stubs here.
32 */
33
#if !defined(HAVE_AIO_LIBC)

/*
 * libc does not provide the AIO wrappers; issue the raw syscalls
 * directly.  Errors are reported the syscall(2) way: -1 with errno set.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	return syscall(__NR_io_setup, n, ctxp);
}

static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	return syscall(__NR_io_submit, ctx, nr, iocbpp);
}

static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
}

#endif
53
54/*
55 * The AIO_MMAP cases force the mmapped page into memory here
56 * rather than in whatever place first touches the data. I used
57 * to do this by touching the page, but that's delicate because
58 * gcc is prone to optimizing that away. So, what's done here
59 * is we read from the descriptor from which the page was
60 * mapped. The caller is required to pass an offset which is
61 * inside the page that was mapped. Thus, when the read
62 * returns, we know that the page is in the page cache, and
63 * that it now backs the mmapped area.
64 */
65
/*
 * Build an iocb for the request and submit it to the host AIO context.
 * Returns 0 on successful submission (completion is reported later via
 * io_getevents() in aio_thread()), negative errno on failure.
 */
static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
		  int len, unsigned long long offset, struct aio_context *aio)
{
	/*
	 * The compound literal lives for the whole enclosing function,
	 * so taking its address here is valid for the io_submit() below.
	 */
	struct iocb *iocbp = & ((struct iocb) {
				 .aio_data = (unsigned long) aio,
				 .aio_fildes = fd,
				 .aio_buf = (unsigned long) buf,
				 .aio_nbytes = len,
				 .aio_offset = offset
			 });
	char c;

	switch (type) {
	case AIO_READ:
		iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
		break;
	case AIO_WRITE:
		iocbp->aio_lio_opcode = IOCB_CMD_PWRITE;
		break;
	case AIO_MMAP:
		/*
		 * Read a single byte through the descriptor to force the
		 * mmapped page into the page cache (see comment above).
		 * NOTE(review): &c is on this stack frame, but the queued
		 * read may complete after do_aio() returns - confirm the
		 * host writes the byte before the frame is reused.
		 */
		iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
		iocbp->aio_buf = (unsigned long) &c;
		iocbp->aio_nbytes = sizeof(c);
		break;
	default:
		printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type);
		return -EINVAL;
	}

	/*
	 * NOTE(review): when HAVE_AIO_LIBC is set, io_submit() may report
	 * errors via its return value rather than errno - verify -errno
	 * is correct in that configuration.
	 */
	return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno;
}
97
98/* Initialized in an initcall and unchanged thereafter */
99static aio_context_t ctx = 0;
100
/*
 * 2.6-AIO helper thread: block in io_getevents() and forward each
 * completion to the reply descriptor recorded in its aio_context.
 * Runs forever; it is torn down by exit_aio() killing the process.
 */
static int aio_thread(void *arg)
{
	struct aio_thread_reply reply;
	struct io_event event;
	int err, n, reply_fd;

	signal(SIGWINCH, SIG_IGN);

	while (1) {
		n = io_getevents(ctx, 1, 1, &event, NULL);
		if (n < 0) {
			if (errno == EINTR)
				continue;
			printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
			       "errno = %d\n", errno);
		}
		else {
			/*
			 * event.data carries the struct aio_context pointer
			 * stashed in iocb->aio_data by do_aio().
			 */
			reply = ((struct aio_thread_reply)
				{ .data = (void *) (long) event.data,
				  .err = event.res });
			reply_fd = ((struct aio_context *) reply.data)->reply_fd;
			err = write(reply_fd, &reply, sizeof(reply));
			if (err != sizeof(reply))
				printk(UM_KERN_ERR "aio_thread - write failed, "
				       "fd = %d, err = %d\n", reply_fd, errno);
		}
	}
	return 0;
}
130
131#endif
132
133static int do_not_aio(struct aio_thread_req *req)
134{
135 char c;
136 unsigned long long actual;
137 int n;
138
139 actual = lseek64(req->io_fd, req->offset, SEEK_SET);
140 if (actual != req->offset)
141 return -errno;
142
143 switch (req->type) {
144 case AIO_READ:
145 n = read(req->io_fd, req->buf, req->len);
146 break;
147 case AIO_WRITE:
148 n = write(req->io_fd, req->buf, req->len);
149 break;
150 case AIO_MMAP:
151 n = read(req->io_fd, &c, sizeof(c));
152 break;
153 default:
154 printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n",
155 req->type);
156 return -EINVAL;
157 }
158
159 if (n < 0)
160 return -errno;
161 return 0;
162}
163
164/* These are initialized in initcalls and not changed */
165static int aio_req_fd_r = -1;
166static int aio_req_fd_w = -1;
167static int aio_pid = -1;
168static unsigned long aio_stack;
169
/*
 * 2.4-fallback helper thread: read requests from the pipe, run each one
 * synchronously via do_not_aio(), and write the result back to the
 * submitter's reply descriptor.  Runs forever; killed by exit_aio().
 */
static int not_aio_thread(void *arg)
{
	struct aio_thread_req req;
	struct aio_thread_reply reply;
	int err;

	signal(SIGWINCH, SIG_IGN);
	while (1) {
		err = read(aio_req_fd_r, &req, sizeof(req));
		if (err != sizeof(req)) {
			if (err < 0)
				printk(UM_KERN_ERR "not_aio_thread - "
				       "read failed, fd = %d, err = %d\n",
				       aio_req_fd_r,
				       errno);
			else {
				printk(UM_KERN_ERR "not_aio_thread - short "
				       "read, fd = %d, length = %d\n",
				       aio_req_fd_r, err);
			}
			/* Drop the malformed request and keep serving. */
			continue;
		}
		err = do_not_aio(&req);
		reply = ((struct aio_thread_reply) { .data = req.aio,
				 .err = err });
		err = write(req.aio->reply_fd, &reply, sizeof(reply));
		if (err != sizeof(reply))
			printk(UM_KERN_ERR "not_aio_thread - write failed, "
			       "fd = %d, err = %d\n", req.aio->reply_fd, errno);
	}

	return 0;
}
203
204static int init_aio_24(void)
205{
206 int fds[2], err;
207
208 err = os_pipe(fds, 1, 1);
209 if (err)
210 goto out;
211
212 aio_req_fd_w = fds[0];
213 aio_req_fd_r = fds[1];
214
215 err = os_set_fd_block(aio_req_fd_w, 0);
216 if (err)
217 goto out_close_pipe;
218
219 err = run_helper_thread(not_aio_thread, NULL,
220 CLONE_FILES | CLONE_VM, &aio_stack);
221 if (err < 0)
222 goto out_close_pipe;
223
224 aio_pid = err;
225 goto out;
226
227out_close_pipe:
228 close(fds[0]);
229 close(fds[1]);
230 aio_req_fd_w = -1;
231 aio_req_fd_r = -1;
232out:
233#ifndef HAVE_AIO_ABI
234 printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during "
235 "build\n");
236#endif
237 printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to "
238 "I/O thread\n");
239 return 0;
240}
241
242#ifdef HAVE_AIO_ABI
243#define DEFAULT_24_AIO 0
244static int init_aio_26(void)
245{
246 int err;
247
248 if (io_setup(256, &ctx)) {
249 err = -errno;
250 printk(UM_KERN_ERR "aio_thread failed to initialize context, "
251 "err = %d\n", errno);
252 return err;
253 }
254
255 err = run_helper_thread(aio_thread, NULL,
256 CLONE_FILES | CLONE_VM, &aio_stack);
257 if (err < 0)
258 return err;
259
260 aio_pid = err;
261
262 printk(UM_KERN_INFO "Using 2.6 host AIO\n");
263 return 0;
264}
265
266static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
267 unsigned long long offset, struct aio_context *aio)
268{
269 struct aio_thread_reply reply;
270 int err;
271
272 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
273 if (err) {
274 reply = ((struct aio_thread_reply) { .data = aio,
275 .err = err });
276 err = write(aio->reply_fd, &reply, sizeof(reply));
277 if (err != sizeof(reply)) {
278 err = -errno;
279 printk(UM_KERN_ERR "submit_aio_26 - write failed, "
280 "fd = %d, err = %d\n", aio->reply_fd, -err);
281 }
282 else err = 0;
283 }
284
285 return err;
286}
287
288#else
289#define DEFAULT_24_AIO 1
/* Host AIO headers were absent at build time: 2.6 AIO is unavailable. */
static int init_aio_26(void)
{
	return -ENOSYS;
}

static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
			 unsigned long long offset, struct aio_context *aio)
{
	return -ENOSYS;
}
300#endif
301
302/* Initialized in an initcall and unchanged thereafter */
303static int aio_24 = DEFAULT_24_AIO;
304
/*
 * "aio=2.4" command-line handler: force the 2.4-style I/O thread even
 * when 2.6 host AIO is available.  The name/add parameters are part of
 * the __uml_setup callback signature and are unused here.
 */
static int __init set_aio_24(char *name, int *add)
{
	aio_24 = 1;
	return 0;
}
310
311__uml_setup("aio=2.4", set_aio_24,
312"aio=2.4\n"
313" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
314" available. 2.4 AIO is a single thread that handles one request at a\n"
315" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
316" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
317" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
318" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
319" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
320" your /usr/include/linux in order to build an AIO-capable UML\n\n"
321);
322
323static int init_aio(void)
324{
325 int err;
326
327 if (!aio_24) {
328 err = init_aio_26();
329 if (err && (errno == ENOSYS)) {
330 printk(UM_KERN_INFO "2.6 AIO not supported on the "
331 "host - reverting to 2.4 AIO\n");
332 aio_24 = 1;
333 }
334 else return err;
335 }
336
337 if (aio_24)
338 return init_aio_24();
339
340 return 0;
341}
342
343/*
344 * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
345 * needs to be called when the kernel is running because it calls run_helper,
346 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
347 * kernel does not run __exitcalls on shutdown, and can't because many of them
348 * break when called outside of module unloading.
349 */
350__initcall(init_aio);
351
352static void exit_aio(void)
353{
354 if (aio_pid != -1) {
355 os_kill_process(aio_pid, 1);
356 free_stack(aio_stack, 0);
357 }
358}
359
360__uml_exitcall(exit_aio);
361
362static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
363 unsigned long long offset, struct aio_context *aio)
364{
365 struct aio_thread_req req = { .type = type,
366 .io_fd = io_fd,
367 .offset = offset,
368 .buf = buf,
369 .len = len,
370 .aio = aio,
371 };
372 int err;
373
374 err = write(aio_req_fd_w, &req, sizeof(req));
375 if (err == sizeof(req))
376 err = 0;
377 else err = -errno;
378
379 return err;
380}
381
382int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
383 unsigned long long offset, int reply_fd,
384 struct aio_context *aio)
385{
386 aio->reply_fd = reply_fd;
387 if (aio_24)
388 return submit_aio_24(type, io_fd, buf, len, offset, aio);
389 else
390 return submit_aio_26(type, io_fd, buf, len, offset, aio);
391}
1/*
2 * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#include <unistd.h>
7#include <sched.h>
8#include <signal.h>
9#include <errno.h>
10#include <sys/time.h>
11#include <asm/unistd.h>
12#include "aio.h"
13#include "init.h"
14#include "kern_constants.h"
15#include "kern_util.h"
16#include "os.h"
17#include "user.h"
18
19struct aio_thread_req {
20 enum aio_type type;
21 int io_fd;
22 unsigned long long offset;
23 char *buf;
24 int len;
25 struct aio_context *aio;
26};
27
28#if defined(HAVE_AIO_ABI)
29#include <linux/aio_abi.h>
30
31/*
32 * If we have the headers, we are going to build with AIO enabled.
33 * If we don't have aio in libc, we define the necessary stubs here.
34 */
35
36#if !defined(HAVE_AIO_LIBC)
37
38static long io_setup(int n, aio_context_t *ctxp)
39{
40 return syscall(__NR_io_setup, n, ctxp);
41}
42
43static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
44{
45 return syscall(__NR_io_submit, ctx, nr, iocbpp);
46}
47
48static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
49 struct io_event *events, struct timespec *timeout)
50{
51 return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
52}
53
54#endif
55
56/*
57 * The AIO_MMAP cases force the mmapped page into memory here
58 * rather than in whatever place first touches the data. I used
59 * to do this by touching the page, but that's delicate because
60 * gcc is prone to optimizing that away. So, what's done here
61 * is we read from the descriptor from which the page was
62 * mapped. The caller is required to pass an offset which is
63 * inside the page that was mapped. Thus, when the read
64 * returns, we know that the page is in the page cache, and
65 * that it now backs the mmapped area.
66 */
67
68static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
69 int len, unsigned long long offset, struct aio_context *aio)
70{
71 struct iocb *iocbp = & ((struct iocb) {
72 .aio_data = (unsigned long) aio,
73 .aio_fildes = fd,
74 .aio_buf = (unsigned long) buf,
75 .aio_nbytes = len,
76 .aio_offset = offset
77 });
78 char c;
79
80 switch (type) {
81 case AIO_READ:
82 iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
83 break;
84 case AIO_WRITE:
85 iocbp->aio_lio_opcode = IOCB_CMD_PWRITE;
86 break;
87 case AIO_MMAP:
88 iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
89 iocbp->aio_buf = (unsigned long) &c;
90 iocbp->aio_nbytes = sizeof(c);
91 break;
92 default:
93 printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type);
94 return -EINVAL;
95 }
96
97 return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno;
98}
99
100/* Initialized in an initcall and unchanged thereafter */
101static aio_context_t ctx = 0;
102
103static int aio_thread(void *arg)
104{
105 struct aio_thread_reply reply;
106 struct io_event event;
107 int err, n, reply_fd;
108
109 signal(SIGWINCH, SIG_IGN);
110
111 while (1) {
112 n = io_getevents(ctx, 1, 1, &event, NULL);
113 if (n < 0) {
114 if (errno == EINTR)
115 continue;
116 printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
117 "errno = %d\n", errno);
118 }
119 else {
120 reply = ((struct aio_thread_reply)
121 { .data = (void *) (long) event.data,
122 .err = event.res });
123 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
124 err = write(reply_fd, &reply, sizeof(reply));
125 if (err != sizeof(reply))
126 printk(UM_KERN_ERR "aio_thread - write failed, "
127 "fd = %d, err = %d\n", reply_fd, errno);
128 }
129 }
130 return 0;
131}
132
133#endif
134
135static int do_not_aio(struct aio_thread_req *req)
136{
137 char c;
138 unsigned long long actual;
139 int n;
140
141 actual = lseek64(req->io_fd, req->offset, SEEK_SET);
142 if (actual != req->offset)
143 return -errno;
144
145 switch (req->type) {
146 case AIO_READ:
147 n = read(req->io_fd, req->buf, req->len);
148 break;
149 case AIO_WRITE:
150 n = write(req->io_fd, req->buf, req->len);
151 break;
152 case AIO_MMAP:
153 n = read(req->io_fd, &c, sizeof(c));
154 break;
155 default:
156 printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n",
157 req->type);
158 return -EINVAL;
159 }
160
161 if (n < 0)
162 return -errno;
163 return 0;
164}
165
166/* These are initialized in initcalls and not changed */
167static int aio_req_fd_r = -1;
168static int aio_req_fd_w = -1;
169static int aio_pid = -1;
170static unsigned long aio_stack;
171
172static int not_aio_thread(void *arg)
173{
174 struct aio_thread_req req;
175 struct aio_thread_reply reply;
176 int err;
177
178 signal(SIGWINCH, SIG_IGN);
179 while (1) {
180 err = read(aio_req_fd_r, &req, sizeof(req));
181 if (err != sizeof(req)) {
182 if (err < 0)
183 printk(UM_KERN_ERR "not_aio_thread - "
184 "read failed, fd = %d, err = %d\n",
185 aio_req_fd_r,
186 errno);
187 else {
188 printk(UM_KERN_ERR "not_aio_thread - short "
189 "read, fd = %d, length = %d\n",
190 aio_req_fd_r, err);
191 }
192 continue;
193 }
194 err = do_not_aio(&req);
195 reply = ((struct aio_thread_reply) { .data = req.aio,
196 .err = err });
197 err = write(req.aio->reply_fd, &reply, sizeof(reply));
198 if (err != sizeof(reply))
199 printk(UM_KERN_ERR "not_aio_thread - write failed, "
200 "fd = %d, err = %d\n", req.aio->reply_fd, errno);
201 }
202
203 return 0;
204}
205
206static int init_aio_24(void)
207{
208 int fds[2], err;
209
210 err = os_pipe(fds, 1, 1);
211 if (err)
212 goto out;
213
214 aio_req_fd_w = fds[0];
215 aio_req_fd_r = fds[1];
216
217 err = os_set_fd_block(aio_req_fd_w, 0);
218 if (err)
219 goto out_close_pipe;
220
221 err = run_helper_thread(not_aio_thread, NULL,
222 CLONE_FILES | CLONE_VM, &aio_stack);
223 if (err < 0)
224 goto out_close_pipe;
225
226 aio_pid = err;
227 goto out;
228
229out_close_pipe:
230 close(fds[0]);
231 close(fds[1]);
232 aio_req_fd_w = -1;
233 aio_req_fd_r = -1;
234out:
235#ifndef HAVE_AIO_ABI
236 printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during "
237 "build\n");
238#endif
239 printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to "
240 "I/O thread\n");
241 return 0;
242}
243
244#ifdef HAVE_AIO_ABI
245#define DEFAULT_24_AIO 0
246static int init_aio_26(void)
247{
248 int err;
249
250 if (io_setup(256, &ctx)) {
251 err = -errno;
252 printk(UM_KERN_ERR "aio_thread failed to initialize context, "
253 "err = %d\n", errno);
254 return err;
255 }
256
257 err = run_helper_thread(aio_thread, NULL,
258 CLONE_FILES | CLONE_VM, &aio_stack);
259 if (err < 0)
260 return err;
261
262 aio_pid = err;
263
264 printk(UM_KERN_INFO "Using 2.6 host AIO\n");
265 return 0;
266}
267
268static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
269 unsigned long long offset, struct aio_context *aio)
270{
271 struct aio_thread_reply reply;
272 int err;
273
274 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
275 if (err) {
276 reply = ((struct aio_thread_reply) { .data = aio,
277 .err = err });
278 err = write(aio->reply_fd, &reply, sizeof(reply));
279 if (err != sizeof(reply)) {
280 err = -errno;
281 printk(UM_KERN_ERR "submit_aio_26 - write failed, "
282 "fd = %d, err = %d\n", aio->reply_fd, -err);
283 }
284 else err = 0;
285 }
286
287 return err;
288}
289
290#else
291#define DEFAULT_24_AIO 1
292static int init_aio_26(void)
293{
294 return -ENOSYS;
295}
296
297static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
298 unsigned long long offset, struct aio_context *aio)
299{
300 return -ENOSYS;
301}
302#endif
303
304/* Initialized in an initcall and unchanged thereafter */
305static int aio_24 = DEFAULT_24_AIO;
306
307static int __init set_aio_24(char *name, int *add)
308{
309 aio_24 = 1;
310 return 0;
311}
312
313__uml_setup("aio=2.4", set_aio_24,
314"aio=2.4\n"
315" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
316" available. 2.4 AIO is a single thread that handles one request at a\n"
317" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
318" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
319" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
320" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
321" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
322" your /usr/include/linux in order to build an AIO-capable UML\n\n"
323);
324
325static int init_aio(void)
326{
327 int err;
328
329 if (!aio_24) {
330 err = init_aio_26();
331 if (err && (errno == ENOSYS)) {
332 printk(UM_KERN_INFO "2.6 AIO not supported on the "
333 "host - reverting to 2.4 AIO\n");
334 aio_24 = 1;
335 }
336 else return err;
337 }
338
339 if (aio_24)
340 return init_aio_24();
341
342 return 0;
343}
344
345/*
346 * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
347 * needs to be called when the kernel is running because it calls run_helper,
348 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
349 * kernel does not run __exitcalls on shutdown, and can't because many of them
350 * break when called outside of module unloading.
351 */
352__initcall(init_aio);
353
354static void exit_aio(void)
355{
356 if (aio_pid != -1) {
357 os_kill_process(aio_pid, 1);
358 free_stack(aio_stack, 0);
359 }
360}
361
362__uml_exitcall(exit_aio);
363
364static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
365 unsigned long long offset, struct aio_context *aio)
366{
367 struct aio_thread_req req = { .type = type,
368 .io_fd = io_fd,
369 .offset = offset,
370 .buf = buf,
371 .len = len,
372 .aio = aio,
373 };
374 int err;
375
376 err = write(aio_req_fd_w, &req, sizeof(req));
377 if (err == sizeof(req))
378 err = 0;
379 else err = -errno;
380
381 return err;
382}
383
384int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
385 unsigned long long offset, int reply_fd,
386 struct aio_context *aio)
387{
388 aio->reply_fd = reply_fd;
389 if (aio_24)
390 return submit_aio_24(type, io_fd, buf, len, offset, aio);
391 else
392 return submit_aio_26(type, io_fd, buf, len, offset, aio);
393}