Loading...
1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define UBD_SHIFT 4
21
22#include <linux/module.h>
23#include <linux/init.h>
24#include <linux/blkdev.h>
25#include <linux/ata.h>
26#include <linux/hdreg.h>
27#include <linux/cdrom.h>
28#include <linux/proc_fs.h>
29#include <linux/seq_file.h>
30#include <linux/ctype.h>
31#include <linux/slab.h>
32#include <linux/vmalloc.h>
33#include <linux/platform_device.h>
34#include <linux/scatterlist.h>
35#include <asm/tlbflush.h>
36#include "kern_util.h"
37#include "mconsole_kern.h"
38#include "init.h"
39#include "irq_kern.h"
40#include "ubd.h"
41#include "os.h"
42#include "cow.h"
43
/* Direction of a single I/O thread request. */
enum ubd_req { UBD_READ, UBD_WRITE };

/*
 * One chunk of work handed to the I/O thread.  do_ubd_request() allocates
 * it and writes its address to thread_fd; ubd_handler() reads the address
 * back, completes the block request and frees it.
 */
struct io_thread_req {
	/* Block-layer request this chunk belongs to */
	struct request *req;
	/* UBD_READ or UBD_WRITE, derived from rq_data_dir() */
	enum ubd_req op;
	/* [0]: backing file fd when a COW file is in use, else the device
	 * fd; [1]: always the device fd (see prepare_request) */
	int fds[2];
	/* Per-fd data offsets: [0] is 0, [1] is the COW data offset */
	unsigned long offsets[2];
	/* Byte offset of the chunk on the device (sector << 9) */
	unsigned long long offset;
	/* Chunk length in bytes */
	unsigned long length;
	/* Kernel address of the data buffer */
	char *buffer;
	/* Always 1 << 9 in prepare_request */
	int sectorsize;
	/* One bit per sector of the chunk, filled in by cowify_req */
	unsigned long sector_mask;
	/* File offset of bitmap_words, or -1 when no bitmap update is
	 * needed */
	unsigned long long cow_offset;
	/* Copy of the two bitmap words starting at cow_offset */
	unsigned long bitmap_words[2];
	/* Cleared by prepare_request; presumably set by the I/O thread -
	 * TODO confirm, the thread code is not visible here */
	int error;
};
60
/*
 * Return 1 if bit number @bit is set in the byte array @data, 0 otherwise.
 * Bits are numbered LSB-first within each byte.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] >> shift) & 1;
}
71
/*
 * Set bit number @bit in the byte array @data.  Bits are numbered
 * LSB-first within each byte, matching ubd_test_bit().
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
82/*End stuff from ubd_user.h*/
83
84#define DRIVER_NAME "uml-blkdev"
85
86static DEFINE_MUTEX(ubd_lock);
87static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
88
89static int ubd_open(struct block_device *bdev, fmode_t mode);
90static int ubd_release(struct gendisk *disk, fmode_t mode);
91static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
92 unsigned int cmd, unsigned long arg);
93static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
94
95#define MAX_DEV (16)
96
97static const struct block_device_operations ubd_blops = {
98 .owner = THIS_MODULE,
99 .open = ubd_open,
100 .release = ubd_release,
101 .ioctl = ubd_ioctl,
102 .getgeo = ubd_getgeo,
103};
104
105/* Protected by ubd_lock */
106static int fake_major = UBD_MAJOR;
107static struct gendisk *ubd_gendisk[MAX_DEV];
108static struct gendisk *fake_gendisk[MAX_DEV];
109
110#ifdef CONFIG_BLK_DEV_UBD_SYNC
111#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
112 .cl = 1 })
113#else
114#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
115 .cl = 1 })
116#endif
117static struct openflags global_openflags = OPEN_FLAGS;
118
/* Copy-on-write state attached to a ubd device. */
struct cow {
	/* Backing file name */
	char *file;
	/* Backing file fd */
	int fd;
	/* In-memory copy of the COW bitmap (vmalloc'd in ubd_open_dev) */
	unsigned long *bitmap;
	/* Size passed to vmalloc() and read_cow_bitmap() for the bitmap */
	unsigned long bitmap_len;
	/* Offset in the COW file from which the bitmap is read */
	int bitmap_offset;
	/* Used as the data offset for I/O on the device fd */
	int data_offset;
};
129
130#define MAX_SG 64
131
132struct ubd {
133 struct list_head restart;
134 /* name (and fd, below) of the file opened for writing, either the
135 * backing or the cow file. */
136 char *file;
137 int count;
138 int fd;
139 __u64 size;
140 struct openflags boot_openflags;
141 struct openflags openflags;
142 unsigned shared:1;
143 unsigned no_cow:1;
144 struct cow cow;
145 struct platform_device pdev;
146 struct request_queue *queue;
147 spinlock_t lock;
148 struct scatterlist sg[MAX_SG];
149 struct request *request;
150 int start_sg, end_sg;
151 sector_t rq_pos;
152};
153
154#define DEFAULT_COW { \
155 .file = NULL, \
156 .fd = -1, \
157 .bitmap = NULL, \
158 .bitmap_offset = 0, \
159 .data_offset = 0, \
160}
161
162#define DEFAULT_UBD { \
163 .file = NULL, \
164 .count = 0, \
165 .fd = -1, \
166 .size = -1, \
167 .boot_openflags = OPEN_FLAGS, \
168 .openflags = OPEN_FLAGS, \
169 .no_cow = 0, \
170 .shared = 0, \
171 .cow = DEFAULT_COW, \
172 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
173 .request = NULL, \
174 .start_sg = 0, \
175 .end_sg = 0, \
176 .rq_pos = 0, \
177}
178
179/* Protected by ubd_lock */
180static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
181
182/* Only changed by fake_ide_setup which is a setup */
183static int fake_ide = 0;
184static struct proc_dir_entry *proc_ide_root = NULL;
185static struct proc_dir_entry *proc_ide = NULL;
186
187static void make_proc_ide(void)
188{
189 proc_ide_root = proc_mkdir("ide", NULL);
190 proc_ide = proc_mkdir("ide0", proc_ide_root);
191}
192
/* seq_file show callback for the fake "media" entry: always "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	static const char media[] = "disk\n";

	seq_puts(m, media);
	return 0;
}
198
199static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
200{
201 return single_open(file, fake_ide_media_proc_show, NULL);
202}
203
204static const struct file_operations fake_ide_media_proc_fops = {
205 .owner = THIS_MODULE,
206 .open = fake_ide_media_proc_open,
207 .read = seq_read,
208 .llseek = seq_lseek,
209 .release = single_release,
210};
211
212static void make_ide_entries(const char *dev_name)
213{
214 struct proc_dir_entry *dir, *ent;
215 char name[64];
216
217 if(proc_ide_root == NULL) make_proc_ide();
218
219 dir = proc_mkdir(dev_name, proc_ide);
220 if(!dir) return;
221
222 ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
223 if(!ent) return;
224 snprintf(name, sizeof(name), "ide0/%s", dev_name);
225 proc_symlink(dev_name, proc_ide_root, name);
226}
227
228static int fake_ide_setup(char *str)
229{
230 fake_ide = 1;
231 return 1;
232}
233
234__setup("fake_ide", fake_ide_setup);
235
236__uml_help(fake_ide_setup,
237"fake_ide\n"
238" Create ide0 entries that map onto ubd devices.\n\n"
239);
240
241static int parse_unit(char **ptr)
242{
243 char *str = *ptr, *end;
244 int n = -1;
245
246 if(isdigit(*str)) {
247 n = simple_strtoul(str, &end, 0);
248 if(end == str)
249 return -1;
250 *ptr = end;
251 }
252 else if (('a' <= *str) && (*str <= 'z')) {
253 n = *str - 'a';
254 str++;
255 *ptr = str;
256 }
257 return n;
258}
259
260/* If *index_out == -1 at exit, the passed option was a general one;
261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
262 * should not be freed on exit.
263 */
264static int ubd_setup_common(char *str, int *index_out, char **error_out)
265{
266 struct ubd *ubd_dev;
267 struct openflags flags = global_openflags;
268 char *backing_file;
269 int n, err = 0, i;
270
271 if(index_out) *index_out = -1;
272 n = *str;
273 if(n == '='){
274 char *end;
275 int major;
276
277 str++;
278 if(!strcmp(str, "sync")){
279 global_openflags = of_sync(global_openflags);
280 goto out1;
281 }
282
283 err = -EINVAL;
284 major = simple_strtoul(str, &end, 0);
285 if((*end != '\0') || (end == str)){
286 *error_out = "Didn't parse major number";
287 goto out1;
288 }
289
290 mutex_lock(&ubd_lock);
291 if (fake_major != UBD_MAJOR) {
292 *error_out = "Can't assign a fake major twice";
293 goto out1;
294 }
295
296 fake_major = major;
297
298 printk(KERN_INFO "Setting extra ubd major number to %d\n",
299 major);
300 err = 0;
301 out1:
302 mutex_unlock(&ubd_lock);
303 return err;
304 }
305
306 n = parse_unit(&str);
307 if(n < 0){
308 *error_out = "Couldn't parse device number";
309 return -EINVAL;
310 }
311 if(n >= MAX_DEV){
312 *error_out = "Device number out of range";
313 return 1;
314 }
315
316 err = -EBUSY;
317 mutex_lock(&ubd_lock);
318
319 ubd_dev = &ubd_devs[n];
320 if(ubd_dev->file != NULL){
321 *error_out = "Device is already configured";
322 goto out;
323 }
324
325 if (index_out)
326 *index_out = n;
327
328 err = -EINVAL;
329 for (i = 0; i < sizeof("rscd="); i++) {
330 switch (*str) {
331 case 'r':
332 flags.w = 0;
333 break;
334 case 's':
335 flags.s = 1;
336 break;
337 case 'd':
338 ubd_dev->no_cow = 1;
339 break;
340 case 'c':
341 ubd_dev->shared = 1;
342 break;
343 case '=':
344 str++;
345 goto break_loop;
346 default:
347 *error_out = "Expected '=' or flag letter "
348 "(r, s, c, or d)";
349 goto out;
350 }
351 str++;
352 }
353
354 if (*str == '=')
355 *error_out = "Too many flags specified";
356 else
357 *error_out = "Missing '='";
358 goto out;
359
360break_loop:
361 backing_file = strchr(str, ',');
362
363 if (backing_file == NULL)
364 backing_file = strchr(str, ':');
365
366 if(backing_file != NULL){
367 if(ubd_dev->no_cow){
368 *error_out = "Can't specify both 'd' and a cow file";
369 goto out;
370 }
371 else {
372 *backing_file = '\0';
373 backing_file++;
374 }
375 }
376 err = 0;
377 ubd_dev->file = str;
378 ubd_dev->cow.file = backing_file;
379 ubd_dev->boot_openflags = flags;
380out:
381 mutex_unlock(&ubd_lock);
382 return err;
383}
384
385static int ubd_setup(char *str)
386{
387 char *error;
388 int err;
389
390 err = ubd_setup_common(str, NULL, &error);
391 if(err)
392 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
393 "%s\n", str, error);
394 return 1;
395}
396
397__setup("ubd", ubd_setup);
398__uml_help(ubd_setup,
399"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
400" This is used to associate a device with a file in the underlying\n"
401" filesystem. When specifying two filenames, the first one is the\n"
402" COW name and the second is the backing file name. As separator you can\n"
403" use either a ':' or a ',': the first one allows writing things like;\n"
404" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
405" while with a ',' the shell would not expand the 2nd '~'.\n"
406" When using only one filename, UML will detect whether to treat it like\n"
407" a COW file or a backing file. To override this detection, add the 'd'\n"
408" flag:\n"
409" ubd0d=BackingFile\n"
410" Usually, there is a filesystem in the file, but \n"
411" that's not required. Swap devices containing swap files can be\n"
412" specified like this. Also, a file which doesn't contain a\n"
413" filesystem can have its contents read in the virtual \n"
414" machine by running 'dd' on the device. <n> must be in the range\n"
415" 0 to 7. Appending an 'r' to the number will cause that device\n"
416" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
417" an 's' will cause data to be written to disk on the host immediately.\n"
418" 'c' will cause the device to be treated as being shared between multiple\n"
419" UMLs and file locking will be turned off - this is appropriate for a\n"
420" cluster filesystem and inappropriate at almost all other times.\n\n"
421);
422
/* Catches the "udb" misspelling of "ubd" on the command line. */
static int udb_setup(char *str)
{
	const int handled = 1;

	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n", str);
	return handled;
}
429
430__setup("udb", udb_setup);
431__uml_help(udb_setup,
432"udb\n"
433" This option is here solely to catch ubd -> udb typos, which can be\n"
434" to impossible to catch visually unless you specifically look for\n"
435" them. The only result of any option starting with 'udb' is an error\n"
436" in the boot output.\n\n"
437);
438
439static void do_ubd_request(struct request_queue * q);
440
441/* Only changed by ubd_init, which is an initcall. */
442static int thread_fd = -1;
443static LIST_HEAD(restart);
444
445/* XXX - move this inside ubd_intr. */
446/* Called without dev->lock held, and only in interrupt context. */
447static void ubd_handler(void)
448{
449 struct io_thread_req *req;
450 struct ubd *ubd;
451 struct list_head *list, *next_ele;
452 unsigned long flags;
453 int n;
454
455 while(1){
456 n = os_read_file(thread_fd, &req,
457 sizeof(struct io_thread_req *));
458 if(n != sizeof(req)){
459 if(n == -EAGAIN)
460 break;
461 printk(KERN_ERR "spurious interrupt in ubd_handler, "
462 "err = %d\n", -n);
463 return;
464 }
465
466 blk_end_request(req->req, 0, req->length);
467 kfree(req);
468 }
469 reactivate_fd(thread_fd, UBD_IRQ);
470
471 list_for_each_safe(list, next_ele, &restart){
472 ubd = container_of(list, struct ubd, restart);
473 list_del_init(&ubd->restart);
474 spin_lock_irqsave(&ubd->lock, flags);
475 do_ubd_request(ubd->queue);
476 spin_unlock_irqrestore(&ubd->lock, flags);
477 }
478}
479
480static irqreturn_t ubd_intr(int irq, void *dev)
481{
482 ubd_handler();
483 return IRQ_HANDLED;
484}
485
486/* Only changed by ubd_init, which is an initcall. */
487static int io_pid = -1;
488
489static void kill_io_thread(void)
490{
491 if(io_pid != -1)
492 os_kill_process(io_pid, 1);
493}
494
495__uml_exitcall(kill_io_thread);
496
497static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
498{
499 char *file;
500 int fd;
501 int err;
502
503 __u32 version;
504 __u32 align;
505 char *backing_file;
506 time_t mtime;
507 unsigned long long size;
508 int sector_size;
509 int bitmap_offset;
510
511 if (ubd_dev->file && ubd_dev->cow.file) {
512 file = ubd_dev->cow.file;
513
514 goto out;
515 }
516
517 fd = os_open_file(ubd_dev->file, global_openflags, 0);
518 if (fd < 0)
519 return fd;
520
521 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
522 &mtime, &size, §or_size, &align, &bitmap_offset);
523 os_close_file(fd);
524
525 if(err == -EINVAL)
526 file = ubd_dev->file;
527 else
528 file = backing_file;
529
530out:
531 return os_file_size(file, size_out);
532}
533
/*
 * Seek @fd to @offset and read @len bytes into @buf.
 * Returns 0 unless the seek or the read reports a negative error, which
 * is passed through.  A short read is not treated as an error.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
548
549static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
550{
551 unsigned long modtime;
552 unsigned long long actual;
553 int err;
554
555 err = os_file_modtime(file, &modtime);
556 if (err < 0) {
557 printk(KERN_ERR "Failed to get modification time of backing "
558 "file \"%s\", err = %d\n", file, -err);
559 return err;
560 }
561
562 err = os_file_size(file, &actual);
563 if (err < 0) {
564 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
565 "err = %d\n", file, -err);
566 return err;
567 }
568
569 if (actual != size) {
570 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
571 * the typecast.*/
572 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
573 "vs backing file\n", (unsigned long long) size, actual);
574 return -EINVAL;
575 }
576 if (modtime != mtime) {
577 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
578 "backing file\n", mtime, modtime);
579 return -EINVAL;
580 }
581 return 0;
582}
583
584static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
585{
586 struct uml_stat buf1, buf2;
587 int err;
588
589 if (from_cmdline == NULL)
590 return 0;
591 if (!strcmp(from_cmdline, from_cow))
592 return 0;
593
594 err = os_stat_file(from_cmdline, &buf1);
595 if (err < 0) {
596 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
597 -err);
598 return 0;
599 }
600 err = os_stat_file(from_cow, &buf2);
601 if (err < 0) {
602 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
603 -err);
604 return 1;
605 }
606 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
607 return 0;
608
609 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
610 "\"%s\" specified in COW header of \"%s\"\n",
611 from_cmdline, from_cow, cow);
612 return 1;
613}
614
615static int open_ubd_file(char *file, struct openflags *openflags, int shared,
616 char **backing_file_out, int *bitmap_offset_out,
617 unsigned long *bitmap_len_out, int *data_offset_out,
618 int *create_cow_out)
619{
620 time_t mtime;
621 unsigned long long size;
622 __u32 version, align;
623 char *backing_file;
624 int fd, err, sectorsize, asked_switch, mode = 0644;
625
626 fd = os_open_file(file, *openflags, mode);
627 if (fd < 0) {
628 if ((fd == -ENOENT) && (create_cow_out != NULL))
629 *create_cow_out = 1;
630 if (!openflags->w ||
631 ((fd != -EROFS) && (fd != -EACCES)))
632 return fd;
633 openflags->w = 0;
634 fd = os_open_file(file, *openflags, mode);
635 if (fd < 0)
636 return fd;
637 }
638
639 if (shared)
640 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
641 else {
642 err = os_lock_file(fd, openflags->w);
643 if (err < 0) {
644 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
645 file, -err);
646 goto out_close;
647 }
648 }
649
650 /* Successful return case! */
651 if (backing_file_out == NULL)
652 return fd;
653
654 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
655 &size, §orsize, &align, bitmap_offset_out);
656 if (err && (*backing_file_out != NULL)) {
657 printk(KERN_ERR "Failed to read COW header from COW file "
658 "\"%s\", errno = %d\n", file, -err);
659 goto out_close;
660 }
661 if (err)
662 return fd;
663
664 asked_switch = path_requires_switch(*backing_file_out, backing_file,
665 file);
666
667 /* Allow switching only if no mismatch. */
668 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
669 mtime)) {
670 printk(KERN_ERR "Switching backing file to '%s'\n",
671 *backing_file_out);
672 err = write_cow_header(file, fd, *backing_file_out,
673 sectorsize, align, &size);
674 if (err) {
675 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
676 goto out_close;
677 }
678 } else {
679 *backing_file_out = backing_file;
680 err = backing_file_mismatch(*backing_file_out, size, mtime);
681 if (err)
682 goto out_close;
683 }
684
685 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
686 bitmap_len_out, data_offset_out);
687
688 return fd;
689 out_close:
690 os_close_file(fd);
691 return err;
692}
693
694static int create_cow_file(char *cow_file, char *backing_file,
695 struct openflags flags,
696 int sectorsize, int alignment, int *bitmap_offset_out,
697 unsigned long *bitmap_len_out, int *data_offset_out)
698{
699 int err, fd;
700
701 flags.c = 1;
702 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
703 if (fd < 0) {
704 err = fd;
705 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
706 cow_file, -err);
707 goto out;
708 }
709
710 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
711 bitmap_offset_out, bitmap_len_out,
712 data_offset_out);
713 if (!err)
714 return fd;
715 os_close_file(fd);
716 out:
717 return err;
718}
719
720static void ubd_close_dev(struct ubd *ubd_dev)
721{
722 os_close_file(ubd_dev->fd);
723 if(ubd_dev->cow.file == NULL)
724 return;
725
726 os_close_file(ubd_dev->cow.fd);
727 vfree(ubd_dev->cow.bitmap);
728 ubd_dev->cow.bitmap = NULL;
729}
730
731static int ubd_open_dev(struct ubd *ubd_dev)
732{
733 struct openflags flags;
734 char **back_ptr;
735 int err, create_cow, *create_ptr;
736 int fd;
737
738 ubd_dev->openflags = ubd_dev->boot_openflags;
739 create_cow = 0;
740 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
741 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
742
743 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
744 back_ptr, &ubd_dev->cow.bitmap_offset,
745 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
746 create_ptr);
747
748 if((fd == -ENOENT) && create_cow){
749 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
750 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
751 &ubd_dev->cow.bitmap_offset,
752 &ubd_dev->cow.bitmap_len,
753 &ubd_dev->cow.data_offset);
754 if(fd >= 0){
755 printk(KERN_INFO "Creating \"%s\" as COW file for "
756 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
757 }
758 }
759
760 if(fd < 0){
761 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
762 -fd);
763 return fd;
764 }
765 ubd_dev->fd = fd;
766
767 if(ubd_dev->cow.file != NULL){
768 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
769
770 err = -ENOMEM;
771 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
772 if(ubd_dev->cow.bitmap == NULL){
773 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
774 goto error;
775 }
776 flush_tlb_kernel_vm();
777
778 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
779 ubd_dev->cow.bitmap_offset,
780 ubd_dev->cow.bitmap_len);
781 if(err < 0)
782 goto error;
783
784 flags = ubd_dev->openflags;
785 flags.w = 0;
786 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
787 NULL, NULL, NULL, NULL);
788 if(err < 0) goto error;
789 ubd_dev->cow.fd = err;
790 }
791 return 0;
792 error:
793 os_close_file(ubd_dev->fd);
794 return err;
795}
796
797static void ubd_device_release(struct device *dev)
798{
799 struct ubd *ubd_dev = dev_get_drvdata(dev);
800
801 blk_cleanup_queue(ubd_dev->queue);
802 *ubd_dev = ((struct ubd) DEFAULT_UBD);
803}
804
805static int ubd_disk_register(int major, u64 size, int unit,
806 struct gendisk **disk_out)
807{
808 struct gendisk *disk;
809
810 disk = alloc_disk(1 << UBD_SHIFT);
811 if(disk == NULL)
812 return -ENOMEM;
813
814 disk->major = major;
815 disk->first_minor = unit << UBD_SHIFT;
816 disk->fops = &ubd_blops;
817 set_capacity(disk, size / 512);
818 if (major == UBD_MAJOR)
819 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
820 else
821 sprintf(disk->disk_name, "ubd_fake%d", unit);
822
823 /* sysfs register (not for ide fake devices) */
824 if (major == UBD_MAJOR) {
825 ubd_devs[unit].pdev.id = unit;
826 ubd_devs[unit].pdev.name = DRIVER_NAME;
827 ubd_devs[unit].pdev.dev.release = ubd_device_release;
828 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
829 platform_device_register(&ubd_devs[unit].pdev);
830 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
831 }
832
833 disk->private_data = &ubd_devs[unit];
834 disk->queue = ubd_devs[unit].queue;
835 add_disk(disk);
836
837 *disk_out = disk;
838 return 0;
839}
840
/*
 * Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesised so that expressions such as ROUND_BLOCK(a | b) work, and
 * the mask is built from an unsigned constant instead of left-shifting -1.
 */
#define ROUND_BLOCK(n) (((n) + ((1ULL << 9) - 1)) & ~((1ULL << 9) - 1))
842
843static int ubd_add(int n, char **error_out)
844{
845 struct ubd *ubd_dev = &ubd_devs[n];
846 int err = 0;
847
848 if(ubd_dev->file == NULL)
849 goto out;
850
851 err = ubd_file_size(ubd_dev, &ubd_dev->size);
852 if(err < 0){
853 *error_out = "Couldn't determine size of device's file";
854 goto out;
855 }
856
857 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
858
859 INIT_LIST_HEAD(&ubd_dev->restart);
860 sg_init_table(ubd_dev->sg, MAX_SG);
861
862 err = -ENOMEM;
863 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
864 if (ubd_dev->queue == NULL) {
865 *error_out = "Failed to initialize device queue";
866 goto out;
867 }
868 ubd_dev->queue->queuedata = ubd_dev;
869
870 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
871 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
872 if(err){
873 *error_out = "Failed to register device";
874 goto out_cleanup;
875 }
876
877 if (fake_major != UBD_MAJOR)
878 ubd_disk_register(fake_major, ubd_dev->size, n,
879 &fake_gendisk[n]);
880
881 /*
882 * Perhaps this should also be under the "if (fake_major)" above
883 * using the fake_disk->disk_name
884 */
885 if (fake_ide)
886 make_ide_entries(ubd_gendisk[n]->disk_name);
887
888 err = 0;
889out:
890 return err;
891
892out_cleanup:
893 blk_cleanup_queue(ubd_dev->queue);
894 goto out;
895}
896
897static int ubd_config(char *str, char **error_out)
898{
899 int n, ret;
900
901 /* This string is possibly broken up and stored, so it's only
902 * freed if ubd_setup_common fails, or if only general options
903 * were set.
904 */
905 str = kstrdup(str, GFP_KERNEL);
906 if (str == NULL) {
907 *error_out = "Failed to allocate memory";
908 return -ENOMEM;
909 }
910
911 ret = ubd_setup_common(str, &n, error_out);
912 if (ret)
913 goto err_free;
914
915 if (n == -1) {
916 ret = 0;
917 goto err_free;
918 }
919
920 mutex_lock(&ubd_lock);
921 ret = ubd_add(n, error_out);
922 if (ret)
923 ubd_devs[n].file = NULL;
924 mutex_unlock(&ubd_lock);
925
926out:
927 return ret;
928
929err_free:
930 kfree(str);
931 goto out;
932}
933
934static int ubd_get_config(char *name, char *str, int size, char **error_out)
935{
936 struct ubd *ubd_dev;
937 int n, len = 0;
938
939 n = parse_unit(&name);
940 if((n >= MAX_DEV) || (n < 0)){
941 *error_out = "ubd_get_config : device number out of range";
942 return -1;
943 }
944
945 ubd_dev = &ubd_devs[n];
946 mutex_lock(&ubd_lock);
947
948 if(ubd_dev->file == NULL){
949 CONFIG_CHUNK(str, size, len, "", 1);
950 goto out;
951 }
952
953 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
954
955 if(ubd_dev->cow.file != NULL){
956 CONFIG_CHUNK(str, size, len, ",", 0);
957 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
958 }
959 else CONFIG_CHUNK(str, size, len, "", 1);
960
961 out:
962 mutex_unlock(&ubd_lock);
963 return len;
964}
965
966static int ubd_id(char **str, int *start_out, int *end_out)
967{
968 int n;
969
970 n = parse_unit(str);
971 *start_out = 0;
972 *end_out = MAX_DEV - 1;
973 return n;
974}
975
976static int ubd_remove(int n, char **error_out)
977{
978 struct gendisk *disk = ubd_gendisk[n];
979 struct ubd *ubd_dev;
980 int err = -ENODEV;
981
982 mutex_lock(&ubd_lock);
983
984 ubd_dev = &ubd_devs[n];
985
986 if(ubd_dev->file == NULL)
987 goto out;
988
989 /* you cannot remove a open disk */
990 err = -EBUSY;
991 if(ubd_dev->count > 0)
992 goto out;
993
994 ubd_gendisk[n] = NULL;
995 if(disk != NULL){
996 del_gendisk(disk);
997 put_disk(disk);
998 }
999
1000 if(fake_gendisk[n] != NULL){
1001 del_gendisk(fake_gendisk[n]);
1002 put_disk(fake_gendisk[n]);
1003 fake_gendisk[n] = NULL;
1004 }
1005
1006 err = 0;
1007 platform_device_unregister(&ubd_dev->pdev);
1008out:
1009 mutex_unlock(&ubd_lock);
1010 return err;
1011}
1012
1013/* All these are called by mconsole in process context and without
1014 * ubd-specific locks. The structure itself is const except for .list.
1015 */
1016static struct mc_device ubd_mc = {
1017 .list = LIST_HEAD_INIT(ubd_mc.list),
1018 .name = "ubd",
1019 .config = ubd_config,
1020 .get_config = ubd_get_config,
1021 .id = ubd_id,
1022 .remove = ubd_remove,
1023};
1024
1025static int __init ubd_mc_init(void)
1026{
1027 mconsole_register_dev(&ubd_mc);
1028 return 0;
1029}
1030
1031__initcall(ubd_mc_init);
1032
1033static int __init ubd0_init(void)
1034{
1035 struct ubd *ubd_dev = &ubd_devs[0];
1036
1037 mutex_lock(&ubd_lock);
1038 if(ubd_dev->file == NULL)
1039 ubd_dev->file = "root_fs";
1040 mutex_unlock(&ubd_lock);
1041
1042 return 0;
1043}
1044
1045__initcall(ubd0_init);
1046
1047/* Used in ubd_init, which is an initcall */
1048static struct platform_driver ubd_driver = {
1049 .driver = {
1050 .name = DRIVER_NAME,
1051 },
1052};
1053
1054static int __init ubd_init(void)
1055{
1056 char *error;
1057 int i, err;
1058
1059 if (register_blkdev(UBD_MAJOR, "ubd"))
1060 return -1;
1061
1062 if (fake_major != UBD_MAJOR) {
1063 char name[sizeof("ubd_nnn\0")];
1064
1065 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1066 if (register_blkdev(fake_major, "ubd"))
1067 return -1;
1068 }
1069 platform_driver_register(&ubd_driver);
1070 mutex_lock(&ubd_lock);
1071 for (i = 0; i < MAX_DEV; i++){
1072 err = ubd_add(i, &error);
1073 if(err)
1074 printk(KERN_ERR "Failed to initialize ubd device %d :"
1075 "%s\n", i, error);
1076 }
1077 mutex_unlock(&ubd_lock);
1078 return 0;
1079}
1080
1081late_initcall(ubd_init);
1082
1083static int __init ubd_driver_init(void){
1084 unsigned long stack;
1085 int err;
1086
1087 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1088 if(global_openflags.s){
1089 printk(KERN_INFO "ubd: Synchronous mode\n");
1090 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1091 * enough. So use anyway the io thread. */
1092 }
1093 stack = alloc_stack(0, 0);
1094 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1095 &thread_fd);
1096 if(io_pid < 0){
1097 printk(KERN_ERR
1098 "ubd : Failed to start I/O thread (errno = %d) - "
1099 "falling back to synchronous I/O\n", -io_pid);
1100 io_pid = -1;
1101 return 0;
1102 }
1103 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1104 0, "ubd", ubd_devs);
1105 if(err != 0)
1106 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1107 return 0;
1108}
1109
1110device_initcall(ubd_driver_init);
1111
1112static int ubd_open(struct block_device *bdev, fmode_t mode)
1113{
1114 struct gendisk *disk = bdev->bd_disk;
1115 struct ubd *ubd_dev = disk->private_data;
1116 int err = 0;
1117
1118 mutex_lock(&ubd_mutex);
1119 if(ubd_dev->count == 0){
1120 err = ubd_open_dev(ubd_dev);
1121 if(err){
1122 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1123 disk->disk_name, ubd_dev->file, -err);
1124 goto out;
1125 }
1126 }
1127 ubd_dev->count++;
1128 set_disk_ro(disk, !ubd_dev->openflags.w);
1129
1130 /* This should no more be needed. And it didn't work anyway to exclude
1131 * read-write remounting of filesystems.*/
1132 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1133 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1134 err = -EROFS;
1135 }*/
1136out:
1137 mutex_unlock(&ubd_mutex);
1138 return err;
1139}
1140
1141static int ubd_release(struct gendisk *disk, fmode_t mode)
1142{
1143 struct ubd *ubd_dev = disk->private_data;
1144
1145 mutex_lock(&ubd_mutex);
1146 if(--ubd_dev->count == 0)
1147 ubd_close_dev(ubd_dev);
1148 mutex_unlock(&ubd_mutex);
1149 return 0;
1150}
1151
1152static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1153 __u64 *cow_offset, unsigned long *bitmap,
1154 __u64 bitmap_offset, unsigned long *bitmap_words,
1155 __u64 bitmap_len)
1156{
1157 __u64 sector = io_offset >> 9;
1158 int i, update_bitmap = 0;
1159
1160 for(i = 0; i < length >> 9; i++){
1161 if(cow_mask != NULL)
1162 ubd_set_bit(i, (unsigned char *) cow_mask);
1163 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1164 continue;
1165
1166 update_bitmap = 1;
1167 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1168 }
1169
1170 if(!update_bitmap)
1171 return;
1172
1173 *cow_offset = sector / (sizeof(unsigned long) * 8);
1174
1175 /* This takes care of the case where we're exactly at the end of the
1176 * device, and *cow_offset + 1 is off the end. So, just back it up
1177 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1178 * for the original diagnosis.
1179 */
1180 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1181 sizeof(unsigned long)) - 1))
1182 (*cow_offset)--;
1183
1184 bitmap_words[0] = bitmap[*cow_offset];
1185 bitmap_words[1] = bitmap[*cow_offset + 1];
1186
1187 *cow_offset *= sizeof(unsigned long);
1188 *cow_offset += bitmap_offset;
1189}
1190
1191static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1192 __u64 bitmap_offset, __u64 bitmap_len)
1193{
1194 __u64 sector = req->offset >> 9;
1195 int i;
1196
1197 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1198 panic("Operation too long");
1199
1200 if(req->op == UBD_READ) {
1201 for(i = 0; i < req->length >> 9; i++){
1202 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1203 ubd_set_bit(i, (unsigned char *)
1204 &req->sector_mask);
1205 }
1206 }
1207 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1208 &req->cow_offset, bitmap, bitmap_offset,
1209 req->bitmap_words, bitmap_len);
1210}
1211
1212/* Called with dev->lock held */
1213static void prepare_request(struct request *req, struct io_thread_req *io_req,
1214 unsigned long long offset, int page_offset,
1215 int len, struct page *page)
1216{
1217 struct gendisk *disk = req->rq_disk;
1218 struct ubd *ubd_dev = disk->private_data;
1219
1220 io_req->req = req;
1221 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1222 ubd_dev->fd;
1223 io_req->fds[1] = ubd_dev->fd;
1224 io_req->cow_offset = -1;
1225 io_req->offset = offset;
1226 io_req->length = len;
1227 io_req->error = 0;
1228 io_req->sector_mask = 0;
1229
1230 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1231 io_req->offsets[0] = 0;
1232 io_req->offsets[1] = ubd_dev->cow.data_offset;
1233 io_req->buffer = page_address(page) + page_offset;
1234 io_req->sectorsize = 1 << 9;
1235
1236 if(ubd_dev->cow.file != NULL)
1237 cowify_req(io_req, ubd_dev->cow.bitmap,
1238 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1239
1240}
1241
1242/* Called with dev->lock held */
1243static void do_ubd_request(struct request_queue *q)
1244{
1245 struct io_thread_req *io_req;
1246 struct request *req;
1247 int n;
1248
1249 while(1){
1250 struct ubd *dev = q->queuedata;
1251 if(dev->end_sg == 0){
1252 struct request *req = blk_fetch_request(q);
1253 if(req == NULL)
1254 return;
1255
1256 dev->request = req;
1257 dev->rq_pos = blk_rq_pos(req);
1258 dev->start_sg = 0;
1259 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1260 }
1261
1262 req = dev->request;
1263 while(dev->start_sg < dev->end_sg){
1264 struct scatterlist *sg = &dev->sg[dev->start_sg];
1265
1266 io_req = kmalloc(sizeof(struct io_thread_req),
1267 GFP_ATOMIC);
1268 if(io_req == NULL){
1269 if(list_empty(&dev->restart))
1270 list_add(&dev->restart, &restart);
1271 return;
1272 }
1273 prepare_request(req, io_req,
1274 (unsigned long long)dev->rq_pos << 9,
1275 sg->offset, sg->length, sg_page(sg));
1276
1277 n = os_write_file(thread_fd, &io_req,
1278 sizeof(struct io_thread_req *));
1279 if(n != sizeof(struct io_thread_req *)){
1280 if(n != -EAGAIN)
1281 printk("write to io thread failed, "
1282 "errno = %d\n", -n);
1283 else if(list_empty(&dev->restart))
1284 list_add(&dev->restart, &restart);
1285 kfree(io_req);
1286 return;
1287 }
1288
1289 dev->rq_pos += sg->length >> 9;
1290 dev->start_sg++;
1291 }
1292 dev->end_sg = 0;
1293 dev->request = NULL;
1294 }
1295}
1296
1297static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1298{
1299 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1300
1301 geo->heads = 128;
1302 geo->sectors = 32;
1303 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1304 return 0;
1305}
1306
1307static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1308 unsigned int cmd, unsigned long arg)
1309{
1310 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1311 u16 ubd_id[ATA_ID_WORDS];
1312
1313 switch (cmd) {
1314 struct cdrom_volctrl volume;
1315 case HDIO_GET_IDENTITY:
1316 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1317 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1318 ubd_id[ATA_ID_HEADS] = 128;
1319 ubd_id[ATA_ID_SECTORS] = 32;
1320 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1321 sizeof(ubd_id)))
1322 return -EFAULT;
1323 return 0;
1324
1325 case CDROMVOLREAD:
1326 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1327 return -EFAULT;
1328 volume.channel0 = 255;
1329 volume.channel1 = 255;
1330 volume.channel2 = 255;
1331 volume.channel3 = 255;
1332 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1333 return -EFAULT;
1334 return 0;
1335 }
1336 return -EINVAL;
1337}
1338
1339static int update_bitmap(struct io_thread_req *req)
1340{
1341 int n;
1342
1343 if(req->cow_offset == -1)
1344 return 0;
1345
1346 n = os_seek_file(req->fds[1], req->cow_offset);
1347 if(n < 0){
1348 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1349 return 1;
1350 }
1351
1352 n = os_write_file(req->fds[1], &req->bitmap_words,
1353 sizeof(req->bitmap_words));
1354 if(n != sizeof(req->bitmap_words)){
1355 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1356 req->fds[1]);
1357 return 1;
1358 }
1359
1360 return 0;
1361}
1362
1363static void do_io(struct io_thread_req *req)
1364{
1365 char *buf;
1366 unsigned long len;
1367 int n, nsectors, start, end, bit;
1368 int err;
1369 __u64 off;
1370
1371 nsectors = req->length / req->sectorsize;
1372 start = 0;
1373 do {
1374 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1375 end = start;
1376 while((end < nsectors) &&
1377 (ubd_test_bit(end, (unsigned char *)
1378 &req->sector_mask) == bit))
1379 end++;
1380
1381 off = req->offset + req->offsets[bit] +
1382 start * req->sectorsize;
1383 len = (end - start) * req->sectorsize;
1384 buf = &req->buffer[start * req->sectorsize];
1385
1386 err = os_seek_file(req->fds[bit], off);
1387 if(err < 0){
1388 printk("do_io - lseek failed : err = %d\n", -err);
1389 req->error = 1;
1390 return;
1391 }
1392 if(req->op == UBD_READ){
1393 n = 0;
1394 do {
1395 buf = &buf[n];
1396 len -= n;
1397 n = os_read_file(req->fds[bit], buf, len);
1398 if (n < 0) {
1399 printk("do_io - read failed, err = %d "
1400 "fd = %d\n", -n, req->fds[bit]);
1401 req->error = 1;
1402 return;
1403 }
1404 } while((n < len) && (n != 0));
1405 if (n < len) memset(&buf[n], 0, len - n);
1406 } else {
1407 n = os_write_file(req->fds[bit], buf, len);
1408 if(n != len){
1409 printk("do_io - write failed err = %d "
1410 "fd = %d\n", -n, req->fds[bit]);
1411 req->error = 1;
1412 return;
1413 }
1414 }
1415
1416 start = end;
1417 } while(start < nsectors);
1418
1419 req->error = update_bitmap(req);
1420}
1421
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/* Only changed by the io thread. XXX: currently unused. */
static int io_count = 0;

/*
 * Body of the I/O thread: forever read one request pointer from
 * kernel_fd, execute it with do_io() and write the same pointer back to
 * kernel_fd.  A failed or short read is logged and the loop simply tries
 * again.  The trailing return is never reached.
 */
int io_thread(void *arg)
{
	struct io_thread_req *req;
	int n;

	ignore_sigwinch_sig();
	for (;;) {
		n = os_read_file(kernel_fd, &req,
				 sizeof(struct io_thread_req *));
		if (n != sizeof(struct io_thread_req *)) {
			if (n >= 0)
				printk("io_thread - short read, fd = %d, "
				       "length = %d\n", kernel_fd, n);
			else
				printk("io_thread - read failed, fd = %d, "
				       "err = %d\n", kernel_fd, -n);
			continue;
		}

		io_count++;
		do_io(req);

		n = os_write_file(kernel_fd, &req,
				  sizeof(struct io_thread_req *));
		if (n != sizeof(struct io_thread_req *))
			printk("io_thread - write failed, fd = %d, err = %d\n",
			       kernel_fd, -n);
	}

	return 0;
}
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2018 Cambridge Greys Ltd
4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
6 */
7
8/* 2001-09-28...2002-04-17
9 * Partition stuff by James_McMechan@hotmail.com
10 * old style ubd by setting UBD_SHIFT to 0
11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
12 * partitions have changed in 2.5
13 * 2003-01-29 more tinkering for 2.5.59-1
14 * This should now address the sysfs problems and has
15 * the symlink for devfs to allow for booting with
16 * the common /dev/ubd/discX/... names rather than
17 * only /dev/ubdN/discN this version also has lots of
18 * clean ups preparing for ubd-many.
19 * James McMechan
20 */
21
22#define UBD_SHIFT 4
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
27#include <linux/blk-mq.h>
28#include <linux/ata.h>
29#include <linux/hdreg.h>
30#include <linux/major.h>
31#include <linux/cdrom.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/ctype.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <linux/platform_device.h>
38#include <linux/scatterlist.h>
39#include <asm/tlbflush.h>
40#include <kern_util.h>
41#include "mconsole_kern.h"
42#include <init.h>
43#include <irq_kern.h>
44#include "ubd.h"
45#include <os.h>
46#include "cow.h"
47
/* Max request size is determined by sector mask - 32K */
#define UBD_MAX_REQUEST (8 * sizeof(long))

/*
 * One contiguous piece of a request.
 * NOTE(review): the code that fills these in is outside this section; the
 * field notes below are inferred from the names - confirm against the
 * users.
 */
struct io_desc {
	char *buffer;			/* memory to read into / write from */
	unsigned long length;		/* size of the piece */
	unsigned long sector_mask;	/* one bit per sector of the piece */
	unsigned long long cow_offset;	/* where bitmap_words go in the file */
	unsigned long bitmap_words[2];	/* bitmap words to write back */
};
58
59struct io_thread_req {
60 struct request *req;
61 int fds[2];
62 unsigned long offsets[2];
63 unsigned long long offset;
64 int sectorsize;
65 int error;
66
67 int desc_cnt;
68 /* io_desc has to be the last element of the struct */
69 struct io_desc io_desc[];
70};
71
72
/*
 * Buffers for bulk_req_safe_read(): an array of request pointers plus
 * storage for the bytes of a partially read pointer.  The irq_* set is
 * used by ubd_handler().
 */
static struct io_thread_req *(*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/* NOTE(review): same layout, presumably for the I/O thread side - the
 * users are outside this section. */
static struct io_thread_req *(*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
80
81
82
83static inline int ubd_test_bit(__u64 bit, unsigned char *data)
84{
85 __u64 n;
86 int bits, off;
87
88 bits = sizeof(data[0]) * 8;
89 n = bit / bits;
90 off = bit % bits;
91 return (data[n] & (1 << off)) != 0;
92}
93
94static inline void ubd_set_bit(__u64 bit, unsigned char *data)
95{
96 __u64 n;
97 int bits, off;
98
99 bits = sizeof(data[0]) * 8;
100 n = bit / bits;
101 off = bit % bits;
102 data[n] |= (1 << off);
103}
104/*End stuff from ubd_user.h*/
105
106#define DRIVER_NAME "uml-blkdev"
107
108static DEFINE_MUTEX(ubd_lock);
109static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
110
111static int ubd_open(struct gendisk *disk, blk_mode_t mode);
112static void ubd_release(struct gendisk *disk);
113static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
114 unsigned int cmd, unsigned long arg);
115static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
116
117#define MAX_DEV (16)
118
119static const struct block_device_operations ubd_blops = {
120 .owner = THIS_MODULE,
121 .open = ubd_open,
122 .release = ubd_release,
123 .ioctl = ubd_ioctl,
124 .compat_ioctl = blkdev_compat_ptr_ioctl,
125 .getgeo = ubd_getgeo,
126};
127
128/* Protected by ubd_lock */
129static struct gendisk *ubd_gendisk[MAX_DEV];
130
131#ifdef CONFIG_BLK_DEV_UBD_SYNC
132#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
133 .cl = 1 })
134#else
135#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
136 .cl = 1 })
137#endif
138static struct openflags global_openflags = OPEN_FLAGS;
139
/* Per-device copy-on-write state. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the bitmap, vmalloc'ed by ubd_open_dev() */
	unsigned long *bitmap;
	/* size passed to vmalloc() and read_cow_bitmap() for the bitmap */
	unsigned long bitmap_len;
	/* offset of the bitmap inside the COW file */
	int bitmap_offset;
	/* NOTE(review): presumably the offset of the data area inside the
	 * COW file - filled in by cow_sizes()/init_cow_file(), confirm */
	int data_offset;
};
150
151#define MAX_SG 64
152
153struct ubd {
154 /* name (and fd, below) of the file opened for writing, either the
155 * backing or the cow file. */
156 char *file;
157 char *serial;
158 int count;
159 int fd;
160 __u64 size;
161 struct openflags boot_openflags;
162 struct openflags openflags;
163 unsigned shared:1;
164 unsigned no_cow:1;
165 unsigned no_trim:1;
166 struct cow cow;
167 struct platform_device pdev;
168 struct request_queue *queue;
169 struct blk_mq_tag_set tag_set;
170 spinlock_t lock;
171};
172
173#define DEFAULT_COW { \
174 .file = NULL, \
175 .fd = -1, \
176 .bitmap = NULL, \
177 .bitmap_offset = 0, \
178 .data_offset = 0, \
179}
180
181#define DEFAULT_UBD { \
182 .file = NULL, \
183 .serial = NULL, \
184 .count = 0, \
185 .fd = -1, \
186 .size = -1, \
187 .boot_openflags = OPEN_FLAGS, \
188 .openflags = OPEN_FLAGS, \
189 .no_cow = 0, \
190 .no_trim = 0, \
191 .shared = 0, \
192 .cow = DEFAULT_COW, \
193 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
194}
195
196/* Protected by ubd_lock */
197static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
198
199static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
200 const struct blk_mq_queue_data *bd);
201
202static int fake_ide_setup(char *str)
203{
204 pr_warn("The fake_ide option has been removed\n");
205 return 1;
206}
207__setup("fake_ide", fake_ide_setup);
208
209__uml_help(fake_ide_setup,
210"fake_ide\n"
211" Obsolete stub.\n\n"
212);
213
/*
 * Parse a device unit at *ptr: either a number (any base accepted by
 * simple_strtoul() with base 0) or a single letter 'a'..'z' meaning
 * 0..25.  On success *ptr is advanced past the unit and the unit is
 * returned; otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cursor = *ptr;
	char *after;
	int unit;

	if (isdigit(*cursor)) {
		unit = simple_strtoul(cursor, &after, 0);
		if (after == cursor)
			return -1;
		*ptr = after;
		return unit;
	}

	if ((*cursor >= 'a') && (*cursor <= 'z')) {
		unit = *cursor - 'a';
		*ptr = cursor + 1;
		return unit;
	}

	return -1;
}
232
233/* If *index_out == -1 at exit, the passed option was a general one;
234 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
235 * should not be freed on exit.
236 */
237static int ubd_setup_common(char *str, int *index_out, char **error_out)
238{
239 struct ubd *ubd_dev;
240 struct openflags flags = global_openflags;
241 char *file, *backing_file, *serial;
242 int n, err = 0, i;
243
244 if(index_out) *index_out = -1;
245 n = *str;
246 if(n == '='){
247 str++;
248 if(!strcmp(str, "sync")){
249 global_openflags = of_sync(global_openflags);
250 return err;
251 }
252
253 pr_warn("fake major not supported any more\n");
254 return 0;
255 }
256
257 n = parse_unit(&str);
258 if(n < 0){
259 *error_out = "Couldn't parse device number";
260 return -EINVAL;
261 }
262 if(n >= MAX_DEV){
263 *error_out = "Device number out of range";
264 return 1;
265 }
266
267 err = -EBUSY;
268 mutex_lock(&ubd_lock);
269
270 ubd_dev = &ubd_devs[n];
271 if(ubd_dev->file != NULL){
272 *error_out = "Device is already configured";
273 goto out;
274 }
275
276 if (index_out)
277 *index_out = n;
278
279 err = -EINVAL;
280 for (i = 0; i < sizeof("rscdt="); i++) {
281 switch (*str) {
282 case 'r':
283 flags.w = 0;
284 break;
285 case 's':
286 flags.s = 1;
287 break;
288 case 'd':
289 ubd_dev->no_cow = 1;
290 break;
291 case 'c':
292 ubd_dev->shared = 1;
293 break;
294 case 't':
295 ubd_dev->no_trim = 1;
296 break;
297 case '=':
298 str++;
299 goto break_loop;
300 default:
301 *error_out = "Expected '=' or flag letter "
302 "(r, s, c, t or d)";
303 goto out;
304 }
305 str++;
306 }
307
308 if (*str == '=')
309 *error_out = "Too many flags specified";
310 else
311 *error_out = "Missing '='";
312 goto out;
313
314break_loop:
315 file = strsep(&str, ",:");
316 if (*file == '\0')
317 file = NULL;
318
319 backing_file = strsep(&str, ",:");
320 if (backing_file && *backing_file == '\0')
321 backing_file = NULL;
322
323 serial = strsep(&str, ",:");
324 if (serial && *serial == '\0')
325 serial = NULL;
326
327 if (backing_file && ubd_dev->no_cow) {
328 *error_out = "Can't specify both 'd' and a cow file";
329 goto out;
330 }
331
332 err = 0;
333 ubd_dev->file = file;
334 ubd_dev->cow.file = backing_file;
335 ubd_dev->serial = serial;
336 ubd_dev->boot_openflags = flags;
337out:
338 mutex_unlock(&ubd_lock);
339 return err;
340}
341
342static int ubd_setup(char *str)
343{
344 char *error;
345 int err;
346
347 err = ubd_setup_common(str, NULL, &error);
348 if(err)
349 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
350 "%s\n", str, error);
351 return 1;
352}
353
354__setup("ubd", ubd_setup);
355__uml_help(ubd_setup,
356"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
357" This is used to associate a device with a file in the underlying\n"
358" filesystem. When specifying two filenames, the first one is the\n"
359" COW name and the second is the backing file name. As separator you can\n"
360" use either a ':' or a ',': the first one allows writing things like;\n"
361" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
362" while with a ',' the shell would not expand the 2nd '~'.\n"
363" When using only one filename, UML will detect whether to treat it like\n"
364" a COW file or a backing file. To override this detection, add the 'd'\n"
365" flag:\n"
366" ubd0d=BackingFile\n"
367" Usually, there is a filesystem in the file, but \n"
368" that's not required. Swap devices containing swap files can be\n"
369" specified like this. Also, a file which doesn't contain a\n"
370" filesystem can have its contents read in the virtual \n"
371" machine by running 'dd' on the device. <n> must be in the range\n"
372" 0 to 7. Appending an 'r' to the number will cause that device\n"
373" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
374" an 's' will cause data to be written to disk on the host immediately.\n"
375" 'c' will cause the device to be treated as being shared between multiple\n"
376" UMLs and file locking will be turned off - this is appropriate for a\n"
377" cluster filesystem and inappropriate at almost all other times.\n\n"
378" 't' will disable trim/discard support on the device (enabled by default).\n\n"
379" An optional device serial number can be exposed using the serial parameter\n"
380" on the cmdline which is exposed as a sysfs entry. This is particularly\n"
381" useful when a unique number should be given to the device. Note when\n"
382" specifying a label, the filename2 must be also presented. It can be\n"
383" an empty string, in which case the backing file is not used:\n"
384" ubd0=File,,Serial\n"
385);
386
387static int udb_setup(char *str)
388{
389 printk("udb%s specified on command line is almost certainly a ubd -> "
390 "udb TYPO\n", str);
391 return 1;
392}
393
394__setup("udb", udb_setup);
395__uml_help(udb_setup,
396"udb\n"
397" This option is here solely to catch ubd -> udb typos, which can be\n"
398" to impossible to catch visually unless you specifically look for\n"
399" them. The only result of any option starting with 'udb' is an error\n"
400" in the boot output.\n\n"
401);
402
/* Only changed by ubd_init, which is an initcall. */
static int thread_fd = -1;

/*
 * Read up to @max_recs request pointers from @fd into @request_buffer,
 * coping with reads that end in the middle of a pointer.
 *
 * Bytes of a partial pointer left over from an earlier call (kept in
 * @remainder / *@remainder_size) are placed at the start of the buffer
 * first.  If the data then still ends mid-pointer, the trailing bytes are
 * saved back into @remainder and not counted.
 *
 * Returns the number of bytes of complete pointers now in the buffer, or
 * the os_read_file() result unchanged when that is <= 0.
 *
 * NOTE(review): nothing visible here resets *remainder_size to 0 once a
 * read ends on a pointer boundary again - confirm whether a caller does.
 */
static int bulk_req_safe_read(
		int fd,
		struct io_thread_req * (*request_buffer)[],
		struct io_thread_req **remainder,
		int *remainder_size,
		int max_recs
		)
{
	char *dest = (char *) request_buffer;
	int total = 0;
	int res = 0;

	if (*remainder_size > 0) {
		memmove(dest, (char *) remainder, *remainder_size);
		total = *remainder_size;
	}

	res = os_read_file(fd, dest + *remainder_size,
			   sizeof(struct io_thread_req *)*max_recs
			   - *remainder_size);
	if (res <= 0)
		return res;

	total += res;
	if ((total % sizeof(struct io_thread_req *)) > 0) {
		/*
		 * Read somehow returned not a multiple of dword
		 * theoretically possible, but never observed in the
		 * wild, so read routine must be able to handle it
		 */
		*remainder_size = total % sizeof(struct io_thread_req *);
		WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
		memmove(remainder,
			dest + (total/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
			*remainder_size);
		total = total - *remainder_size;
	}

	return total;
}
458
459/* Called without dev->lock held, and only in interrupt context. */
460static void ubd_handler(void)
461{
462 int n;
463 int count;
464
465 while(1){
466 n = bulk_req_safe_read(
467 thread_fd,
468 irq_req_buffer,
469 &irq_remainder,
470 &irq_remainder_size,
471 UBD_REQ_BUFFER_SIZE
472 );
473 if (n < 0) {
474 if(n == -EAGAIN)
475 break;
476 printk(KERN_ERR "spurious interrupt in ubd_handler, "
477 "err = %d\n", -n);
478 return;
479 }
480 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
481 struct io_thread_req *io_req = (*irq_req_buffer)[count];
482
483 if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
484 blk_queue_max_discard_sectors(io_req->req->q, 0);
485 blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
486 }
487 blk_mq_end_request(io_req->req, io_req->error);
488 kfree(io_req);
489 }
490 }
491}
492
493static irqreturn_t ubd_intr(int irq, void *dev)
494{
495 ubd_handler();
496 return IRQ_HANDLED;
497}
498
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started (io_pid != -1). */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
509
510static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
511{
512 char *file;
513 int fd;
514 int err;
515
516 __u32 version;
517 __u32 align;
518 char *backing_file;
519 time64_t mtime;
520 unsigned long long size;
521 int sector_size;
522 int bitmap_offset;
523
524 if (ubd_dev->file && ubd_dev->cow.file) {
525 file = ubd_dev->cow.file;
526
527 goto out;
528 }
529
530 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
531 if (fd < 0)
532 return fd;
533
534 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
535 &mtime, &size, §or_size, &align, &bitmap_offset);
536 os_close_file(fd);
537
538 if(err == -EINVAL)
539 file = ubd_dev->file;
540 else
541 file = backing_file;
542
543out:
544 return os_file_size(file, size_out);
545}
546
/*
 * Read @len bytes at @offset of @fd into @buf via os_pread_file().
 * Returns 0 on success or the negative value os_pread_file() returned.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return (ret < 0) ? ret : 0;
}
557
558static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
559{
560 time64_t modtime;
561 unsigned long long actual;
562 int err;
563
564 err = os_file_modtime(file, &modtime);
565 if (err < 0) {
566 printk(KERN_ERR "Failed to get modification time of backing "
567 "file \"%s\", err = %d\n", file, -err);
568 return err;
569 }
570
571 err = os_file_size(file, &actual);
572 if (err < 0) {
573 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
574 "err = %d\n", file, -err);
575 return err;
576 }
577
578 if (actual != size) {
579 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
580 * the typecast.*/
581 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
582 "vs backing file\n", (unsigned long long) size, actual);
583 return -EINVAL;
584 }
585 if (modtime != mtime) {
586 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
587 "backing file\n", mtime, modtime);
588 return -EINVAL;
589 }
590 return 0;
591}
592
593static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
594{
595 struct uml_stat buf1, buf2;
596 int err;
597
598 if (from_cmdline == NULL)
599 return 0;
600 if (!strcmp(from_cmdline, from_cow))
601 return 0;
602
603 err = os_stat_file(from_cmdline, &buf1);
604 if (err < 0) {
605 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
606 -err);
607 return 0;
608 }
609 err = os_stat_file(from_cow, &buf2);
610 if (err < 0) {
611 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
612 -err);
613 return 1;
614 }
615 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
616 return 0;
617
618 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
619 "\"%s\" specified in COW header of \"%s\"\n",
620 from_cmdline, from_cow, cow);
621 return 1;
622}
623
624static int open_ubd_file(char *file, struct openflags *openflags, int shared,
625 char **backing_file_out, int *bitmap_offset_out,
626 unsigned long *bitmap_len_out, int *data_offset_out,
627 int *create_cow_out)
628{
629 time64_t mtime;
630 unsigned long long size;
631 __u32 version, align;
632 char *backing_file;
633 int fd, err, sectorsize, asked_switch, mode = 0644;
634
635 fd = os_open_file(file, *openflags, mode);
636 if (fd < 0) {
637 if ((fd == -ENOENT) && (create_cow_out != NULL))
638 *create_cow_out = 1;
639 if (!openflags->w ||
640 ((fd != -EROFS) && (fd != -EACCES)))
641 return fd;
642 openflags->w = 0;
643 fd = os_open_file(file, *openflags, mode);
644 if (fd < 0)
645 return fd;
646 }
647
648 if (shared)
649 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
650 else {
651 err = os_lock_file(fd, openflags->w);
652 if (err < 0) {
653 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
654 file, -err);
655 goto out_close;
656 }
657 }
658
659 /* Successful return case! */
660 if (backing_file_out == NULL)
661 return fd;
662
663 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
664 &size, §orsize, &align, bitmap_offset_out);
665 if (err && (*backing_file_out != NULL)) {
666 printk(KERN_ERR "Failed to read COW header from COW file "
667 "\"%s\", errno = %d\n", file, -err);
668 goto out_close;
669 }
670 if (err)
671 return fd;
672
673 asked_switch = path_requires_switch(*backing_file_out, backing_file,
674 file);
675
676 /* Allow switching only if no mismatch. */
677 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
678 mtime)) {
679 printk(KERN_ERR "Switching backing file to '%s'\n",
680 *backing_file_out);
681 err = write_cow_header(file, fd, *backing_file_out,
682 sectorsize, align, &size);
683 if (err) {
684 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
685 goto out_close;
686 }
687 } else {
688 *backing_file_out = backing_file;
689 err = backing_file_mismatch(*backing_file_out, size, mtime);
690 if (err)
691 goto out_close;
692 }
693
694 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
695 bitmap_len_out, data_offset_out);
696
697 return fd;
698 out_close:
699 os_close_file(fd);
700 return err;
701}
702
703static int create_cow_file(char *cow_file, char *backing_file,
704 struct openflags flags,
705 int sectorsize, int alignment, int *bitmap_offset_out,
706 unsigned long *bitmap_len_out, int *data_offset_out)
707{
708 int err, fd;
709
710 flags.c = 1;
711 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
712 if (fd < 0) {
713 err = fd;
714 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
715 cow_file, -err);
716 goto out;
717 }
718
719 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
720 bitmap_offset_out, bitmap_len_out,
721 data_offset_out);
722 if (!err)
723 return fd;
724 os_close_file(fd);
725 out:
726 return err;
727}
728
729static void ubd_close_dev(struct ubd *ubd_dev)
730{
731 os_close_file(ubd_dev->fd);
732 if(ubd_dev->cow.file == NULL)
733 return;
734
735 os_close_file(ubd_dev->cow.fd);
736 vfree(ubd_dev->cow.bitmap);
737 ubd_dev->cow.bitmap = NULL;
738}
739
740static int ubd_open_dev(struct ubd *ubd_dev)
741{
742 struct openflags flags;
743 char **back_ptr;
744 int err, create_cow, *create_ptr;
745 int fd;
746
747 ubd_dev->openflags = ubd_dev->boot_openflags;
748 create_cow = 0;
749 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
750 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
751
752 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
753 back_ptr, &ubd_dev->cow.bitmap_offset,
754 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
755 create_ptr);
756
757 if((fd == -ENOENT) && create_cow){
758 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
759 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
760 &ubd_dev->cow.bitmap_offset,
761 &ubd_dev->cow.bitmap_len,
762 &ubd_dev->cow.data_offset);
763 if(fd >= 0){
764 printk(KERN_INFO "Creating \"%s\" as COW file for "
765 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
766 }
767 }
768
769 if(fd < 0){
770 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
771 -fd);
772 return fd;
773 }
774 ubd_dev->fd = fd;
775
776 if(ubd_dev->cow.file != NULL){
777 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
778
779 err = -ENOMEM;
780 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
781 if(ubd_dev->cow.bitmap == NULL){
782 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
783 goto error;
784 }
785 flush_tlb_kernel_vm();
786
787 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
788 ubd_dev->cow.bitmap_offset,
789 ubd_dev->cow.bitmap_len);
790 if(err < 0)
791 goto error;
792
793 flags = ubd_dev->openflags;
794 flags.w = 0;
795 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
796 NULL, NULL, NULL, NULL);
797 if(err < 0) goto error;
798 ubd_dev->cow.fd = err;
799 }
800 if (ubd_dev->no_trim == 0) {
801 blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
802 blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
803 }
804 blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
805 return 0;
806 error:
807 os_close_file(ubd_dev->fd);
808 return err;
809}
810
811static void ubd_device_release(struct device *dev)
812{
813 struct ubd *ubd_dev = dev_get_drvdata(dev);
814
815 blk_mq_free_tag_set(&ubd_dev->tag_set);
816 *ubd_dev = ((struct ubd) DEFAULT_UBD);
817}
818
819static ssize_t serial_show(struct device *dev,
820 struct device_attribute *attr, char *buf)
821{
822 struct gendisk *disk = dev_to_disk(dev);
823 struct ubd *ubd_dev = disk->private_data;
824
825 if (!ubd_dev)
826 return 0;
827
828 return sprintf(buf, "%s", ubd_dev->serial);
829}
830
831static DEVICE_ATTR_RO(serial);
832
833static struct attribute *ubd_attrs[] = {
834 &dev_attr_serial.attr,
835 NULL,
836};
837
838static umode_t ubd_attrs_are_visible(struct kobject *kobj,
839 struct attribute *a, int n)
840{
841 return a->mode;
842}
843
844static const struct attribute_group ubd_attr_group = {
845 .attrs = ubd_attrs,
846 .is_visible = ubd_attrs_are_visible,
847};
848
849static const struct attribute_group *ubd_attr_groups[] = {
850 &ubd_attr_group,
851 NULL,
852};
853
854static int ubd_disk_register(int major, u64 size, int unit,
855 struct gendisk *disk)
856{
857 disk->major = major;
858 disk->first_minor = unit << UBD_SHIFT;
859 disk->minors = 1 << UBD_SHIFT;
860 disk->fops = &ubd_blops;
861 set_capacity(disk, size / 512);
862 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
863
864 ubd_devs[unit].pdev.id = unit;
865 ubd_devs[unit].pdev.name = DRIVER_NAME;
866 ubd_devs[unit].pdev.dev.release = ubd_device_release;
867 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
868 platform_device_register(&ubd_devs[unit].pdev);
869
870 disk->private_data = &ubd_devs[unit];
871 disk->queue = ubd_devs[unit].queue;
872 return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
873}
874
875#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
876
877static const struct blk_mq_ops ubd_mq_ops = {
878 .queue_rq = ubd_queue_rq,
879};
880
881static int ubd_add(int n, char **error_out)
882{
883 struct ubd *ubd_dev = &ubd_devs[n];
884 struct gendisk *disk;
885 int err = 0;
886
887 if(ubd_dev->file == NULL)
888 goto out;
889
890 err = ubd_file_size(ubd_dev, &ubd_dev->size);
891 if(err < 0){
892 *error_out = "Couldn't determine size of device's file";
893 goto out;
894 }
895
896 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
897
898 ubd_dev->tag_set.ops = &ubd_mq_ops;
899 ubd_dev->tag_set.queue_depth = 64;
900 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
901 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
902 ubd_dev->tag_set.driver_data = ubd_dev;
903 ubd_dev->tag_set.nr_hw_queues = 1;
904
905 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
906 if (err)
907 goto out;
908
909 disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev);
910 if (IS_ERR(disk)) {
911 err = PTR_ERR(disk);
912 goto out_cleanup_tags;
913 }
914 ubd_dev->queue = disk->queue;
915
916 blk_queue_write_cache(ubd_dev->queue, true, false);
917 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
918 blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
919 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
920 if (err)
921 goto out_cleanup_disk;
922
923 ubd_gendisk[n] = disk;
924 return 0;
925
926out_cleanup_disk:
927 put_disk(disk);
928out_cleanup_tags:
929 blk_mq_free_tag_set(&ubd_dev->tag_set);
930out:
931 return err;
932}
933
934static int ubd_config(char *str, char **error_out)
935{
936 int n, ret;
937
938 /* This string is possibly broken up and stored, so it's only
939 * freed if ubd_setup_common fails, or if only general options
940 * were set.
941 */
942 str = kstrdup(str, GFP_KERNEL);
943 if (str == NULL) {
944 *error_out = "Failed to allocate memory";
945 return -ENOMEM;
946 }
947
948 ret = ubd_setup_common(str, &n, error_out);
949 if (ret)
950 goto err_free;
951
952 if (n == -1) {
953 ret = 0;
954 goto err_free;
955 }
956
957 mutex_lock(&ubd_lock);
958 ret = ubd_add(n, error_out);
959 if (ret)
960 ubd_devs[n].file = NULL;
961 mutex_unlock(&ubd_lock);
962
963out:
964 return ret;
965
966err_free:
967 kfree(str);
968 goto out;
969}
970
971static int ubd_get_config(char *name, char *str, int size, char **error_out)
972{
973 struct ubd *ubd_dev;
974 int n, len = 0;
975
976 n = parse_unit(&name);
977 if((n >= MAX_DEV) || (n < 0)){
978 *error_out = "ubd_get_config : device number out of range";
979 return -1;
980 }
981
982 ubd_dev = &ubd_devs[n];
983 mutex_lock(&ubd_lock);
984
985 if(ubd_dev->file == NULL){
986 CONFIG_CHUNK(str, size, len, "", 1);
987 goto out;
988 }
989
990 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
991
992 if(ubd_dev->cow.file != NULL){
993 CONFIG_CHUNK(str, size, len, ",", 0);
994 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
995 }
996 else CONFIG_CHUNK(str, size, len, "", 1);
997
998 out:
999 mutex_unlock(&ubd_lock);
1000 return len;
1001}
1002
1003static int ubd_id(char **str, int *start_out, int *end_out)
1004{
1005 int n;
1006
1007 n = parse_unit(str);
1008 *start_out = 0;
1009 *end_out = MAX_DEV - 1;
1010 return n;
1011}
1012
1013static int ubd_remove(int n, char **error_out)
1014{
1015 struct gendisk *disk = ubd_gendisk[n];
1016 struct ubd *ubd_dev;
1017 int err = -ENODEV;
1018
1019 mutex_lock(&ubd_lock);
1020
1021 ubd_dev = &ubd_devs[n];
1022
1023 if(ubd_dev->file == NULL)
1024 goto out;
1025
1026 /* you cannot remove a open disk */
1027 err = -EBUSY;
1028 if(ubd_dev->count > 0)
1029 goto out;
1030
1031 ubd_gendisk[n] = NULL;
1032 if(disk != NULL){
1033 del_gendisk(disk);
1034 put_disk(disk);
1035 }
1036
1037 err = 0;
1038 platform_device_unregister(&ubd_dev->pdev);
1039out:
1040 mutex_unlock(&ubd_lock);
1041 return err;
1042}
1043
1044/* All these are called by mconsole in process context and without
1045 * ubd-specific locks. The structure itself is const except for .list.
1046 */
1047static struct mc_device ubd_mc = {
1048 .list = LIST_HEAD_INIT(ubd_mc.list),
1049 .name = "ubd",
1050 .config = ubd_config,
1051 .get_config = ubd_get_config,
1052 .id = ubd_id,
1053 .remove = ubd_remove,
1054};
1055
1056static int __init ubd_mc_init(void)
1057{
1058 mconsole_register_dev(&ubd_mc);
1059 return 0;
1060}
1061
1062__initcall(ubd_mc_init);
1063
1064static int __init ubd0_init(void)
1065{
1066 struct ubd *ubd_dev = &ubd_devs[0];
1067
1068 mutex_lock(&ubd_lock);
1069 if(ubd_dev->file == NULL)
1070 ubd_dev->file = "root_fs";
1071 mutex_unlock(&ubd_lock);
1072
1073 return 0;
1074}
1075
1076__initcall(ubd0_init);
1077
1078/* Used in ubd_init, which is an initcall */
1079static struct platform_driver ubd_driver = {
1080 .driver = {
1081 .name = DRIVER_NAME,
1082 },
1083};
1084
1085static int __init ubd_init(void)
1086{
1087 char *error;
1088 int i, err;
1089
1090 if (register_blkdev(UBD_MAJOR, "ubd"))
1091 return -1;
1092
1093 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1094 sizeof(struct io_thread_req *),
1095 GFP_KERNEL
1096 );
1097 irq_remainder = 0;
1098
1099 if (irq_req_buffer == NULL) {
1100 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1101 return -1;
1102 }
1103 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1104 sizeof(struct io_thread_req *),
1105 GFP_KERNEL
1106 );
1107
1108 io_remainder = 0;
1109
1110 if (io_req_buffer == NULL) {
1111 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1112 return -1;
1113 }
1114 platform_driver_register(&ubd_driver);
1115 mutex_lock(&ubd_lock);
1116 for (i = 0; i < MAX_DEV; i++){
1117 err = ubd_add(i, &error);
1118 if(err)
1119 printk(KERN_ERR "Failed to initialize ubd device %d :"
1120 "%s\n", i, error);
1121 }
1122 mutex_unlock(&ubd_lock);
1123 return 0;
1124}
1125
1126late_initcall(ubd_init);
1127
1128static int __init ubd_driver_init(void){
1129 unsigned long stack;
1130 int err;
1131
1132 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1133 if(global_openflags.s){
1134 printk(KERN_INFO "ubd: Synchronous mode\n");
1135 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1136 * enough. So use anyway the io thread. */
1137 }
1138 stack = alloc_stack(0, 0);
1139 io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
1140 if(io_pid < 0){
1141 printk(KERN_ERR
1142 "ubd : Failed to start I/O thread (errno = %d) - "
1143 "falling back to synchronous I/O\n", -io_pid);
1144 io_pid = -1;
1145 return 0;
1146 }
1147 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1148 0, "ubd", ubd_devs);
1149 if(err < 0)
1150 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1151 return 0;
1152}
1153
1154device_initcall(ubd_driver_init);
1155
1156static int ubd_open(struct gendisk *disk, blk_mode_t mode)
1157{
1158 struct ubd *ubd_dev = disk->private_data;
1159 int err = 0;
1160
1161 mutex_lock(&ubd_mutex);
1162 if(ubd_dev->count == 0){
1163 err = ubd_open_dev(ubd_dev);
1164 if(err){
1165 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1166 disk->disk_name, ubd_dev->file, -err);
1167 goto out;
1168 }
1169 }
1170 ubd_dev->count++;
1171 set_disk_ro(disk, !ubd_dev->openflags.w);
1172out:
1173 mutex_unlock(&ubd_mutex);
1174 return err;
1175}
1176
1177static void ubd_release(struct gendisk *disk)
1178{
1179 struct ubd *ubd_dev = disk->private_data;
1180
1181 mutex_lock(&ubd_mutex);
1182 if(--ubd_dev->count == 0)
1183 ubd_close_dev(ubd_dev);
1184 mutex_unlock(&ubd_mutex);
1185}
1186
1187static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1188 __u64 *cow_offset, unsigned long *bitmap,
1189 __u64 bitmap_offset, unsigned long *bitmap_words,
1190 __u64 bitmap_len)
1191{
1192 __u64 sector = io_offset >> SECTOR_SHIFT;
1193 int i, update_bitmap = 0;
1194
1195 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1196 if(cow_mask != NULL)
1197 ubd_set_bit(i, (unsigned char *) cow_mask);
1198 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1199 continue;
1200
1201 update_bitmap = 1;
1202 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1203 }
1204
1205 if(!update_bitmap)
1206 return;
1207
1208 *cow_offset = sector / (sizeof(unsigned long) * 8);
1209
1210 /* This takes care of the case where we're exactly at the end of the
1211 * device, and *cow_offset + 1 is off the end. So, just back it up
1212 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1213 * for the original diagnosis.
1214 */
1215 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1216 sizeof(unsigned long)) - 1))
1217 (*cow_offset)--;
1218
1219 bitmap_words[0] = bitmap[*cow_offset];
1220 bitmap_words[1] = bitmap[*cow_offset + 1];
1221
1222 *cow_offset *= sizeof(unsigned long);
1223 *cow_offset += bitmap_offset;
1224}
1225
1226static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1227 unsigned long offset, unsigned long *bitmap,
1228 __u64 bitmap_offset, __u64 bitmap_len)
1229{
1230 __u64 sector = offset >> SECTOR_SHIFT;
1231 int i;
1232
1233 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
1234 panic("Operation too long");
1235
1236 if (req_op(req->req) == REQ_OP_READ) {
1237 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
1238 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1239 ubd_set_bit(i, (unsigned char *)
1240 &segment->sector_mask);
1241 }
1242 } else {
1243 cowify_bitmap(offset, segment->length, &segment->sector_mask,
1244 &segment->cow_offset, bitmap, bitmap_offset,
1245 segment->bitmap_words, bitmap_len);
1246 }
1247}
1248
1249static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1250 struct request *req)
1251{
1252 struct bio_vec bvec;
1253 struct req_iterator iter;
1254 int i = 0;
1255 unsigned long byte_offset = io_req->offset;
1256 enum req_op op = req_op(req);
1257
1258 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1259 io_req->io_desc[0].buffer = NULL;
1260 io_req->io_desc[0].length = blk_rq_bytes(req);
1261 } else {
1262 rq_for_each_segment(bvec, req, iter) {
1263 BUG_ON(i >= io_req->desc_cnt);
1264
1265 io_req->io_desc[i].buffer = bvec_virt(&bvec);
1266 io_req->io_desc[i].length = bvec.bv_len;
1267 i++;
1268 }
1269 }
1270
1271 if (dev->cow.file) {
1272 for (i = 0; i < io_req->desc_cnt; i++) {
1273 cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1274 dev->cow.bitmap, dev->cow.bitmap_offset,
1275 dev->cow.bitmap_len);
1276 byte_offset += io_req->io_desc[i].length;
1277 }
1278
1279 }
1280}
1281
1282static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1283 int desc_cnt)
1284{
1285 struct io_thread_req *io_req;
1286 int i;
1287
1288 io_req = kmalloc(sizeof(*io_req) +
1289 (desc_cnt * sizeof(struct io_desc)),
1290 GFP_ATOMIC);
1291 if (!io_req)
1292 return NULL;
1293
1294 io_req->req = req;
1295 if (dev->cow.file)
1296 io_req->fds[0] = dev->cow.fd;
1297 else
1298 io_req->fds[0] = dev->fd;
1299 io_req->error = 0;
1300 io_req->sectorsize = SECTOR_SIZE;
1301 io_req->fds[1] = dev->fd;
1302 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1303 io_req->offsets[0] = 0;
1304 io_req->offsets[1] = dev->cow.data_offset;
1305
1306 for (i = 0 ; i < desc_cnt; i++) {
1307 io_req->io_desc[i].sector_mask = 0;
1308 io_req->io_desc[i].cow_offset = -1;
1309 }
1310
1311 return io_req;
1312}
1313
1314static int ubd_submit_request(struct ubd *dev, struct request *req)
1315{
1316 int segs = 0;
1317 struct io_thread_req *io_req;
1318 int ret;
1319 enum req_op op = req_op(req);
1320
1321 if (op == REQ_OP_FLUSH)
1322 segs = 0;
1323 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1324 segs = 1;
1325 else
1326 segs = blk_rq_nr_phys_segments(req);
1327
1328 io_req = ubd_alloc_req(dev, req, segs);
1329 if (!io_req)
1330 return -ENOMEM;
1331
1332 io_req->desc_cnt = segs;
1333 if (segs)
1334 ubd_map_req(dev, io_req, req);
1335
1336 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1337 if (ret != sizeof(io_req)) {
1338 if (ret != -EAGAIN)
1339 pr_err("write to io thread failed: %d\n", -ret);
1340 kfree(io_req);
1341 }
1342 return ret;
1343}
1344
1345static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1346 const struct blk_mq_queue_data *bd)
1347{
1348 struct ubd *ubd_dev = hctx->queue->queuedata;
1349 struct request *req = bd->rq;
1350 int ret = 0, res = BLK_STS_OK;
1351
1352 blk_mq_start_request(req);
1353
1354 spin_lock_irq(&ubd_dev->lock);
1355
1356 switch (req_op(req)) {
1357 case REQ_OP_FLUSH:
1358 case REQ_OP_READ:
1359 case REQ_OP_WRITE:
1360 case REQ_OP_DISCARD:
1361 case REQ_OP_WRITE_ZEROES:
1362 ret = ubd_submit_request(ubd_dev, req);
1363 break;
1364 default:
1365 WARN_ON_ONCE(1);
1366 res = BLK_STS_NOTSUPP;
1367 }
1368
1369 spin_unlock_irq(&ubd_dev->lock);
1370
1371 if (ret < 0) {
1372 if (ret == -ENOMEM)
1373 res = BLK_STS_RESOURCE;
1374 else
1375 res = BLK_STS_DEV_RESOURCE;
1376 }
1377
1378 return res;
1379}
1380
1381static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1382{
1383 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1384
1385 geo->heads = 128;
1386 geo->sectors = 32;
1387 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1388 return 0;
1389}
1390
1391static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1392 unsigned int cmd, unsigned long arg)
1393{
1394 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1395 u16 ubd_id[ATA_ID_WORDS];
1396
1397 switch (cmd) {
1398 struct cdrom_volctrl volume;
1399 case HDIO_GET_IDENTITY:
1400 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1401 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1402 ubd_id[ATA_ID_HEADS] = 128;
1403 ubd_id[ATA_ID_SECTORS] = 32;
1404 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1405 sizeof(ubd_id)))
1406 return -EFAULT;
1407 return 0;
1408
1409 case CDROMVOLREAD:
1410 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1411 return -EFAULT;
1412 volume.channel0 = 255;
1413 volume.channel1 = 255;
1414 volume.channel2 = 255;
1415 volume.channel3 = 255;
1416 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1417 return -EFAULT;
1418 return 0;
1419 }
1420 return -EINVAL;
1421}
1422
1423static int map_error(int error_code)
1424{
1425 switch (error_code) {
1426 case 0:
1427 return BLK_STS_OK;
1428 case ENOSYS:
1429 case EOPNOTSUPP:
1430 return BLK_STS_NOTSUPP;
1431 case ENOSPC:
1432 return BLK_STS_NOSPC;
1433 }
1434 return BLK_STS_IOERR;
1435}
1436
1437/*
1438 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1439 *
1440 * The following functions are part of UML hypervisor code.
1441 * All functions from here onwards are executed as a helper
1442 * thread and are not allowed to execute any kernel functions.
1443 *
1444 * Any communication must occur strictly via shared memory and IPC.
1445 *
1446 * Do not add printks, locks, kernel memory operations, etc - it
1447 * will result in unpredictable behaviour and/or crashes.
1448 */
1449
1450static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1451{
1452 int n;
1453
1454 if (segment->cow_offset == -1)
1455 return map_error(0);
1456
1457 n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1458 sizeof(segment->bitmap_words), segment->cow_offset);
1459 if (n != sizeof(segment->bitmap_words))
1460 return map_error(-n);
1461
1462 return map_error(0);
1463}
1464
1465static void do_io(struct io_thread_req *req, struct io_desc *desc)
1466{
1467 char *buf = NULL;
1468 unsigned long len;
1469 int n, nsectors, start, end, bit;
1470 __u64 off;
1471
1472 /* FLUSH is really a special case, we cannot "case" it with others */
1473
1474 if (req_op(req->req) == REQ_OP_FLUSH) {
1475 /* fds[0] is always either the rw image or our cow file */
1476 req->error = map_error(-os_sync_file(req->fds[0]));
1477 return;
1478 }
1479
1480 nsectors = desc->length / req->sectorsize;
1481 start = 0;
1482 do {
1483 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
1484 end = start;
1485 while((end < nsectors) &&
1486 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
1487 end++;
1488
1489 off = req->offset + req->offsets[bit] +
1490 start * req->sectorsize;
1491 len = (end - start) * req->sectorsize;
1492 if (desc->buffer != NULL)
1493 buf = &desc->buffer[start * req->sectorsize];
1494
1495 switch (req_op(req->req)) {
1496 case REQ_OP_READ:
1497 n = 0;
1498 do {
1499 buf = &buf[n];
1500 len -= n;
1501 n = os_pread_file(req->fds[bit], buf, len, off);
1502 if (n < 0) {
1503 req->error = map_error(-n);
1504 return;
1505 }
1506 } while((n < len) && (n != 0));
1507 if (n < len) memset(&buf[n], 0, len - n);
1508 break;
1509 case REQ_OP_WRITE:
1510 n = os_pwrite_file(req->fds[bit], buf, len, off);
1511 if(n != len){
1512 req->error = map_error(-n);
1513 return;
1514 }
1515 break;
1516 case REQ_OP_DISCARD:
1517 n = os_falloc_punch(req->fds[bit], off, len);
1518 if (n) {
1519 req->error = map_error(-n);
1520 return;
1521 }
1522 break;
1523 case REQ_OP_WRITE_ZEROES:
1524 n = os_falloc_zeroes(req->fds[bit], off, len);
1525 if (n) {
1526 req->error = map_error(-n);
1527 return;
1528 }
1529 break;
1530 default:
1531 WARN_ON_ONCE(1);
1532 req->error = BLK_STS_NOTSUPP;
1533 return;
1534 }
1535
1536 start = end;
1537 } while(start < nsectors);
1538
1539 req->offset += len;
1540 req->error = update_bitmap(req, desc);
1541}
1542
/* Changed in start_io_thread (per the original note; its body is not
 * visible here - TODO confirm).  The call site of start_io_thread in
 * this file is ubd_driver_init, which is an initcall, so that call is
 * serialized.  Read by io_thread as the descriptor it receives requests
 * on and writes results back to.
 */
int kernel_fd = -1;

/* Only changed by the io thread, which increments it once per request.
 * XXX: the value is never read in the code visible here.
 */
static int io_count;
1550
1551int io_thread(void *arg)
1552{
1553 int n, count, written, res;
1554
1555 os_fix_helper_signals();
1556
1557 while(1){
1558 n = bulk_req_safe_read(
1559 kernel_fd,
1560 io_req_buffer,
1561 &io_remainder,
1562 &io_remainder_size,
1563 UBD_REQ_BUFFER_SIZE
1564 );
1565 if (n <= 0) {
1566 if (n == -EAGAIN)
1567 ubd_read_poll(-1);
1568
1569 continue;
1570 }
1571
1572 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1573 struct io_thread_req *req = (*io_req_buffer)[count];
1574 int i;
1575
1576 io_count++;
1577 for (i = 0; !req->error && i < req->desc_cnt; i++)
1578 do_io(req, &(req->io_desc[i]));
1579
1580 }
1581
1582 written = 0;
1583
1584 do {
1585 res = os_write_file(kernel_fd,
1586 ((char *) io_req_buffer) + written,
1587 n - written);
1588 if (res >= 0) {
1589 written += res;
1590 }
1591 if (written < n) {
1592 ubd_write_poll(-1);
1593 }
1594 } while (written < n);
1595 }
1596
1597 return 0;
1598}