Loading...
1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define UBD_SHIFT 4
21
22#include <linux/module.h>
23#include <linux/init.h>
24#include <linux/blkdev.h>
25#include <linux/ata.h>
26#include <linux/hdreg.h>
27#include <linux/cdrom.h>
28#include <linux/proc_fs.h>
29#include <linux/seq_file.h>
30#include <linux/ctype.h>
31#include <linux/slab.h>
32#include <linux/vmalloc.h>
33#include <linux/platform_device.h>
34#include <linux/scatterlist.h>
35#include <asm/tlbflush.h>
36#include "kern_util.h"
37#include "mconsole_kern.h"
38#include "init.h"
39#include "irq_kern.h"
40#include "ubd.h"
41#include "os.h"
42#include "cow.h"
43
/* Direction of a request handed to the I/O thread. */
enum ubd_req { UBD_READ, UBD_WRITE };

/*
 * One piece of a block request.  A pointer to this structure is written
 * to thread_fd by do_ubd_request() and read back in ubd_handler() once
 * the piece is finished.
 */
struct io_thread_req {
	/* block-layer request this piece belongs to */
	struct request *req;
	enum ubd_req op;
	/* [0]: backing file fd when a COW file is in use, otherwise the
	 * device fd; [1]: the device fd (see prepare_request()) */
	int fds[2];
	/* [0]: always 0; [1]: cow.data_offset (see prepare_request()) */
	unsigned long offsets[2];
	/* byte offset and byte length of this piece */
	unsigned long long offset;
	unsigned long length;
	char *buffer;
	/* always 1 << 9 in this file */
	int sectorsize;
	/* one bit per sector of this piece, filled in by cowify_req() */
	unsigned long sector_mask;
	/* file offset of bitmap_words, or -1 when no bitmap update is needed */
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
	int error;
};
60
/*
 * Test bit number 'bit' of the byte-addressed bitmap 'data'.  Bit 0 is
 * the least significant bit of data[0].  Returns 1 if set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	return (data[index] & (1 << shift)) ? 1 : 0;
}
71
/*
 * Set bit number 'bit' in the byte-addressed bitmap 'data'.  Bit 0 is
 * the least significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	data[index] = data[index] | (1 << shift);
}
82/*End stuff from ubd_user.h*/
83
/* Name shared by the platform driver and the per-unit platform devices. */
#define DRIVER_NAME "uml-blkdev"

/* Taken around accesses to the device tables and their configuration. */
static DEFINE_MUTEX(ubd_lock);
/* Serializes ubd_open() and ubd_release(). */
static DEFINE_MUTEX(ubd_mutex);	/* replaces BKL, might not be needed */

/* Block device operations; defined below, referenced by ubd_blops. */
static int ubd_open(struct block_device *bdev, fmode_t mode);
static int ubd_release(struct gendisk *disk, fmode_t mode);
static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of slots in ubd_devs[], ubd_gendisk[] and fake_gendisk[]. */
#define MAX_DEV (16)
96
/* Operations installed as disk->fops by ubd_disk_register(). */
static const struct block_device_operations ubd_blops = {
	.owner		= THIS_MODULE,
	.open		= ubd_open,
	.release	= ubd_release,
	.ioctl		= ubd_ioctl,
	.getgeo		= ubd_getgeo,
};
104
/* Protected by ubd_lock */
/* Stays UBD_MAJOR until "ubd=<major>" assigns an extra major number. */
static int fake_major = UBD_MAJOR;
/* Disks registered under UBD_MAJOR and under fake_major, one per unit. */
static struct gendisk *ubd_gendisk[MAX_DEV];
static struct gendisk *fake_gendisk[MAX_DEV];

/*
 * Default open flags.  The two variants differ only in .s, which is set
 * when CONFIG_BLK_DEV_UBD_SYNC is enabled.
 */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif
/* Starting point for per-device flags; "ubd=sync" passes it through of_sync(). */
static struct openflags global_openflags = OPEN_FLAGS;
118
/* Copy-on-write state of a device; .file stays NULL when no backing file is used. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap, vmalloc'ed in ubd_open_dev() */
	unsigned long *bitmap;
	/* size passed to vmalloc() and read_cow_bitmap() for the bitmap */
	unsigned long bitmap_len;
	/* offset of the bitmap inside the COW file */
	int bitmap_offset;
	/* copied into io_thread_req.offsets[1] by prepare_request() */
	int data_offset;
};
129
/* Maximum number of scatterlist segments per request. */
#define MAX_SG 64

/* Per-unit device state; ubd_devs[] holds one of these per unit. */
struct ubd {
	/* linkage on the global 'restart' list of devices to be restarted */
	struct list_head restart;
	/* name (and fd, below) of the file opened for writing, either the
	 * backing or the cow file. */
	char *file;
	/* open count, maintained by ubd_open() and ubd_release() */
	int count;
	int fd;
	/* size in bytes, rounded up to a 512-byte multiple by ubd_add() */
	__u64 size;
	/* flags parsed from the configuration string */
	struct openflags boot_openflags;
	/* flags used for the current open; copied from boot_openflags */
	struct openflags openflags;
	/* 'c' flag: the file is not locked on the host */
	unsigned shared:1;
	/* 'd' flag: no COW header is looked for in the file */
	unsigned no_cow:1;
	struct cow cow;
	struct platform_device pdev;
	struct request_queue *queue;
	/* queue lock handed to blk_init_queue() */
	spinlock_t lock;
	/* scatterlist of the request currently being submitted */
	struct scatterlist sg[MAX_SG];
	struct request *request;
	/* next sg[] entry to submit and one past the last mapped entry */
	int start_sg, end_sg;
	/* sector of the next piece to submit */
	sector_t rq_pos;
};
153
/* Initializer for an unconfigured struct cow: no file, no fd, no bitmap. */
#define DEFAULT_COW { \
	.file =			NULL, \
	.fd =			-1, \
	.bitmap =		NULL, \
	.bitmap_offset =	0, \
	.data_offset =		0, \
}

/*
 * Initializer for an unconfigured struct ubd.  Used for the static
 * ubd_devs[] table and to reset a slot in ubd_device_release().
 */
#define DEFAULT_UBD { \
	.file =			NULL, \
	.count =		0, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
	.no_cow =		0, \
	.shared =		0, \
	.cow =			DEFAULT_COW, \
	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
	.request =		NULL, \
	.start_sg =		0, \
	.end_sg =		0, \
	.rq_pos =		0, \
}
178
/* Protected by ubd_lock */
static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };

/* Only changed by fake_ide_setup which is a setup */
static int fake_ide = 0;
/* proc directories "ide" and "ide0", created on demand by make_proc_ide() */
static struct proc_dir_entry *proc_ide_root = NULL;
static struct proc_dir_entry *proc_ide = NULL;
186
/*
 * Create the proc directory "ide" and, below it, "ide0", remembering
 * both in proc_ide_root and proc_ide.  The results are not checked here.
 */
static void make_proc_ide(void)
{
	proc_ide_root = proc_mkdir("ide", NULL);
	proc_ide = proc_mkdir("ide0", proc_ide_root);
}
192
/* seq_file show callback for the fake "media" entry: always emits "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");
	return 0;
}
198
/* Open callback for the fake "media" entry; binds the show routine above. */
static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, fake_ide_media_proc_show, NULL);
}
203
/* File operations of the "media" proc entry created by make_ide_entries(). */
static const struct file_operations fake_ide_media_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= fake_ide_media_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
211
212static void make_ide_entries(const char *dev_name)
213{
214 struct proc_dir_entry *dir, *ent;
215 char name[64];
216
217 if(proc_ide_root == NULL) make_proc_ide();
218
219 dir = proc_mkdir(dev_name, proc_ide);
220 if(!dir) return;
221
222 ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
223 if(!ent) return;
224 snprintf(name, sizeof(name), "ide0/%s", dev_name);
225 proc_symlink(dev_name, proc_ide_root, name);
226}
227
/* Handler for the "fake_ide" boot option: just records that it was given. */
static int fake_ide_setup(char *str)
{
	fake_ide = 1;
	return 1;
}
233
234__setup("fake_ide", fake_ide_setup);
235
236__uml_help(fake_ide_setup,
237"fake_ide\n"
238" Create ide0 entries that map onto ubd devices.\n\n"
239);
240
/*
 * Parse a unit designator at *ptr: either a number (any base accepted
 * by simple_strtoul) or a single lowercase letter, 'a' meaning unit 0.
 * On success *ptr is advanced past the designator and the unit number
 * is returned; otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
259
260/* If *index_out == -1 at exit, the passed option was a general one;
261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
262 * should not be freed on exit.
263 */
264static int ubd_setup_common(char *str, int *index_out, char **error_out)
265{
266 struct ubd *ubd_dev;
267 struct openflags flags = global_openflags;
268 char *backing_file;
269 int n, err = 0, i;
270
271 if(index_out) *index_out = -1;
272 n = *str;
273 if(n == '='){
274 char *end;
275 int major;
276
277 str++;
278 if(!strcmp(str, "sync")){
279 global_openflags = of_sync(global_openflags);
280 goto out1;
281 }
282
283 err = -EINVAL;
284 major = simple_strtoul(str, &end, 0);
285 if((*end != '\0') || (end == str)){
286 *error_out = "Didn't parse major number";
287 goto out1;
288 }
289
290 mutex_lock(&ubd_lock);
291 if (fake_major != UBD_MAJOR) {
292 *error_out = "Can't assign a fake major twice";
293 goto out1;
294 }
295
296 fake_major = major;
297
298 printk(KERN_INFO "Setting extra ubd major number to %d\n",
299 major);
300 err = 0;
301 out1:
302 mutex_unlock(&ubd_lock);
303 return err;
304 }
305
306 n = parse_unit(&str);
307 if(n < 0){
308 *error_out = "Couldn't parse device number";
309 return -EINVAL;
310 }
311 if(n >= MAX_DEV){
312 *error_out = "Device number out of range";
313 return 1;
314 }
315
316 err = -EBUSY;
317 mutex_lock(&ubd_lock);
318
319 ubd_dev = &ubd_devs[n];
320 if(ubd_dev->file != NULL){
321 *error_out = "Device is already configured";
322 goto out;
323 }
324
325 if (index_out)
326 *index_out = n;
327
328 err = -EINVAL;
329 for (i = 0; i < sizeof("rscd="); i++) {
330 switch (*str) {
331 case 'r':
332 flags.w = 0;
333 break;
334 case 's':
335 flags.s = 1;
336 break;
337 case 'd':
338 ubd_dev->no_cow = 1;
339 break;
340 case 'c':
341 ubd_dev->shared = 1;
342 break;
343 case '=':
344 str++;
345 goto break_loop;
346 default:
347 *error_out = "Expected '=' or flag letter "
348 "(r, s, c, or d)";
349 goto out;
350 }
351 str++;
352 }
353
354 if (*str == '=')
355 *error_out = "Too many flags specified";
356 else
357 *error_out = "Missing '='";
358 goto out;
359
360break_loop:
361 backing_file = strchr(str, ',');
362
363 if (backing_file == NULL)
364 backing_file = strchr(str, ':');
365
366 if(backing_file != NULL){
367 if(ubd_dev->no_cow){
368 *error_out = "Can't specify both 'd' and a cow file";
369 goto out;
370 }
371 else {
372 *backing_file = '\0';
373 backing_file++;
374 }
375 }
376 err = 0;
377 ubd_dev->file = str;
378 ubd_dev->cow.file = backing_file;
379 ubd_dev->boot_openflags = flags;
380out:
381 mutex_unlock(&ubd_lock);
382 return err;
383}
384
385static int ubd_setup(char *str)
386{
387 char *error;
388 int err;
389
390 err = ubd_setup_common(str, NULL, &error);
391 if(err)
392 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
393 "%s\n", str, error);
394 return 1;
395}
396
397__setup("ubd", ubd_setup);
398__uml_help(ubd_setup,
399"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
400" This is used to associate a device with a file in the underlying\n"
401" filesystem. When specifying two filenames, the first one is the\n"
402" COW name and the second is the backing file name. As separator you can\n"
403" use either a ':' or a ',': the first one allows writing things like;\n"
404" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
405" while with a ',' the shell would not expand the 2nd '~'.\n"
406" When using only one filename, UML will detect whether to treat it like\n"
407" a COW file or a backing file. To override this detection, add the 'd'\n"
408" flag:\n"
409" ubd0d=BackingFile\n"
410" Usually, there is a filesystem in the file, but \n"
411" that's not required. Swap devices containing swap files can be\n"
412" specified like this. Also, a file which doesn't contain a\n"
413" filesystem can have its contents read in the virtual \n"
414" machine by running 'dd' on the device. <n> must be in the range\n"
415" 0 to 7. Appending an 'r' to the number will cause that device\n"
416" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
417" an 's' will cause data to be written to disk on the host immediately.\n"
418" 'c' will cause the device to be treated as being shared between multiple\n"
419" UMLs and file locking will be turned off - this is appropriate for a\n"
420" cluster filesystem and inappropriate at almost all other times.\n\n"
421);
422
/* Handler for options starting with "udb": only warns about the likely typo. */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n", str);
	return 1;
}
429
430__setup("udb", udb_setup);
431__uml_help(udb_setup,
432"udb\n"
433" This option is here solely to catch ubd -> udb typos, which can be\n"
434" to impossible to catch visually unless you specifically look for\n"
435" them. The only result of any option starting with 'udb' is an error\n"
436" in the boot output.\n\n"
437);
438
439static void do_ubd_request(struct request_queue * q);
440
441/* Only changed by ubd_init, which is an initcall. */
442static int thread_fd = -1;
443static LIST_HEAD(restart);
444
445/* XXX - move this inside ubd_intr. */
446/* Called without dev->lock held, and only in interrupt context. */
447static void ubd_handler(void)
448{
449 struct io_thread_req *req;
450 struct ubd *ubd;
451 struct list_head *list, *next_ele;
452 unsigned long flags;
453 int n;
454
455 while(1){
456 n = os_read_file(thread_fd, &req,
457 sizeof(struct io_thread_req *));
458 if(n != sizeof(req)){
459 if(n == -EAGAIN)
460 break;
461 printk(KERN_ERR "spurious interrupt in ubd_handler, "
462 "err = %d\n", -n);
463 return;
464 }
465
466 blk_end_request(req->req, 0, req->length);
467 kfree(req);
468 }
469 reactivate_fd(thread_fd, UBD_IRQ);
470
471 list_for_each_safe(list, next_ele, &restart){
472 ubd = container_of(list, struct ubd, restart);
473 list_del_init(&ubd->restart);
474 spin_lock_irqsave(&ubd->lock, flags);
475 do_ubd_request(ubd->queue);
476 spin_unlock_irqrestore(&ubd->lock, flags);
477 }
478}
479
/* Handler registered for UBD_IRQ in ubd_driver_init(); defers to ubd_handler(). */
static irqreturn_t ubd_intr(int irq, void *dev)
{
	ubd_handler();
	return IRQ_HANDLED;
}
485
486/* Only changed by ubd_init, which is an initcall. */
487static int io_pid = -1;
488
/* Exit call: kill the I/O thread if one was started (io_pid != -1). */
static void kill_io_thread(void)
{
	if(io_pid != -1)
		os_kill_process(io_pid, 1);
}
494
495__uml_exitcall(kill_io_thread);
496
497static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
498{
499 char *file;
500 int fd;
501 int err;
502
503 __u32 version;
504 __u32 align;
505 char *backing_file;
506 time_t mtime;
507 unsigned long long size;
508 int sector_size;
509 int bitmap_offset;
510
511 if (ubd_dev->file && ubd_dev->cow.file) {
512 file = ubd_dev->cow.file;
513
514 goto out;
515 }
516
517 fd = os_open_file(ubd_dev->file, global_openflags, 0);
518 if (fd < 0)
519 return fd;
520
521 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
522 &mtime, &size, §or_size, &align, &bitmap_offset);
523 os_close_file(fd);
524
525 if(err == -EINVAL)
526 file = ubd_dev->file;
527 else
528 file = backing_file;
529
530out:
531 return os_file_size(file, size_out);
532}
533
/*
 * Seek fd to 'offset' and read 'len' bytes into buf.  Returns 0 unless
 * the seek or the read reports a negative error, which is passed on.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
548
549static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
550{
551 unsigned long modtime;
552 unsigned long long actual;
553 int err;
554
555 err = os_file_modtime(file, &modtime);
556 if (err < 0) {
557 printk(KERN_ERR "Failed to get modification time of backing "
558 "file \"%s\", err = %d\n", file, -err);
559 return err;
560 }
561
562 err = os_file_size(file, &actual);
563 if (err < 0) {
564 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
565 "err = %d\n", file, -err);
566 return err;
567 }
568
569 if (actual != size) {
570 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
571 * the typecast.*/
572 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
573 "vs backing file\n", (unsigned long long) size, actual);
574 return -EINVAL;
575 }
576 if (modtime != mtime) {
577 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
578 "backing file\n", mtime, modtime);
579 return -EINVAL;
580 }
581 return 0;
582}
583
584static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
585{
586 struct uml_stat buf1, buf2;
587 int err;
588
589 if (from_cmdline == NULL)
590 return 0;
591 if (!strcmp(from_cmdline, from_cow))
592 return 0;
593
594 err = os_stat_file(from_cmdline, &buf1);
595 if (err < 0) {
596 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
597 -err);
598 return 0;
599 }
600 err = os_stat_file(from_cow, &buf2);
601 if (err < 0) {
602 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
603 -err);
604 return 1;
605 }
606 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
607 return 0;
608
609 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
610 "\"%s\" specified in COW header of \"%s\"\n",
611 from_cmdline, from_cow, cow);
612 return 1;
613}
614
615static int open_ubd_file(char *file, struct openflags *openflags, int shared,
616 char **backing_file_out, int *bitmap_offset_out,
617 unsigned long *bitmap_len_out, int *data_offset_out,
618 int *create_cow_out)
619{
620 time_t mtime;
621 unsigned long long size;
622 __u32 version, align;
623 char *backing_file;
624 int fd, err, sectorsize, asked_switch, mode = 0644;
625
626 fd = os_open_file(file, *openflags, mode);
627 if (fd < 0) {
628 if ((fd == -ENOENT) && (create_cow_out != NULL))
629 *create_cow_out = 1;
630 if (!openflags->w ||
631 ((fd != -EROFS) && (fd != -EACCES)))
632 return fd;
633 openflags->w = 0;
634 fd = os_open_file(file, *openflags, mode);
635 if (fd < 0)
636 return fd;
637 }
638
639 if (shared)
640 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
641 else {
642 err = os_lock_file(fd, openflags->w);
643 if (err < 0) {
644 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
645 file, -err);
646 goto out_close;
647 }
648 }
649
650 /* Successful return case! */
651 if (backing_file_out == NULL)
652 return fd;
653
654 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
655 &size, §orsize, &align, bitmap_offset_out);
656 if (err && (*backing_file_out != NULL)) {
657 printk(KERN_ERR "Failed to read COW header from COW file "
658 "\"%s\", errno = %d\n", file, -err);
659 goto out_close;
660 }
661 if (err)
662 return fd;
663
664 asked_switch = path_requires_switch(*backing_file_out, backing_file,
665 file);
666
667 /* Allow switching only if no mismatch. */
668 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
669 mtime)) {
670 printk(KERN_ERR "Switching backing file to '%s'\n",
671 *backing_file_out);
672 err = write_cow_header(file, fd, *backing_file_out,
673 sectorsize, align, &size);
674 if (err) {
675 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
676 goto out_close;
677 }
678 } else {
679 *backing_file_out = backing_file;
680 err = backing_file_mismatch(*backing_file_out, size, mtime);
681 if (err)
682 goto out_close;
683 }
684
685 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
686 bitmap_len_out, data_offset_out);
687
688 return fd;
689 out_close:
690 os_close_file(fd);
691 return err;
692}
693
694static int create_cow_file(char *cow_file, char *backing_file,
695 struct openflags flags,
696 int sectorsize, int alignment, int *bitmap_offset_out,
697 unsigned long *bitmap_len_out, int *data_offset_out)
698{
699 int err, fd;
700
701 flags.c = 1;
702 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
703 if (fd < 0) {
704 err = fd;
705 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
706 cow_file, -err);
707 goto out;
708 }
709
710 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
711 bitmap_offset_out, bitmap_len_out,
712 data_offset_out);
713 if (!err)
714 return fd;
715 os_close_file(fd);
716 out:
717 return err;
718}
719
720static void ubd_close_dev(struct ubd *ubd_dev)
721{
722 os_close_file(ubd_dev->fd);
723 if(ubd_dev->cow.file == NULL)
724 return;
725
726 os_close_file(ubd_dev->cow.fd);
727 vfree(ubd_dev->cow.bitmap);
728 ubd_dev->cow.bitmap = NULL;
729}
730
731static int ubd_open_dev(struct ubd *ubd_dev)
732{
733 struct openflags flags;
734 char **back_ptr;
735 int err, create_cow, *create_ptr;
736 int fd;
737
738 ubd_dev->openflags = ubd_dev->boot_openflags;
739 create_cow = 0;
740 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
741 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
742
743 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
744 back_ptr, &ubd_dev->cow.bitmap_offset,
745 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
746 create_ptr);
747
748 if((fd == -ENOENT) && create_cow){
749 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
750 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
751 &ubd_dev->cow.bitmap_offset,
752 &ubd_dev->cow.bitmap_len,
753 &ubd_dev->cow.data_offset);
754 if(fd >= 0){
755 printk(KERN_INFO "Creating \"%s\" as COW file for "
756 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
757 }
758 }
759
760 if(fd < 0){
761 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
762 -fd);
763 return fd;
764 }
765 ubd_dev->fd = fd;
766
767 if(ubd_dev->cow.file != NULL){
768 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
769
770 err = -ENOMEM;
771 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
772 if(ubd_dev->cow.bitmap == NULL){
773 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
774 goto error;
775 }
776 flush_tlb_kernel_vm();
777
778 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
779 ubd_dev->cow.bitmap_offset,
780 ubd_dev->cow.bitmap_len);
781 if(err < 0)
782 goto error;
783
784 flags = ubd_dev->openflags;
785 flags.w = 0;
786 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
787 NULL, NULL, NULL, NULL);
788 if(err < 0) goto error;
789 ubd_dev->cow.fd = err;
790 }
791 return 0;
792 error:
793 os_close_file(ubd_dev->fd);
794 return err;
795}
796
/*
 * Release callback of the per-unit platform device: tears down the
 * request queue and resets the slot to its unconfigured defaults.
 */
static void ubd_device_release(struct device *dev)
{
	struct ubd *ubd_dev = dev_get_drvdata(dev);

	blk_cleanup_queue(ubd_dev->queue);
	*ubd_dev = ((struct ubd) DEFAULT_UBD);
}
804
805static int ubd_disk_register(int major, u64 size, int unit,
806 struct gendisk **disk_out)
807{
808 struct gendisk *disk;
809
810 disk = alloc_disk(1 << UBD_SHIFT);
811 if(disk == NULL)
812 return -ENOMEM;
813
814 disk->major = major;
815 disk->first_minor = unit << UBD_SHIFT;
816 disk->fops = &ubd_blops;
817 set_capacity(disk, size / 512);
818 if (major == UBD_MAJOR)
819 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
820 else
821 sprintf(disk->disk_name, "ubd_fake%d", unit);
822
823 /* sysfs register (not for ide fake devices) */
824 if (major == UBD_MAJOR) {
825 ubd_devs[unit].pdev.id = unit;
826 ubd_devs[unit].pdev.name = DRIVER_NAME;
827 ubd_devs[unit].pdev.dev.release = ubd_device_release;
828 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
829 platform_device_register(&ubd_devs[unit].pdev);
830 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
831 }
832
833 disk->private_data = &ubd_devs[unit];
834 disk->queue = ubd_devs[unit].queue;
835 add_disk(disk);
836
837 *disk_out = disk;
838 return 0;
839}
840
/*
 * Round n up to the next multiple of 512.  The argument is parenthesized
 * so that expressions such as (a | b) are rounded as a whole, and the
 * mask is built from an unsigned constant instead of shifting -1.
 */
#define ROUND_BLOCK(n) (((n) + ((1ULL << 9) - 1)) & ~((1ULL << 9) - 1))
842
843static int ubd_add(int n, char **error_out)
844{
845 struct ubd *ubd_dev = &ubd_devs[n];
846 int err = 0;
847
848 if(ubd_dev->file == NULL)
849 goto out;
850
851 err = ubd_file_size(ubd_dev, &ubd_dev->size);
852 if(err < 0){
853 *error_out = "Couldn't determine size of device's file";
854 goto out;
855 }
856
857 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
858
859 INIT_LIST_HEAD(&ubd_dev->restart);
860 sg_init_table(ubd_dev->sg, MAX_SG);
861
862 err = -ENOMEM;
863 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
864 if (ubd_dev->queue == NULL) {
865 *error_out = "Failed to initialize device queue";
866 goto out;
867 }
868 ubd_dev->queue->queuedata = ubd_dev;
869
870 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
871 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
872 if(err){
873 *error_out = "Failed to register device";
874 goto out_cleanup;
875 }
876
877 if (fake_major != UBD_MAJOR)
878 ubd_disk_register(fake_major, ubd_dev->size, n,
879 &fake_gendisk[n]);
880
881 /*
882 * Perhaps this should also be under the "if (fake_major)" above
883 * using the fake_disk->disk_name
884 */
885 if (fake_ide)
886 make_ide_entries(ubd_gendisk[n]->disk_name);
887
888 err = 0;
889out:
890 return err;
891
892out_cleanup:
893 blk_cleanup_queue(ubd_dev->queue);
894 goto out;
895}
896
897static int ubd_config(char *str, char **error_out)
898{
899 int n, ret;
900
901 /* This string is possibly broken up and stored, so it's only
902 * freed if ubd_setup_common fails, or if only general options
903 * were set.
904 */
905 str = kstrdup(str, GFP_KERNEL);
906 if (str == NULL) {
907 *error_out = "Failed to allocate memory";
908 return -ENOMEM;
909 }
910
911 ret = ubd_setup_common(str, &n, error_out);
912 if (ret)
913 goto err_free;
914
915 if (n == -1) {
916 ret = 0;
917 goto err_free;
918 }
919
920 mutex_lock(&ubd_lock);
921 ret = ubd_add(n, error_out);
922 if (ret)
923 ubd_devs[n].file = NULL;
924 mutex_unlock(&ubd_lock);
925
926out:
927 return ret;
928
929err_free:
930 kfree(str);
931 goto out;
932}
933
934static int ubd_get_config(char *name, char *str, int size, char **error_out)
935{
936 struct ubd *ubd_dev;
937 int n, len = 0;
938
939 n = parse_unit(&name);
940 if((n >= MAX_DEV) || (n < 0)){
941 *error_out = "ubd_get_config : device number out of range";
942 return -1;
943 }
944
945 ubd_dev = &ubd_devs[n];
946 mutex_lock(&ubd_lock);
947
948 if(ubd_dev->file == NULL){
949 CONFIG_CHUNK(str, size, len, "", 1);
950 goto out;
951 }
952
953 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
954
955 if(ubd_dev->cow.file != NULL){
956 CONFIG_CHUNK(str, size, len, ",", 0);
957 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
958 }
959 else CONFIG_CHUNK(str, size, len, "", 1);
960
961 out:
962 mutex_unlock(&ubd_lock);
963 return len;
964}
965
966static int ubd_id(char **str, int *start_out, int *end_out)
967{
968 int n;
969
970 n = parse_unit(str);
971 *start_out = 0;
972 *end_out = MAX_DEV - 1;
973 return n;
974}
975
976static int ubd_remove(int n, char **error_out)
977{
978 struct gendisk *disk = ubd_gendisk[n];
979 struct ubd *ubd_dev;
980 int err = -ENODEV;
981
982 mutex_lock(&ubd_lock);
983
984 ubd_dev = &ubd_devs[n];
985
986 if(ubd_dev->file == NULL)
987 goto out;
988
989 /* you cannot remove a open disk */
990 err = -EBUSY;
991 if(ubd_dev->count > 0)
992 goto out;
993
994 ubd_gendisk[n] = NULL;
995 if(disk != NULL){
996 del_gendisk(disk);
997 put_disk(disk);
998 }
999
1000 if(fake_gendisk[n] != NULL){
1001 del_gendisk(fake_gendisk[n]);
1002 put_disk(fake_gendisk[n]);
1003 fake_gendisk[n] = NULL;
1004 }
1005
1006 err = 0;
1007 platform_device_unregister(&ubd_dev->pdev);
1008out:
1009 mutex_unlock(&ubd_lock);
1010 return err;
1011}
1012
/* All these are called by mconsole in process context and without
 * ubd-specific locks. The structure itself is const except for .list.
 */
/* Registered with mconsole by ubd_mc_init() under the name "ubd". */
static struct mc_device ubd_mc = {
	.list		= LIST_HEAD_INIT(ubd_mc.list),
	.name		= "ubd",
	.config		= ubd_config,
	.get_config	= ubd_get_config,
	.id		= ubd_id,
	.remove		= ubd_remove,
};
1024
/* Initcall: hook the ubd handlers above into mconsole. */
static int __init ubd_mc_init(void)
{
	mconsole_register_dev(&ubd_mc);
	return 0;
}
1030
1031__initcall(ubd_mc_init);
1032
/* Initcall: give unit 0 the default file "root_fs" if none was configured. */
static int __init ubd0_init(void)
{
	struct ubd *ubd_dev = &ubd_devs[0];

	mutex_lock(&ubd_lock);
	if(ubd_dev->file == NULL)
		ubd_dev->file = "root_fs";
	mutex_unlock(&ubd_lock);

	return 0;
}
1044
1045__initcall(ubd0_init);
1046
/* Used in ubd_init, which is an initcall */
/* Carries the same DRIVER_NAME that ubd_disk_register() gives each platform device. */
static struct platform_driver ubd_driver = {
	.driver = {
		.name  = DRIVER_NAME,
	},
};
1053
1054static int __init ubd_init(void)
1055{
1056 char *error;
1057 int i, err;
1058
1059 if (register_blkdev(UBD_MAJOR, "ubd"))
1060 return -1;
1061
1062 if (fake_major != UBD_MAJOR) {
1063 char name[sizeof("ubd_nnn\0")];
1064
1065 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1066 if (register_blkdev(fake_major, "ubd"))
1067 return -1;
1068 }
1069 platform_driver_register(&ubd_driver);
1070 mutex_lock(&ubd_lock);
1071 for (i = 0; i < MAX_DEV; i++){
1072 err = ubd_add(i, &error);
1073 if(err)
1074 printk(KERN_ERR "Failed to initialize ubd device %d :"
1075 "%s\n", i, error);
1076 }
1077 mutex_unlock(&ubd_lock);
1078 return 0;
1079}
1080
1081late_initcall(ubd_init);
1082
1083static int __init ubd_driver_init(void){
1084 unsigned long stack;
1085 int err;
1086
1087 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1088 if(global_openflags.s){
1089 printk(KERN_INFO "ubd: Synchronous mode\n");
1090 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1091 * enough. So use anyway the io thread. */
1092 }
1093 stack = alloc_stack(0, 0);
1094 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1095 &thread_fd);
1096 if(io_pid < 0){
1097 printk(KERN_ERR
1098 "ubd : Failed to start I/O thread (errno = %d) - "
1099 "falling back to synchronous I/O\n", -io_pid);
1100 io_pid = -1;
1101 return 0;
1102 }
1103 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1104 0, "ubd", ubd_devs);
1105 if(err != 0)
1106 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1107 return 0;
1108}
1109
1110device_initcall(ubd_driver_init);
1111
1112static int ubd_open(struct block_device *bdev, fmode_t mode)
1113{
1114 struct gendisk *disk = bdev->bd_disk;
1115 struct ubd *ubd_dev = disk->private_data;
1116 int err = 0;
1117
1118 mutex_lock(&ubd_mutex);
1119 if(ubd_dev->count == 0){
1120 err = ubd_open_dev(ubd_dev);
1121 if(err){
1122 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1123 disk->disk_name, ubd_dev->file, -err);
1124 goto out;
1125 }
1126 }
1127 ubd_dev->count++;
1128 set_disk_ro(disk, !ubd_dev->openflags.w);
1129
1130 /* This should no more be needed. And it didn't work anyway to exclude
1131 * read-write remounting of filesystems.*/
1132 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1133 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1134 err = -EROFS;
1135 }*/
1136out:
1137 mutex_unlock(&ubd_mutex);
1138 return err;
1139}
1140
1141static int ubd_release(struct gendisk *disk, fmode_t mode)
1142{
1143 struct ubd *ubd_dev = disk->private_data;
1144
1145 mutex_lock(&ubd_mutex);
1146 if(--ubd_dev->count == 0)
1147 ubd_close_dev(ubd_dev);
1148 mutex_unlock(&ubd_mutex);
1149 return 0;
1150}
1151
1152static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1153 __u64 *cow_offset, unsigned long *bitmap,
1154 __u64 bitmap_offset, unsigned long *bitmap_words,
1155 __u64 bitmap_len)
1156{
1157 __u64 sector = io_offset >> 9;
1158 int i, update_bitmap = 0;
1159
1160 for(i = 0; i < length >> 9; i++){
1161 if(cow_mask != NULL)
1162 ubd_set_bit(i, (unsigned char *) cow_mask);
1163 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1164 continue;
1165
1166 update_bitmap = 1;
1167 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1168 }
1169
1170 if(!update_bitmap)
1171 return;
1172
1173 *cow_offset = sector / (sizeof(unsigned long) * 8);
1174
1175 /* This takes care of the case where we're exactly at the end of the
1176 * device, and *cow_offset + 1 is off the end. So, just back it up
1177 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1178 * for the original diagnosis.
1179 */
1180 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1181 sizeof(unsigned long)) - 1))
1182 (*cow_offset)--;
1183
1184 bitmap_words[0] = bitmap[*cow_offset];
1185 bitmap_words[1] = bitmap[*cow_offset + 1];
1186
1187 *cow_offset *= sizeof(unsigned long);
1188 *cow_offset += bitmap_offset;
1189}
1190
1191static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1192 __u64 bitmap_offset, __u64 bitmap_len)
1193{
1194 __u64 sector = req->offset >> 9;
1195 int i;
1196
1197 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1198 panic("Operation too long");
1199
1200 if(req->op == UBD_READ) {
1201 for(i = 0; i < req->length >> 9; i++){
1202 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1203 ubd_set_bit(i, (unsigned char *)
1204 &req->sector_mask);
1205 }
1206 }
1207 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1208 &req->cow_offset, bitmap, bitmap_offset,
1209 req->bitmap_words, bitmap_len);
1210}
1211
1212/* Called with dev->lock held */
1213static void prepare_request(struct request *req, struct io_thread_req *io_req,
1214 unsigned long long offset, int page_offset,
1215 int len, struct page *page)
1216{
1217 struct gendisk *disk = req->rq_disk;
1218 struct ubd *ubd_dev = disk->private_data;
1219
1220 io_req->req = req;
1221 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1222 ubd_dev->fd;
1223 io_req->fds[1] = ubd_dev->fd;
1224 io_req->cow_offset = -1;
1225 io_req->offset = offset;
1226 io_req->length = len;
1227 io_req->error = 0;
1228 io_req->sector_mask = 0;
1229
1230 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1231 io_req->offsets[0] = 0;
1232 io_req->offsets[1] = ubd_dev->cow.data_offset;
1233 io_req->buffer = page_address(page) + page_offset;
1234 io_req->sectorsize = 1 << 9;
1235
1236 if(ubd_dev->cow.file != NULL)
1237 cowify_req(io_req, ubd_dev->cow.bitmap,
1238 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1239
1240}
1241
1242/* Called with dev->lock held */
1243static void do_ubd_request(struct request_queue *q)
1244{
1245 struct io_thread_req *io_req;
1246 struct request *req;
1247 int n;
1248
1249 while(1){
1250 struct ubd *dev = q->queuedata;
1251 if(dev->end_sg == 0){
1252 struct request *req = blk_fetch_request(q);
1253 if(req == NULL)
1254 return;
1255
1256 dev->request = req;
1257 dev->rq_pos = blk_rq_pos(req);
1258 dev->start_sg = 0;
1259 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1260 }
1261
1262 req = dev->request;
1263 while(dev->start_sg < dev->end_sg){
1264 struct scatterlist *sg = &dev->sg[dev->start_sg];
1265
1266 io_req = kmalloc(sizeof(struct io_thread_req),
1267 GFP_ATOMIC);
1268 if(io_req == NULL){
1269 if(list_empty(&dev->restart))
1270 list_add(&dev->restart, &restart);
1271 return;
1272 }
1273 prepare_request(req, io_req,
1274 (unsigned long long)dev->rq_pos << 9,
1275 sg->offset, sg->length, sg_page(sg));
1276
1277 n = os_write_file(thread_fd, &io_req,
1278 sizeof(struct io_thread_req *));
1279 if(n != sizeof(struct io_thread_req *)){
1280 if(n != -EAGAIN)
1281 printk("write to io thread failed, "
1282 "errno = %d\n", -n);
1283 else if(list_empty(&dev->restart))
1284 list_add(&dev->restart, &restart);
1285 kfree(io_req);
1286 return;
1287 }
1288
1289 dev->rq_pos += sg->length >> 9;
1290 dev->start_sg++;
1291 }
1292 dev->end_sg = 0;
1293 dev->request = NULL;
1294 }
1295}
1296
1297static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1298{
1299 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1300
1301 geo->heads = 128;
1302 geo->sectors = 32;
1303 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1304 return 0;
1305}
1306
1307static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1308 unsigned int cmd, unsigned long arg)
1309{
1310 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1311 u16 ubd_id[ATA_ID_WORDS];
1312
1313 switch (cmd) {
1314 struct cdrom_volctrl volume;
1315 case HDIO_GET_IDENTITY:
1316 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1317 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1318 ubd_id[ATA_ID_HEADS] = 128;
1319 ubd_id[ATA_ID_SECTORS] = 32;
1320 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1321 sizeof(ubd_id)))
1322 return -EFAULT;
1323 return 0;
1324
1325 case CDROMVOLREAD:
1326 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1327 return -EFAULT;
1328 volume.channel0 = 255;
1329 volume.channel1 = 255;
1330 volume.channel2 = 255;
1331 volume.channel3 = 255;
1332 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1333 return -EFAULT;
1334 return 0;
1335 }
1336 return -EINVAL;
1337}
1338
1339static int update_bitmap(struct io_thread_req *req)
1340{
1341 int n;
1342
1343 if(req->cow_offset == -1)
1344 return 0;
1345
1346 n = os_seek_file(req->fds[1], req->cow_offset);
1347 if(n < 0){
1348 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1349 return 1;
1350 }
1351
1352 n = os_write_file(req->fds[1], &req->bitmap_words,
1353 sizeof(req->bitmap_words));
1354 if(n != sizeof(req->bitmap_words)){
1355 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1356 req->fds[1]);
1357 return 1;
1358 }
1359
1360 return 0;
1361}
1362
1363static void do_io(struct io_thread_req *req)
1364{
1365 char *buf;
1366 unsigned long len;
1367 int n, nsectors, start, end, bit;
1368 int err;
1369 __u64 off;
1370
1371 nsectors = req->length / req->sectorsize;
1372 start = 0;
1373 do {
1374 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1375 end = start;
1376 while((end < nsectors) &&
1377 (ubd_test_bit(end, (unsigned char *)
1378 &req->sector_mask) == bit))
1379 end++;
1380
1381 off = req->offset + req->offsets[bit] +
1382 start * req->sectorsize;
1383 len = (end - start) * req->sectorsize;
1384 buf = &req->buffer[start * req->sectorsize];
1385
1386 err = os_seek_file(req->fds[bit], off);
1387 if(err < 0){
1388 printk("do_io - lseek failed : err = %d\n", -err);
1389 req->error = 1;
1390 return;
1391 }
1392 if(req->op == UBD_READ){
1393 n = 0;
1394 do {
1395 buf = &buf[n];
1396 len -= n;
1397 n = os_read_file(req->fds[bit], buf, len);
1398 if (n < 0) {
1399 printk("do_io - read failed, err = %d "
1400 "fd = %d\n", -n, req->fds[bit]);
1401 req->error = 1;
1402 return;
1403 }
1404 } while((n < len) && (n != 0));
1405 if (n < len) memset(&buf[n], 0, len - n);
1406 } else {
1407 n = os_write_file(req->fds[bit], buf, len);
1408 if(n != len){
1409 printk("do_io - write failed err = %d "
1410 "fd = %d\n", -n, req->fds[bit]);
1411 req->error = 1;
1412 return;
1413 }
1414 }
1415
1416 start = end;
1417 } while(start < nsectors);
1418
1419 req->error = update_bitmap(req);
1420}
1421
/*
 * File descriptor the I/O thread reads request pointers from and writes
 * completed request pointers back to (see io_thread()).
 *
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/*
 * Number of requests picked up by the I/O thread so far.
 * Only changed by the io thread. XXX: currently unused.
 */
static int io_count = 0;
1429
1430int io_thread(void *arg)
1431{
1432 struct io_thread_req *req;
1433 int n;
1434
1435 ignore_sigwinch_sig();
1436 while(1){
1437 n = os_read_file(kernel_fd, &req,
1438 sizeof(struct io_thread_req *));
1439 if(n != sizeof(struct io_thread_req *)){
1440 if(n < 0)
1441 printk("io_thread - read failed, fd = %d, "
1442 "err = %d\n", kernel_fd, -n);
1443 else {
1444 printk("io_thread - short read, fd = %d, "
1445 "length = %d\n", kernel_fd, n);
1446 }
1447 continue;
1448 }
1449 io_count++;
1450 do_io(req);
1451 n = os_write_file(kernel_fd, &req,
1452 sizeof(struct io_thread_req *));
1453 if(n != sizeof(struct io_thread_req *))
1454 printk("io_thread - write failed, fd = %d, err = %d\n",
1455 kernel_fd, -n);
1456 }
1457
1458 return 0;
1459}
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2018 Cambridge Greys Ltd
4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
6 */
7
8/* 2001-09-28...2002-04-17
9 * Partition stuff by James_McMechan@hotmail.com
10 * old style ubd by setting UBD_SHIFT to 0
11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
12 * partitions have changed in 2.5
13 * 2003-01-29 more tinkering for 2.5.59-1
14 * This should now address the sysfs problems and has
15 * the symlink for devfs to allow for booting with
16 * the common /dev/ubd/discX/... names rather than
17 * only /dev/ubdN/discN this version also has lots of
18 * clean ups preparing for ubd-many.
19 * James McMechan
20 */
21
22#define UBD_SHIFT 4
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
27#include <linux/blk-mq.h>
28#include <linux/ata.h>
29#include <linux/hdreg.h>
30#include <linux/cdrom.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33#include <linux/ctype.h>
34#include <linux/slab.h>
35#include <linux/vmalloc.h>
36#include <linux/platform_device.h>
37#include <linux/scatterlist.h>
38#include <asm/tlbflush.h>
39#include <kern_util.h>
40#include "mconsole_kern.h"
41#include <init.h>
42#include <irq_kern.h>
43#include "ubd.h"
44#include <os.h>
45#include "cow.h"
46
/*
 * Max request size, in sectors, is determined by the sector mask: one bit
 * per sector in an unsigned long (io_thread_req.sector_mask).  With 64-bit
 * longs and 512-byte sectors that is 32K.
 */
#define UBD_MAX_REQUEST (8 * sizeof(long))
49
/*
 * One unit of work exchanged with the I/O thread; pointers to these are
 * passed over a file descriptor and freed with kfree() by ubd_handler()
 * once the request has completed.
 */
struct io_thread_req {
	/* block layer request this unit belongs to */
	struct request *req;
	/* descriptors to do I/O on; NOTE(review): presumably indexed by the
	 * per-sector sector_mask bit - confirm against the I/O thread code */
	int fds[2];
	/* per-descriptor byte offsets, indexed like fds[] */
	unsigned long offsets[2];
	/* byte offset of the transfer within the device */
	unsigned long long offset;
	/* transfer length in bytes; passed to blk_update_request() */
	unsigned long length;
	/* data buffer; ubd_handler() ends the request outright when NULL */
	char *buffer;
	int sectorsize;
	/* one bit per sector of the transfer */
	unsigned long sector_mask;
	/* NOTE(review): presumably the file offset of bitmap_words - confirm */
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
	/* completion status; passed to the blk_mq end-request helpers */
	int error;
};
63
64
/*
 * Receive state used by ubd_handler(): irq_req_buffer is the array of
 * completed request pointers filled by bulk_req_safe_read(), while
 * irq_remainder and irq_remainder_size hold the bytes of a partially read
 * pointer between calls.
 */
static struct io_thread_req * (*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/*
 * NOTE(review): presumably the equivalent receive state for the I/O thread
 * side; not referenced in this part of the file - confirm.
 */
static struct io_thread_req * (*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
72
73
74
/*
 * Test bit number 'bit' in the byte array 'data'.  Bits are numbered from
 * the least significant bit of data[0] upwards.
 *
 * Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_byte = sizeof(data[0]) * 8;
	__u64 byte_idx = bit / bits_per_byte;
	int shift = bit % bits_per_byte;

	return ((data[byte_idx] >> shift) & 1) ? 1 : 0;
}
85
/*
 * Set bit number 'bit' in the byte array 'data'.  Bits are numbered from
 * the least significant bit of data[0] upwards; other bits are untouched.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_byte = sizeof(data[0]) * 8;
	unsigned char *target = &data[bit / bits_per_byte];
	int shift = bit % bits_per_byte;

	*target |= (1 << shift);
}
96/*End stuff from ubd_user.h*/
97
/* Name given to the platform device registered for each ubd unit */
#define DRIVER_NAME "uml-blkdev"

/* ubd_lock guards the driver's configuration state (ubd_devs[], fake_major
 * and the gendisk arrays) */
static DEFINE_MUTEX(ubd_lock);
static DEFINE_MUTEX(ubd_mutex);	/* replaces BKL, might not be needed */

/* Forward declarations of the block_device_operations callbacks */
static int ubd_open(struct block_device *bdev, fmode_t mode);
static void ubd_release(struct gendisk *disk, fmode_t mode);
static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of ubd units supported; valid unit numbers are 0 .. MAX_DEV - 1 */
#define MAX_DEV (16)
110
/* Block device operations installed on every ubd gendisk (see
 * ubd_disk_register()) */
static const struct block_device_operations ubd_blops = {
	.owner		= THIS_MODULE,
	.open		= ubd_open,
	.release	= ubd_release,
	.ioctl		= ubd_ioctl,
	.compat_ioctl	= blkdev_compat_ptr_ioctl,
	.getgeo		= ubd_getgeo,
};
119
/* Protected by ubd_lock */
/* Extra major number set with "ubd=<major>"; UBD_MAJOR means "not set" */
static int fake_major = UBD_MAJOR;
/* Per-unit disks registered under UBD_MAJOR and under fake_major */
static struct gendisk *ubd_gendisk[MAX_DEV];
static struct gendisk *fake_gendisk[MAX_DEV];

/* Default host open flags: read/write, with the .s flag set only when
 * CONFIG_BLK_DEV_UBD_SYNC is enabled */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif
/* Flags new devices start from; "ubd=sync" applies of_sync() to them */
static struct openflags global_openflags = OPEN_FLAGS;
133
/* Copy-on-write state of a ubd device that has a backing file */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap; vmalloc'ed in ubd_open_dev() and
	 * vfree'd in ubd_close_dev() */
	unsigned long *bitmap;
	/* size of the bitmap in bytes (it is the vmalloc() size) */
	unsigned long bitmap_len;
	/* offset of the bitmap within the COW file */
	int bitmap_offset;
	/* as computed by cow_sizes(); NOTE(review): presumably the offset of
	 * the data area within the COW file - confirm */
	int data_offset;
};
144
/* Maximum number of segments per request (blk_queue_max_segments()) */
#define MAX_SG 64

/* Per-unit driver state; one entry of ubd_devs[] */
struct ubd {
	/* name (and fd, below) of the file opened for writing, either the
	 * backing or the cow file. NULL means the unit is not configured. */
	char *file;
	/* ubd_remove() refuses to remove the device while this is > 0 */
	int count;
	int fd;
	/* device size in bytes, rounded up to a whole sector by ubd_add() */
	__u64 size;
	/* flags parsed from the command line by ubd_setup_common() */
	struct openflags boot_openflags;
	/* flags actually used; copied from boot_openflags on open */
	struct openflags openflags;
	/* 'c' flag: do not lock the file on the host */
	unsigned shared:1;
	/* 'd' flag: do not look for a COW header */
	unsigned no_cow:1;
	/* 't' flag: do not advertise discard/write-zeroes */
	unsigned no_trim:1;
	struct cow cow;
	/* platform device registered for sysfs (real major only) */
	struct platform_device pdev;
	struct request_queue *queue;
	struct blk_mq_tag_set tag_set;
	spinlock_t lock;
};
165
/* Initializer for an unused struct cow: no backing file, no bitmap */
#define DEFAULT_COW { \
	.file =			NULL, \
	.fd =			-1, \
	.bitmap =		NULL, \
	.bitmap_offset =	0, \
	.data_offset =		0, \
}

/* Initializer for an unconfigured struct ubd; also used by
 * ubd_device_release() to reset a unit */
#define DEFAULT_UBD { \
	.file =			NULL, \
	.count =		0, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
	.no_cow =		0, \
	.no_trim =		0, \
	.shared =		0, \
	.cow =			DEFAULT_COW, \
	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
}
187
/* Protected by ubd_lock */
/* State of every ubd unit, indexed by unit number */
static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };

/* Only changed by fake_ide_setup which is a setup */
static int fake_ide = 0;
/* /proc/ide and /proc/ide/ide0, created on demand by make_proc_ide() */
static struct proc_dir_entry *proc_ide_root = NULL;
static struct proc_dir_entry *proc_ide = NULL;

/* blk-mq queue_rq callback; referenced by ubd_mq_ops */
static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *bd);
198
199static void make_proc_ide(void)
200{
201 proc_ide_root = proc_mkdir("ide", NULL);
202 proc_ide = proc_mkdir("ide0", proc_ide_root);
203}
204
/* seq_file show callback for the fake "media" entry: always prints "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	static const char media_type[] = "disk\n";

	seq_puts(m, media_type);
	return 0;
}
210
211static void make_ide_entries(const char *dev_name)
212{
213 struct proc_dir_entry *dir, *ent;
214 char name[64];
215
216 if(proc_ide_root == NULL) make_proc_ide();
217
218 dir = proc_mkdir(dev_name, proc_ide);
219 if(!dir) return;
220
221 ent = proc_create_single("media", S_IRUGO, dir,
222 fake_ide_media_proc_show);
223 if(!ent) return;
224 snprintf(name, sizeof(name), "ide0/%s", dev_name);
225 proc_symlink(dev_name, proc_ide_root, name);
226}
227
228static int fake_ide_setup(char *str)
229{
230 fake_ide = 1;
231 return 1;
232}
233
234__setup("fake_ide", fake_ide_setup);
235
236__uml_help(fake_ide_setup,
237"fake_ide\n"
238" Create ide0 entries that map onto ubd devices.\n\n"
239);
240
/*
 * Parse a unit specifier at *ptr: either a number (as understood by
 * simple_strtoul with base 0) or a single letter 'a'..'z', where 'a' is
 * unit 0.  On success *ptr is advanced past the specifier and the unit
 * number is returned; otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cursor = *ptr;
	char *end;
	int unit;

	if (isdigit(*cursor)) {
		unit = simple_strtoul(cursor, &end, 0);
		if (end == cursor)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cursor >= 'a') && (*cursor <= 'z')) {
		*ptr = cursor + 1;
		return *cursor - 'a';
	}

	return -1;
}
259
260/* If *index_out == -1 at exit, the passed option was a general one;
261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
262 * should not be freed on exit.
263 */
264static int ubd_setup_common(char *str, int *index_out, char **error_out)
265{
266 struct ubd *ubd_dev;
267 struct openflags flags = global_openflags;
268 char *backing_file;
269 int n, err = 0, i;
270
271 if(index_out) *index_out = -1;
272 n = *str;
273 if(n == '='){
274 char *end;
275 int major;
276
277 str++;
278 if(!strcmp(str, "sync")){
279 global_openflags = of_sync(global_openflags);
280 return err;
281 }
282
283 err = -EINVAL;
284 major = simple_strtoul(str, &end, 0);
285 if((*end != '\0') || (end == str)){
286 *error_out = "Didn't parse major number";
287 return err;
288 }
289
290 mutex_lock(&ubd_lock);
291 if (fake_major != UBD_MAJOR) {
292 *error_out = "Can't assign a fake major twice";
293 goto out1;
294 }
295
296 fake_major = major;
297
298 printk(KERN_INFO "Setting extra ubd major number to %d\n",
299 major);
300 err = 0;
301 out1:
302 mutex_unlock(&ubd_lock);
303 return err;
304 }
305
306 n = parse_unit(&str);
307 if(n < 0){
308 *error_out = "Couldn't parse device number";
309 return -EINVAL;
310 }
311 if(n >= MAX_DEV){
312 *error_out = "Device number out of range";
313 return 1;
314 }
315
316 err = -EBUSY;
317 mutex_lock(&ubd_lock);
318
319 ubd_dev = &ubd_devs[n];
320 if(ubd_dev->file != NULL){
321 *error_out = "Device is already configured";
322 goto out;
323 }
324
325 if (index_out)
326 *index_out = n;
327
328 err = -EINVAL;
329 for (i = 0; i < sizeof("rscdt="); i++) {
330 switch (*str) {
331 case 'r':
332 flags.w = 0;
333 break;
334 case 's':
335 flags.s = 1;
336 break;
337 case 'd':
338 ubd_dev->no_cow = 1;
339 break;
340 case 'c':
341 ubd_dev->shared = 1;
342 break;
343 case 't':
344 ubd_dev->no_trim = 1;
345 break;
346 case '=':
347 str++;
348 goto break_loop;
349 default:
350 *error_out = "Expected '=' or flag letter "
351 "(r, s, c, t or d)";
352 goto out;
353 }
354 str++;
355 }
356
357 if (*str == '=')
358 *error_out = "Too many flags specified";
359 else
360 *error_out = "Missing '='";
361 goto out;
362
363break_loop:
364 backing_file = strchr(str, ',');
365
366 if (backing_file == NULL)
367 backing_file = strchr(str, ':');
368
369 if(backing_file != NULL){
370 if(ubd_dev->no_cow){
371 *error_out = "Can't specify both 'd' and a cow file";
372 goto out;
373 }
374 else {
375 *backing_file = '\0';
376 backing_file++;
377 }
378 }
379 err = 0;
380 ubd_dev->file = str;
381 ubd_dev->cow.file = backing_file;
382 ubd_dev->boot_openflags = flags;
383out:
384 mutex_unlock(&ubd_lock);
385 return err;
386}
387
388static int ubd_setup(char *str)
389{
390 char *error;
391 int err;
392
393 err = ubd_setup_common(str, NULL, &error);
394 if(err)
395 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
396 "%s\n", str, error);
397 return 1;
398}
399
400__setup("ubd", ubd_setup);
401__uml_help(ubd_setup,
402"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
403" This is used to associate a device with a file in the underlying\n"
404" filesystem. When specifying two filenames, the first one is the\n"
405" COW name and the second is the backing file name. As separator you can\n"
406" use either a ':' or a ',': the first one allows writing things like;\n"
407" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
408" while with a ',' the shell would not expand the 2nd '~'.\n"
409" When using only one filename, UML will detect whether to treat it like\n"
410" a COW file or a backing file. To override this detection, add the 'd'\n"
411" flag:\n"
412" ubd0d=BackingFile\n"
413" Usually, there is a filesystem in the file, but \n"
414" that's not required. Swap devices containing swap files can be\n"
415" specified like this. Also, a file which doesn't contain a\n"
416" filesystem can have its contents read in the virtual \n"
417" machine by running 'dd' on the device. <n> must be in the range\n"
418" 0 to 7. Appending an 'r' to the number will cause that device\n"
419" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
420" an 's' will cause data to be written to disk on the host immediately.\n"
421" 'c' will cause the device to be treated as being shared between multiple\n"
422" UMLs and file locking will be turned off - this is appropriate for a\n"
423" cluster filesystem and inappropriate at almost all other times.\n\n"
424" 't' will disable trim/discard support on the device (enabled by default).\n\n"
425);
426
427static int udb_setup(char *str)
428{
429 printk("udb%s specified on command line is almost certainly a ubd -> "
430 "udb TYPO\n", str);
431 return 1;
432}
433
434__setup("udb", udb_setup);
435__uml_help(udb_setup,
436"udb\n"
437" This option is here solely to catch ubd -> udb typos, which can be\n"
438" to impossible to catch visually unless you specifically look for\n"
439" them. The only result of any option starting with 'udb' is an error\n"
440" in the boot output.\n\n"
441);
442
/*
 * Kernel-side file descriptor of the channel to the I/O thread;
 * ubd_handler() reads completed request pointers from it.
 * Only changed by ubd_init, which is an initcall.
 */
static int thread_fd = -1;
445
446/* Function to read several request pointers at a time
447* handling fractional reads if (and as) needed
448*/
449
450static int bulk_req_safe_read(
451 int fd,
452 struct io_thread_req * (*request_buffer)[],
453 struct io_thread_req **remainder,
454 int *remainder_size,
455 int max_recs
456 )
457{
458 int n = 0;
459 int res = 0;
460
461 if (*remainder_size > 0) {
462 memmove(
463 (char *) request_buffer,
464 (char *) remainder, *remainder_size
465 );
466 n = *remainder_size;
467 }
468
469 res = os_read_file(
470 fd,
471 ((char *) request_buffer) + *remainder_size,
472 sizeof(struct io_thread_req *)*max_recs
473 - *remainder_size
474 );
475 if (res > 0) {
476 n += res;
477 if ((n % sizeof(struct io_thread_req *)) > 0) {
478 /*
479 * Read somehow returned not a multiple of dword
480 * theoretically possible, but never observed in the
481 * wild, so read routine must be able to handle it
482 */
483 *remainder_size = n % sizeof(struct io_thread_req *);
484 WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
485 memmove(
486 remainder,
487 ((char *) request_buffer) +
488 (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
489 *remainder_size
490 );
491 n = n - *remainder_size;
492 }
493 } else {
494 n = res;
495 }
496 return n;
497}
498
499/* Called without dev->lock held, and only in interrupt context. */
500static void ubd_handler(void)
501{
502 int n;
503 int count;
504
505 while(1){
506 n = bulk_req_safe_read(
507 thread_fd,
508 irq_req_buffer,
509 &irq_remainder,
510 &irq_remainder_size,
511 UBD_REQ_BUFFER_SIZE
512 );
513 if (n < 0) {
514 if(n == -EAGAIN)
515 break;
516 printk(KERN_ERR "spurious interrupt in ubd_handler, "
517 "err = %d\n", -n);
518 return;
519 }
520 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
521 struct io_thread_req *io_req = (*irq_req_buffer)[count];
522
523 if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
524 blk_queue_max_discard_sectors(io_req->req->q, 0);
525 blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
526 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
527 }
528 if ((io_req->error) || (io_req->buffer == NULL))
529 blk_mq_end_request(io_req->req, io_req->error);
530 else {
531 if (!blk_update_request(io_req->req, io_req->error, io_req->length))
532 __blk_mq_end_request(io_req->req, io_req->error);
533 }
534 kfree(io_req);
535 }
536 }
537}
538
539static irqreturn_t ubd_intr(int irq, void *dev)
540{
541 ubd_handler();
542 return IRQ_HANDLED;
543}
544
/* Pid of the I/O thread, -1 while there is none.
 * Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
555
556static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
557{
558 char *file;
559 int fd;
560 int err;
561
562 __u32 version;
563 __u32 align;
564 char *backing_file;
565 time64_t mtime;
566 unsigned long long size;
567 int sector_size;
568 int bitmap_offset;
569
570 if (ubd_dev->file && ubd_dev->cow.file) {
571 file = ubd_dev->cow.file;
572
573 goto out;
574 }
575
576 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
577 if (fd < 0)
578 return fd;
579
580 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
581 &mtime, &size, §or_size, &align, &bitmap_offset);
582 os_close_file(fd);
583
584 if(err == -EINVAL)
585 file = ubd_dev->file;
586 else
587 file = backing_file;
588
589out:
590 return os_file_size(file, size_out);
591}
592
/*
 * Read len bytes of COW bitmap from fd at the given offset into buf.
 * Returns 0 on success or the negative error from os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int res = os_pread_file(fd, buf, len, offset);

	return (res < 0) ? res : 0;
}
603
604static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
605{
606 time64_t modtime;
607 unsigned long long actual;
608 int err;
609
610 err = os_file_modtime(file, &modtime);
611 if (err < 0) {
612 printk(KERN_ERR "Failed to get modification time of backing "
613 "file \"%s\", err = %d\n", file, -err);
614 return err;
615 }
616
617 err = os_file_size(file, &actual);
618 if (err < 0) {
619 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
620 "err = %d\n", file, -err);
621 return err;
622 }
623
624 if (actual != size) {
625 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
626 * the typecast.*/
627 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
628 "vs backing file\n", (unsigned long long) size, actual);
629 return -EINVAL;
630 }
631 if (modtime != mtime) {
632 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
633 "backing file\n", mtime, modtime);
634 return -EINVAL;
635 }
636 return 0;
637}
638
639static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
640{
641 struct uml_stat buf1, buf2;
642 int err;
643
644 if (from_cmdline == NULL)
645 return 0;
646 if (!strcmp(from_cmdline, from_cow))
647 return 0;
648
649 err = os_stat_file(from_cmdline, &buf1);
650 if (err < 0) {
651 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
652 -err);
653 return 0;
654 }
655 err = os_stat_file(from_cow, &buf2);
656 if (err < 0) {
657 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
658 -err);
659 return 1;
660 }
661 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
662 return 0;
663
664 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
665 "\"%s\" specified in COW header of \"%s\"\n",
666 from_cmdline, from_cow, cow);
667 return 1;
668}
669
670static int open_ubd_file(char *file, struct openflags *openflags, int shared,
671 char **backing_file_out, int *bitmap_offset_out,
672 unsigned long *bitmap_len_out, int *data_offset_out,
673 int *create_cow_out)
674{
675 time64_t mtime;
676 unsigned long long size;
677 __u32 version, align;
678 char *backing_file;
679 int fd, err, sectorsize, asked_switch, mode = 0644;
680
681 fd = os_open_file(file, *openflags, mode);
682 if (fd < 0) {
683 if ((fd == -ENOENT) && (create_cow_out != NULL))
684 *create_cow_out = 1;
685 if (!openflags->w ||
686 ((fd != -EROFS) && (fd != -EACCES)))
687 return fd;
688 openflags->w = 0;
689 fd = os_open_file(file, *openflags, mode);
690 if (fd < 0)
691 return fd;
692 }
693
694 if (shared)
695 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
696 else {
697 err = os_lock_file(fd, openflags->w);
698 if (err < 0) {
699 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
700 file, -err);
701 goto out_close;
702 }
703 }
704
705 /* Successful return case! */
706 if (backing_file_out == NULL)
707 return fd;
708
709 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
710 &size, §orsize, &align, bitmap_offset_out);
711 if (err && (*backing_file_out != NULL)) {
712 printk(KERN_ERR "Failed to read COW header from COW file "
713 "\"%s\", errno = %d\n", file, -err);
714 goto out_close;
715 }
716 if (err)
717 return fd;
718
719 asked_switch = path_requires_switch(*backing_file_out, backing_file,
720 file);
721
722 /* Allow switching only if no mismatch. */
723 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
724 mtime)) {
725 printk(KERN_ERR "Switching backing file to '%s'\n",
726 *backing_file_out);
727 err = write_cow_header(file, fd, *backing_file_out,
728 sectorsize, align, &size);
729 if (err) {
730 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
731 goto out_close;
732 }
733 } else {
734 *backing_file_out = backing_file;
735 err = backing_file_mismatch(*backing_file_out, size, mtime);
736 if (err)
737 goto out_close;
738 }
739
740 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
741 bitmap_len_out, data_offset_out);
742
743 return fd;
744 out_close:
745 os_close_file(fd);
746 return err;
747}
748
749static int create_cow_file(char *cow_file, char *backing_file,
750 struct openflags flags,
751 int sectorsize, int alignment, int *bitmap_offset_out,
752 unsigned long *bitmap_len_out, int *data_offset_out)
753{
754 int err, fd;
755
756 flags.c = 1;
757 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
758 if (fd < 0) {
759 err = fd;
760 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
761 cow_file, -err);
762 goto out;
763 }
764
765 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
766 bitmap_offset_out, bitmap_len_out,
767 data_offset_out);
768 if (!err)
769 return fd;
770 os_close_file(fd);
771 out:
772 return err;
773}
774
775static void ubd_close_dev(struct ubd *ubd_dev)
776{
777 os_close_file(ubd_dev->fd);
778 if(ubd_dev->cow.file == NULL)
779 return;
780
781 os_close_file(ubd_dev->cow.fd);
782 vfree(ubd_dev->cow.bitmap);
783 ubd_dev->cow.bitmap = NULL;
784}
785
786static int ubd_open_dev(struct ubd *ubd_dev)
787{
788 struct openflags flags;
789 char **back_ptr;
790 int err, create_cow, *create_ptr;
791 int fd;
792
793 ubd_dev->openflags = ubd_dev->boot_openflags;
794 create_cow = 0;
795 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
796 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
797
798 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
799 back_ptr, &ubd_dev->cow.bitmap_offset,
800 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
801 create_ptr);
802
803 if((fd == -ENOENT) && create_cow){
804 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
805 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
806 &ubd_dev->cow.bitmap_offset,
807 &ubd_dev->cow.bitmap_len,
808 &ubd_dev->cow.data_offset);
809 if(fd >= 0){
810 printk(KERN_INFO "Creating \"%s\" as COW file for "
811 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
812 }
813 }
814
815 if(fd < 0){
816 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
817 -fd);
818 return fd;
819 }
820 ubd_dev->fd = fd;
821
822 if(ubd_dev->cow.file != NULL){
823 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
824
825 err = -ENOMEM;
826 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
827 if(ubd_dev->cow.bitmap == NULL){
828 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
829 goto error;
830 }
831 flush_tlb_kernel_vm();
832
833 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
834 ubd_dev->cow.bitmap_offset,
835 ubd_dev->cow.bitmap_len);
836 if(err < 0)
837 goto error;
838
839 flags = ubd_dev->openflags;
840 flags.w = 0;
841 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
842 NULL, NULL, NULL, NULL);
843 if(err < 0) goto error;
844 ubd_dev->cow.fd = err;
845 }
846 if (ubd_dev->no_trim == 0) {
847 ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
848 ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
849 blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
850 blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
851 blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
852 }
853 blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
854 return 0;
855 error:
856 os_close_file(ubd_dev->fd);
857 return err;
858}
859
860static void ubd_device_release(struct device *dev)
861{
862 struct ubd *ubd_dev = dev_get_drvdata(dev);
863
864 blk_cleanup_queue(ubd_dev->queue);
865 blk_mq_free_tag_set(&ubd_dev->tag_set);
866 *ubd_dev = ((struct ubd) DEFAULT_UBD);
867}
868
869static int ubd_disk_register(int major, u64 size, int unit,
870 struct gendisk **disk_out)
871{
872 struct device *parent = NULL;
873 struct gendisk *disk;
874
875 disk = alloc_disk(1 << UBD_SHIFT);
876 if(disk == NULL)
877 return -ENOMEM;
878
879 disk->major = major;
880 disk->first_minor = unit << UBD_SHIFT;
881 disk->fops = &ubd_blops;
882 set_capacity(disk, size / 512);
883 if (major == UBD_MAJOR)
884 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
885 else
886 sprintf(disk->disk_name, "ubd_fake%d", unit);
887
888 /* sysfs register (not for ide fake devices) */
889 if (major == UBD_MAJOR) {
890 ubd_devs[unit].pdev.id = unit;
891 ubd_devs[unit].pdev.name = DRIVER_NAME;
892 ubd_devs[unit].pdev.dev.release = ubd_device_release;
893 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
894 platform_device_register(&ubd_devs[unit].pdev);
895 parent = &ubd_devs[unit].pdev.dev;
896 }
897
898 disk->private_data = &ubd_devs[unit];
899 disk->queue = ubd_devs[unit].queue;
900 device_add_disk(parent, disk, NULL);
901
902 *disk_out = disk;
903 return 0;
904}
905
/* Round n up to the next multiple of SECTOR_SIZE (a power of two).  The
 * argument is parenthesized so that any expression can be passed. */
#define ROUND_BLOCK(n) (((n) + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
907
/* blk-mq operations installed into each device's tag set by ubd_add() */
static const struct blk_mq_ops ubd_mq_ops = {
	.queue_rq = ubd_queue_rq,
};
911
912static int ubd_add(int n, char **error_out)
913{
914 struct ubd *ubd_dev = &ubd_devs[n];
915 int err = 0;
916
917 if(ubd_dev->file == NULL)
918 goto out;
919
920 err = ubd_file_size(ubd_dev, &ubd_dev->size);
921 if(err < 0){
922 *error_out = "Couldn't determine size of device's file";
923 goto out;
924 }
925
926 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
927
928 ubd_dev->tag_set.ops = &ubd_mq_ops;
929 ubd_dev->tag_set.queue_depth = 64;
930 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
931 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
932 ubd_dev->tag_set.driver_data = ubd_dev;
933 ubd_dev->tag_set.nr_hw_queues = 1;
934
935 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
936 if (err)
937 goto out;
938
939 ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
940 if (IS_ERR(ubd_dev->queue)) {
941 err = PTR_ERR(ubd_dev->queue);
942 goto out_cleanup_tags;
943 }
944
945 ubd_dev->queue->queuedata = ubd_dev;
946 blk_queue_write_cache(ubd_dev->queue, true, false);
947
948 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
949 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
950 if(err){
951 *error_out = "Failed to register device";
952 goto out_cleanup_tags;
953 }
954
955 if (fake_major != UBD_MAJOR)
956 ubd_disk_register(fake_major, ubd_dev->size, n,
957 &fake_gendisk[n]);
958
959 /*
960 * Perhaps this should also be under the "if (fake_major)" above
961 * using the fake_disk->disk_name
962 */
963 if (fake_ide)
964 make_ide_entries(ubd_gendisk[n]->disk_name);
965
966 err = 0;
967out:
968 return err;
969
970out_cleanup_tags:
971 blk_mq_free_tag_set(&ubd_dev->tag_set);
972 if (!(IS_ERR(ubd_dev->queue)))
973 blk_cleanup_queue(ubd_dev->queue);
974 goto out;
975}
976
977static int ubd_config(char *str, char **error_out)
978{
979 int n, ret;
980
981 /* This string is possibly broken up and stored, so it's only
982 * freed if ubd_setup_common fails, or if only general options
983 * were set.
984 */
985 str = kstrdup(str, GFP_KERNEL);
986 if (str == NULL) {
987 *error_out = "Failed to allocate memory";
988 return -ENOMEM;
989 }
990
991 ret = ubd_setup_common(str, &n, error_out);
992 if (ret)
993 goto err_free;
994
995 if (n == -1) {
996 ret = 0;
997 goto err_free;
998 }
999
1000 mutex_lock(&ubd_lock);
1001 ret = ubd_add(n, error_out);
1002 if (ret)
1003 ubd_devs[n].file = NULL;
1004 mutex_unlock(&ubd_lock);
1005
1006out:
1007 return ret;
1008
1009err_free:
1010 kfree(str);
1011 goto out;
1012}
1013
1014static int ubd_get_config(char *name, char *str, int size, char **error_out)
1015{
1016 struct ubd *ubd_dev;
1017 int n, len = 0;
1018
1019 n = parse_unit(&name);
1020 if((n >= MAX_DEV) || (n < 0)){
1021 *error_out = "ubd_get_config : device number out of range";
1022 return -1;
1023 }
1024
1025 ubd_dev = &ubd_devs[n];
1026 mutex_lock(&ubd_lock);
1027
1028 if(ubd_dev->file == NULL){
1029 CONFIG_CHUNK(str, size, len, "", 1);
1030 goto out;
1031 }
1032
1033 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1034
1035 if(ubd_dev->cow.file != NULL){
1036 CONFIG_CHUNK(str, size, len, ",", 0);
1037 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1038 }
1039 else CONFIG_CHUNK(str, size, len, "", 1);
1040
1041 out:
1042 mutex_unlock(&ubd_lock);
1043 return len;
1044}
1045
1046static int ubd_id(char **str, int *start_out, int *end_out)
1047{
1048 int n;
1049
1050 n = parse_unit(str);
1051 *start_out = 0;
1052 *end_out = MAX_DEV - 1;
1053 return n;
1054}
1055
1056static int ubd_remove(int n, char **error_out)
1057{
1058 struct gendisk *disk = ubd_gendisk[n];
1059 struct ubd *ubd_dev;
1060 int err = -ENODEV;
1061
1062 mutex_lock(&ubd_lock);
1063
1064 ubd_dev = &ubd_devs[n];
1065
1066 if(ubd_dev->file == NULL)
1067 goto out;
1068
1069 /* you cannot remove a open disk */
1070 err = -EBUSY;
1071 if(ubd_dev->count > 0)
1072 goto out;
1073
1074 ubd_gendisk[n] = NULL;
1075 if(disk != NULL){
1076 del_gendisk(disk);
1077 put_disk(disk);
1078 }
1079
1080 if(fake_gendisk[n] != NULL){
1081 del_gendisk(fake_gendisk[n]);
1082 put_disk(fake_gendisk[n]);
1083 fake_gendisk[n] = NULL;
1084 }
1085
1086 err = 0;
1087 platform_device_unregister(&ubd_dev->pdev);
1088out:
1089 mutex_unlock(&ubd_lock);
1090 return err;
1091}
1092
1093/* All these are called by mconsole in process context and without
1094 * ubd-specific locks. The structure itself is const except for .list.
1095 */
1096static struct mc_device ubd_mc = {
1097 .list = LIST_HEAD_INIT(ubd_mc.list),
1098 .name = "ubd",
1099 .config = ubd_config,
1100 .get_config = ubd_get_config,
1101 .id = ubd_id,
1102 .remove = ubd_remove,
1103};
1104
1105static int __init ubd_mc_init(void)
1106{
1107 mconsole_register_dev(&ubd_mc);
1108 return 0;
1109}
1110
1111__initcall(ubd_mc_init);
1112
1113static int __init ubd0_init(void)
1114{
1115 struct ubd *ubd_dev = &ubd_devs[0];
1116
1117 mutex_lock(&ubd_lock);
1118 if(ubd_dev->file == NULL)
1119 ubd_dev->file = "root_fs";
1120 mutex_unlock(&ubd_lock);
1121
1122 return 0;
1123}
1124
1125__initcall(ubd0_init);
1126
1127/* Used in ubd_init, which is an initcall */
1128static struct platform_driver ubd_driver = {
1129 .driver = {
1130 .name = DRIVER_NAME,
1131 },
1132};
1133
1134static int __init ubd_init(void)
1135{
1136 char *error;
1137 int i, err;
1138
1139 if (register_blkdev(UBD_MAJOR, "ubd"))
1140 return -1;
1141
1142 if (fake_major != UBD_MAJOR) {
1143 char name[sizeof("ubd_nnn\0")];
1144
1145 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1146 if (register_blkdev(fake_major, "ubd"))
1147 return -1;
1148 }
1149
1150 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1151 sizeof(struct io_thread_req *),
1152 GFP_KERNEL
1153 );
1154 irq_remainder = 0;
1155
1156 if (irq_req_buffer == NULL) {
1157 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1158 return -1;
1159 }
1160 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1161 sizeof(struct io_thread_req *),
1162 GFP_KERNEL
1163 );
1164
1165 io_remainder = 0;
1166
1167 if (io_req_buffer == NULL) {
1168 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1169 return -1;
1170 }
1171 platform_driver_register(&ubd_driver);
1172 mutex_lock(&ubd_lock);
1173 for (i = 0; i < MAX_DEV; i++){
1174 err = ubd_add(i, &error);
1175 if(err)
1176 printk(KERN_ERR "Failed to initialize ubd device %d :"
1177 "%s\n", i, error);
1178 }
1179 mutex_unlock(&ubd_lock);
1180 return 0;
1181}
1182
1183late_initcall(ubd_init);
1184
1185static int __init ubd_driver_init(void){
1186 unsigned long stack;
1187 int err;
1188
1189 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1190 if(global_openflags.s){
1191 printk(KERN_INFO "ubd: Synchronous mode\n");
1192 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1193 * enough. So use anyway the io thread. */
1194 }
1195 stack = alloc_stack(0, 0);
1196 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1197 &thread_fd);
1198 if(io_pid < 0){
1199 printk(KERN_ERR
1200 "ubd : Failed to start I/O thread (errno = %d) - "
1201 "falling back to synchronous I/O\n", -io_pid);
1202 io_pid = -1;
1203 return 0;
1204 }
1205 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1206 0, "ubd", ubd_devs);
1207 if(err != 0)
1208 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1209 return 0;
1210}
1211
1212device_initcall(ubd_driver_init);
1213
1214static int ubd_open(struct block_device *bdev, fmode_t mode)
1215{
1216 struct gendisk *disk = bdev->bd_disk;
1217 struct ubd *ubd_dev = disk->private_data;
1218 int err = 0;
1219
1220 mutex_lock(&ubd_mutex);
1221 if(ubd_dev->count == 0){
1222 err = ubd_open_dev(ubd_dev);
1223 if(err){
1224 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1225 disk->disk_name, ubd_dev->file, -err);
1226 goto out;
1227 }
1228 }
1229 ubd_dev->count++;
1230 set_disk_ro(disk, !ubd_dev->openflags.w);
1231
1232 /* This should no more be needed. And it didn't work anyway to exclude
1233 * read-write remounting of filesystems.*/
1234 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1235 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1236 err = -EROFS;
1237 }*/
1238out:
1239 mutex_unlock(&ubd_mutex);
1240 return err;
1241}
1242
1243static void ubd_release(struct gendisk *disk, fmode_t mode)
1244{
1245 struct ubd *ubd_dev = disk->private_data;
1246
1247 mutex_lock(&ubd_mutex);
1248 if(--ubd_dev->count == 0)
1249 ubd_close_dev(ubd_dev);
1250 mutex_unlock(&ubd_mutex);
1251}
1252
1253static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1254 __u64 *cow_offset, unsigned long *bitmap,
1255 __u64 bitmap_offset, unsigned long *bitmap_words,
1256 __u64 bitmap_len)
1257{
1258 __u64 sector = io_offset >> SECTOR_SHIFT;
1259 int i, update_bitmap = 0;
1260
1261 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1262 if(cow_mask != NULL)
1263 ubd_set_bit(i, (unsigned char *) cow_mask);
1264 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1265 continue;
1266
1267 update_bitmap = 1;
1268 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1269 }
1270
1271 if(!update_bitmap)
1272 return;
1273
1274 *cow_offset = sector / (sizeof(unsigned long) * 8);
1275
1276 /* This takes care of the case where we're exactly at the end of the
1277 * device, and *cow_offset + 1 is off the end. So, just back it up
1278 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1279 * for the original diagnosis.
1280 */
1281 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1282 sizeof(unsigned long)) - 1))
1283 (*cow_offset)--;
1284
1285 bitmap_words[0] = bitmap[*cow_offset];
1286 bitmap_words[1] = bitmap[*cow_offset + 1];
1287
1288 *cow_offset *= sizeof(unsigned long);
1289 *cow_offset += bitmap_offset;
1290}
1291
1292static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1293 __u64 bitmap_offset, __u64 bitmap_len)
1294{
1295 __u64 sector = req->offset >> SECTOR_SHIFT;
1296 int i;
1297
1298 if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT)
1299 panic("Operation too long");
1300
1301 if (req_op(req->req) == REQ_OP_READ) {
1302 for (i = 0; i < req->length >> SECTOR_SHIFT; i++) {
1303 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1304 ubd_set_bit(i, (unsigned char *)
1305 &req->sector_mask);
1306 }
1307 }
1308 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1309 &req->cow_offset, bitmap, bitmap_offset,
1310 req->bitmap_words, bitmap_len);
1311}
1312
1313static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
1314 u64 off, struct bio_vec *bvec)
1315{
1316 struct ubd *dev = hctx->queue->queuedata;
1317 struct io_thread_req *io_req;
1318 int ret;
1319
1320 io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC);
1321 if (!io_req)
1322 return -ENOMEM;
1323
1324 io_req->req = req;
1325 if (dev->cow.file)
1326 io_req->fds[0] = dev->cow.fd;
1327 else
1328 io_req->fds[0] = dev->fd;
1329 io_req->error = 0;
1330
1331 if (bvec != NULL) {
1332 io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
1333 io_req->length = bvec->bv_len;
1334 } else {
1335 io_req->buffer = NULL;
1336 io_req->length = blk_rq_bytes(req);
1337 }
1338
1339 io_req->sectorsize = SECTOR_SIZE;
1340 io_req->fds[1] = dev->fd;
1341 io_req->cow_offset = -1;
1342 io_req->offset = off;
1343 io_req->sector_mask = 0;
1344 io_req->offsets[0] = 0;
1345 io_req->offsets[1] = dev->cow.data_offset;
1346
1347 if (dev->cow.file)
1348 cowify_req(io_req, dev->cow.bitmap,
1349 dev->cow.bitmap_offset, dev->cow.bitmap_len);
1350
1351 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1352 if (ret != sizeof(io_req)) {
1353 if (ret != -EAGAIN)
1354 pr_err("write to io thread failed: %d\n", -ret);
1355 kfree(io_req);
1356 }
1357 return ret;
1358}
1359
1360static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req)
1361{
1362 struct req_iterator iter;
1363 struct bio_vec bvec;
1364 int ret;
1365 u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT;
1366
1367 rq_for_each_segment(bvec, req, iter) {
1368 ret = ubd_queue_one_vec(hctx, req, off, &bvec);
1369 if (ret < 0)
1370 return ret;
1371 off += bvec.bv_len;
1372 }
1373 return 0;
1374}
1375
1376static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1377 const struct blk_mq_queue_data *bd)
1378{
1379 struct ubd *ubd_dev = hctx->queue->queuedata;
1380 struct request *req = bd->rq;
1381 int ret = 0, res = BLK_STS_OK;
1382
1383 blk_mq_start_request(req);
1384
1385 spin_lock_irq(&ubd_dev->lock);
1386
1387 switch (req_op(req)) {
1388 /* operations with no lentgth/offset arguments */
1389 case REQ_OP_FLUSH:
1390 ret = ubd_queue_one_vec(hctx, req, 0, NULL);
1391 break;
1392 case REQ_OP_READ:
1393 case REQ_OP_WRITE:
1394 ret = queue_rw_req(hctx, req);
1395 break;
1396 case REQ_OP_DISCARD:
1397 case REQ_OP_WRITE_ZEROES:
1398 ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
1399 break;
1400 default:
1401 WARN_ON_ONCE(1);
1402 res = BLK_STS_NOTSUPP;
1403 }
1404
1405 spin_unlock_irq(&ubd_dev->lock);
1406
1407 if (ret < 0) {
1408 if (ret == -ENOMEM)
1409 res = BLK_STS_RESOURCE;
1410 else
1411 res = BLK_STS_DEV_RESOURCE;
1412 }
1413
1414 return res;
1415}
1416
1417static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1418{
1419 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1420
1421 geo->heads = 128;
1422 geo->sectors = 32;
1423 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1424 return 0;
1425}
1426
1427static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1428 unsigned int cmd, unsigned long arg)
1429{
1430 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1431 u16 ubd_id[ATA_ID_WORDS];
1432
1433 switch (cmd) {
1434 struct cdrom_volctrl volume;
1435 case HDIO_GET_IDENTITY:
1436 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1437 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1438 ubd_id[ATA_ID_HEADS] = 128;
1439 ubd_id[ATA_ID_SECTORS] = 32;
1440 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1441 sizeof(ubd_id)))
1442 return -EFAULT;
1443 return 0;
1444
1445 case CDROMVOLREAD:
1446 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1447 return -EFAULT;
1448 volume.channel0 = 255;
1449 volume.channel1 = 255;
1450 volume.channel2 = 255;
1451 volume.channel3 = 255;
1452 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1453 return -EFAULT;
1454 return 0;
1455 }
1456 return -EINVAL;
1457}
1458
1459static int map_error(int error_code)
1460{
1461 switch (error_code) {
1462 case 0:
1463 return BLK_STS_OK;
1464 case ENOSYS:
1465 case EOPNOTSUPP:
1466 return BLK_STS_NOTSUPP;
1467 case ENOSPC:
1468 return BLK_STS_NOSPC;
1469 }
1470 return BLK_STS_IOERR;
1471}
1472
1473/*
1474 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1475 *
1476 * The following functions are part of UML hypervisor code.
1477 * All functions from here onwards are executed as a helper
1478 * thread and are not allowed to execute any kernel functions.
1479 *
1480 * Any communication must occur strictly via shared memory and IPC.
1481 *
1482 * Do not add printks, locks, kernel memory operations, etc - it
1483 * will result in unpredictable behaviour and/or crashes.
1484 */
1485
1486static int update_bitmap(struct io_thread_req *req)
1487{
1488 int n;
1489
1490 if(req->cow_offset == -1)
1491 return map_error(0);
1492
1493 n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1494 sizeof(req->bitmap_words), req->cow_offset);
1495 if (n != sizeof(req->bitmap_words))
1496 return map_error(-n);
1497
1498 return map_error(0);
1499}
1500
1501static void do_io(struct io_thread_req *req)
1502{
1503 char *buf = NULL;
1504 unsigned long len;
1505 int n, nsectors, start, end, bit;
1506 __u64 off;
1507
1508 /* FLUSH is really a special case, we cannot "case" it with others */
1509
1510 if (req_op(req->req) == REQ_OP_FLUSH) {
1511 /* fds[0] is always either the rw image or our cow file */
1512 req->error = map_error(-os_sync_file(req->fds[0]));
1513 return;
1514 }
1515
1516 nsectors = req->length / req->sectorsize;
1517 start = 0;
1518 do {
1519 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1520 end = start;
1521 while((end < nsectors) &&
1522 (ubd_test_bit(end, (unsigned char *)
1523 &req->sector_mask) == bit))
1524 end++;
1525
1526 off = req->offset + req->offsets[bit] +
1527 start * req->sectorsize;
1528 len = (end - start) * req->sectorsize;
1529 if (req->buffer != NULL)
1530 buf = &req->buffer[start * req->sectorsize];
1531
1532 switch (req_op(req->req)) {
1533 case REQ_OP_READ:
1534 n = 0;
1535 do {
1536 buf = &buf[n];
1537 len -= n;
1538 n = os_pread_file(req->fds[bit], buf, len, off);
1539 if (n < 0) {
1540 req->error = map_error(-n);
1541 return;
1542 }
1543 } while((n < len) && (n != 0));
1544 if (n < len) memset(&buf[n], 0, len - n);
1545 break;
1546 case REQ_OP_WRITE:
1547 n = os_pwrite_file(req->fds[bit], buf, len, off);
1548 if(n != len){
1549 req->error = map_error(-n);
1550 return;
1551 }
1552 break;
1553 case REQ_OP_DISCARD:
1554 case REQ_OP_WRITE_ZEROES:
1555 n = os_falloc_punch(req->fds[bit], off, len);
1556 if (n) {
1557 req->error = map_error(-n);
1558 return;
1559 }
1560 break;
1561 default:
1562 WARN_ON_ONCE(1);
1563 req->error = BLK_STS_NOTSUPP;
1564 return;
1565 }
1566
1567 start = end;
1568 } while(start < nsectors);
1569
1570 req->error = update_bitmap(req);
1571}
1572
/*
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
int kernel_fd = -1;

/* Only changed by the io thread, once per processed request. */
static int io_count;
1580
1581int io_thread(void *arg)
1582{
1583 int n, count, written, res;
1584
1585 os_fix_helper_signals();
1586
1587 while(1){
1588 n = bulk_req_safe_read(
1589 kernel_fd,
1590 io_req_buffer,
1591 &io_remainder,
1592 &io_remainder_size,
1593 UBD_REQ_BUFFER_SIZE
1594 );
1595 if (n <= 0) {
1596 if (n == -EAGAIN)
1597 ubd_read_poll(-1);
1598
1599 continue;
1600 }
1601
1602 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1603 io_count++;
1604 do_io((*io_req_buffer)[count]);
1605 }
1606
1607 written = 0;
1608
1609 do {
1610 res = os_write_file(kernel_fd,
1611 ((char *) io_req_buffer) + written,
1612 n - written);
1613 if (res >= 0) {
1614 written += res;
1615 }
1616 if (written < n) {
1617 ubd_write_poll(-1);
1618 }
1619 } while (written < n);
1620 }
1621
1622 return 0;
1623}