Loading...
1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define UBD_SHIFT 4
21
22#include <linux/module.h>
23#include <linux/init.h>
24#include <linux/blkdev.h>
25#include <linux/ata.h>
26#include <linux/hdreg.h>
27#include <linux/cdrom.h>
28#include <linux/proc_fs.h>
29#include <linux/seq_file.h>
30#include <linux/ctype.h>
31#include <linux/slab.h>
32#include <linux/vmalloc.h>
33#include <linux/platform_device.h>
34#include <linux/scatterlist.h>
35#include <asm/tlbflush.h>
36#include "kern_util.h"
37#include "mconsole_kern.h"
38#include "init.h"
39#include "irq_kern.h"
40#include "ubd.h"
41#include "os.h"
42#include "cow.h"
43
/* Direction of a request handed to the I/O thread. */
enum ubd_req { UBD_READ, UBD_WRITE };

/*
 * One unit of work passed (by pointer) to the I/O thread over thread_fd.
 * Filled in by prepare_request() and freed by ubd_handler() once the
 * pointer comes back.
 */
struct io_thread_req {
	/* Block-layer request this chunk belongs to. */
	struct request *req;
	enum ubd_req op;
	/* fds[0]: backing-file fd when a COW file is in use, else the device
	 * fd; fds[1]: always the device fd. */
	int fds[2];
	/* offsets[0] is 0, offsets[1] is the COW data offset. */
	unsigned long offsets[2];
	/* Byte offset of the transfer within the device. */
	unsigned long long offset;
	/* Transfer length in bytes. */
	unsigned long length;
	char *buffer;
	int sectorsize;
	/* One bit per sector of this transfer, set by the cowify helpers. */
	unsigned long sector_mask;
	/* Byte offset of the bitmap words to write back; -1 when unset. */
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
	int error;
};
60
/*
 * Test bit number 'bit' in the byte array 'data'.
 * Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	return ((data[index] >> shift) & 1) ? 1 : 0;
}
71
/* Set bit number 'bit' in the byte array 'data'. */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	data[index] = data[index] | (1 << shift);
}
82/*End stuff from ubd_user.h*/
83
84#define DRIVER_NAME "uml-blkdev"
85
86static DEFINE_MUTEX(ubd_lock);
87static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
88
89static int ubd_open(struct block_device *bdev, fmode_t mode);
90static int ubd_release(struct gendisk *disk, fmode_t mode);
91static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
92 unsigned int cmd, unsigned long arg);
93static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
94
95#define MAX_DEV (16)
96
97static const struct block_device_operations ubd_blops = {
98 .owner = THIS_MODULE,
99 .open = ubd_open,
100 .release = ubd_release,
101 .ioctl = ubd_ioctl,
102 .getgeo = ubd_getgeo,
103};
104
105/* Protected by ubd_lock */
106static int fake_major = UBD_MAJOR;
107static struct gendisk *ubd_gendisk[MAX_DEV];
108static struct gendisk *fake_gendisk[MAX_DEV];
109
110#ifdef CONFIG_BLK_DEV_UBD_SYNC
111#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
112 .cl = 1 })
113#else
114#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
115 .cl = 1 })
116#endif
117static struct openflags global_openflags = OPEN_FLAGS;
118
/* Per-device copy-on-write state. */
struct cow {
	/* Backing file name. */
	char *file;
	/* Backing file fd. */
	int fd;
	/* In-memory copy of the COW bitmap, vmalloc'ed in ubd_open_dev(). */
	unsigned long *bitmap;
	/* Size of the bitmap allocation, in bytes. */
	unsigned long bitmap_len;
	/* File offset the bitmap is read from. */
	int bitmap_offset;
	/* Offset handed to the I/O thread as offsets[1]. */
	int data_offset;
};
129
130#define MAX_SG 64
131
132struct ubd {
133 struct list_head restart;
134 /* name (and fd, below) of the file opened for writing, either the
135 * backing or the cow file. */
136 char *file;
137 int count;
138 int fd;
139 __u64 size;
140 struct openflags boot_openflags;
141 struct openflags openflags;
142 unsigned shared:1;
143 unsigned no_cow:1;
144 struct cow cow;
145 struct platform_device pdev;
146 struct request_queue *queue;
147 spinlock_t lock;
148 struct scatterlist sg[MAX_SG];
149 struct request *request;
150 int start_sg, end_sg;
151 sector_t rq_pos;
152};
153
154#define DEFAULT_COW { \
155 .file = NULL, \
156 .fd = -1, \
157 .bitmap = NULL, \
158 .bitmap_offset = 0, \
159 .data_offset = 0, \
160}
161
162#define DEFAULT_UBD { \
163 .file = NULL, \
164 .count = 0, \
165 .fd = -1, \
166 .size = -1, \
167 .boot_openflags = OPEN_FLAGS, \
168 .openflags = OPEN_FLAGS, \
169 .no_cow = 0, \
170 .shared = 0, \
171 .cow = DEFAULT_COW, \
172 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
173 .request = NULL, \
174 .start_sg = 0, \
175 .end_sg = 0, \
176 .rq_pos = 0, \
177}
178
179/* Protected by ubd_lock */
180static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
181
182/* Only changed by fake_ide_setup which is a setup */
183static int fake_ide = 0;
184static struct proc_dir_entry *proc_ide_root = NULL;
185static struct proc_dir_entry *proc_ide = NULL;
186
187static void make_proc_ide(void)
188{
189 proc_ide_root = proc_mkdir("ide", NULL);
190 proc_ide = proc_mkdir("ide0", proc_ide_root);
191}
192
/* seq_file show callback: the fake "media" entry always reads "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");

	return 0;
}
198
199static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
200{
201 return single_open(file, fake_ide_media_proc_show, NULL);
202}
203
204static const struct file_operations fake_ide_media_proc_fops = {
205 .owner = THIS_MODULE,
206 .open = fake_ide_media_proc_open,
207 .read = seq_read,
208 .llseek = seq_lseek,
209 .release = single_release,
210};
211
212static void make_ide_entries(const char *dev_name)
213{
214 struct proc_dir_entry *dir, *ent;
215 char name[64];
216
217 if(proc_ide_root == NULL) make_proc_ide();
218
219 dir = proc_mkdir(dev_name, proc_ide);
220 if(!dir) return;
221
222 ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
223 if(!ent) return;
224 snprintf(name, sizeof(name), "ide0/%s", dev_name);
225 proc_symlink(dev_name, proc_ide_root, name);
226}
227
228static int fake_ide_setup(char *str)
229{
230 fake_ide = 1;
231 return 1;
232}
233
234__setup("fake_ide", fake_ide_setup);
235
236__uml_help(fake_ide_setup,
237"fake_ide\n"
238" Create ide0 entries that map onto ubd devices.\n\n"
239);
240
/*
 * Parse a device unit at *ptr: either a number (as accepted by
 * simple_strtoul with base 0) or a single lowercase letter, where 'a' is 0.
 * On success *ptr is advanced past the unit and the unit is returned;
 * otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cursor = *ptr;
	char *after;
	int unit;

	if (isdigit(*cursor)) {
		unit = simple_strtoul(cursor, &after, 0);
		if (after == cursor)
			return -1;
		*ptr = after;
		return unit;
	}

	if ((*cursor >= 'a') && (*cursor <= 'z')) {
		*ptr = cursor + 1;
		return *cursor - 'a';
	}

	return -1;
}
259
260/* If *index_out == -1 at exit, the passed option was a general one;
261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
262 * should not be freed on exit.
263 */
264static int ubd_setup_common(char *str, int *index_out, char **error_out)
265{
266 struct ubd *ubd_dev;
267 struct openflags flags = global_openflags;
268 char *backing_file;
269 int n, err = 0, i;
270
271 if(index_out) *index_out = -1;
272 n = *str;
273 if(n == '='){
274 char *end;
275 int major;
276
277 str++;
278 if(!strcmp(str, "sync")){
279 global_openflags = of_sync(global_openflags);
280 goto out1;
281 }
282
283 err = -EINVAL;
284 major = simple_strtoul(str, &end, 0);
285 if((*end != '\0') || (end == str)){
286 *error_out = "Didn't parse major number";
287 goto out1;
288 }
289
290 mutex_lock(&ubd_lock);
291 if (fake_major != UBD_MAJOR) {
292 *error_out = "Can't assign a fake major twice";
293 goto out1;
294 }
295
296 fake_major = major;
297
298 printk(KERN_INFO "Setting extra ubd major number to %d\n",
299 major);
300 err = 0;
301 out1:
302 mutex_unlock(&ubd_lock);
303 return err;
304 }
305
306 n = parse_unit(&str);
307 if(n < 0){
308 *error_out = "Couldn't parse device number";
309 return -EINVAL;
310 }
311 if(n >= MAX_DEV){
312 *error_out = "Device number out of range";
313 return 1;
314 }
315
316 err = -EBUSY;
317 mutex_lock(&ubd_lock);
318
319 ubd_dev = &ubd_devs[n];
320 if(ubd_dev->file != NULL){
321 *error_out = "Device is already configured";
322 goto out;
323 }
324
325 if (index_out)
326 *index_out = n;
327
328 err = -EINVAL;
329 for (i = 0; i < sizeof("rscd="); i++) {
330 switch (*str) {
331 case 'r':
332 flags.w = 0;
333 break;
334 case 's':
335 flags.s = 1;
336 break;
337 case 'd':
338 ubd_dev->no_cow = 1;
339 break;
340 case 'c':
341 ubd_dev->shared = 1;
342 break;
343 case '=':
344 str++;
345 goto break_loop;
346 default:
347 *error_out = "Expected '=' or flag letter "
348 "(r, s, c, or d)";
349 goto out;
350 }
351 str++;
352 }
353
354 if (*str == '=')
355 *error_out = "Too many flags specified";
356 else
357 *error_out = "Missing '='";
358 goto out;
359
360break_loop:
361 backing_file = strchr(str, ',');
362
363 if (backing_file == NULL)
364 backing_file = strchr(str, ':');
365
366 if(backing_file != NULL){
367 if(ubd_dev->no_cow){
368 *error_out = "Can't specify both 'd' and a cow file";
369 goto out;
370 }
371 else {
372 *backing_file = '\0';
373 backing_file++;
374 }
375 }
376 err = 0;
377 ubd_dev->file = str;
378 ubd_dev->cow.file = backing_file;
379 ubd_dev->boot_openflags = flags;
380out:
381 mutex_unlock(&ubd_lock);
382 return err;
383}
384
385static int ubd_setup(char *str)
386{
387 char *error;
388 int err;
389
390 err = ubd_setup_common(str, NULL, &error);
391 if(err)
392 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
393 "%s\n", str, error);
394 return 1;
395}
396
397__setup("ubd", ubd_setup);
398__uml_help(ubd_setup,
399"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
400" This is used to associate a device with a file in the underlying\n"
401" filesystem. When specifying two filenames, the first one is the\n"
402" COW name and the second is the backing file name. As separator you can\n"
403" use either a ':' or a ',': the first one allows writing things like;\n"
404" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
405" while with a ',' the shell would not expand the 2nd '~'.\n"
406" When using only one filename, UML will detect whether to treat it like\n"
407" a COW file or a backing file. To override this detection, add the 'd'\n"
408" flag:\n"
409" ubd0d=BackingFile\n"
410" Usually, there is a filesystem in the file, but \n"
411" that's not required. Swap devices containing swap files can be\n"
412" specified like this. Also, a file which doesn't contain a\n"
413" filesystem can have its contents read in the virtual \n"
414" machine by running 'dd' on the device. <n> must be in the range\n"
415" 0 to 7. Appending an 'r' to the number will cause that device\n"
416" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
417" an 's' will cause data to be written to disk on the host immediately.\n"
418" 'c' will cause the device to be treated as being shared between multiple\n"
419" UMLs and file locking will be turned off - this is appropriate for a\n"
420" cluster filesystem and inappropriate at almost all other times.\n\n"
421);
422
/* Catches the common "udb" misspelling of "ubd" and complains about it. */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n", str);

	return 1;
}
429
430__setup("udb", udb_setup);
431__uml_help(udb_setup,
432"udb\n"
433" This option is here solely to catch ubd -> udb typos, which can be\n"
434" to impossible to catch visually unless you specifically look for\n"
435" them. The only result of any option starting with 'udb' is an error\n"
436" in the boot output.\n\n"
437);
438
439static void do_ubd_request(struct request_queue * q);
440
441/* Only changed by ubd_init, which is an initcall. */
442static int thread_fd = -1;
443static LIST_HEAD(restart);
444
445/* XXX - move this inside ubd_intr. */
446/* Called without dev->lock held, and only in interrupt context. */
447static void ubd_handler(void)
448{
449 struct io_thread_req *req;
450 struct ubd *ubd;
451 struct list_head *list, *next_ele;
452 unsigned long flags;
453 int n;
454
455 while(1){
456 n = os_read_file(thread_fd, &req,
457 sizeof(struct io_thread_req *));
458 if(n != sizeof(req)){
459 if(n == -EAGAIN)
460 break;
461 printk(KERN_ERR "spurious interrupt in ubd_handler, "
462 "err = %d\n", -n);
463 return;
464 }
465
466 blk_end_request(req->req, 0, req->length);
467 kfree(req);
468 }
469 reactivate_fd(thread_fd, UBD_IRQ);
470
471 list_for_each_safe(list, next_ele, &restart){
472 ubd = container_of(list, struct ubd, restart);
473 list_del_init(&ubd->restart);
474 spin_lock_irqsave(&ubd->lock, flags);
475 do_ubd_request(ubd->queue);
476 spin_unlock_irqrestore(&ubd->lock, flags);
477 }
478}
479
480static irqreturn_t ubd_intr(int irq, void *dev)
481{
482 ubd_handler();
483 return IRQ_HANDLED;
484}
485
486/* Only changed by ubd_init, which is an initcall. */
487static int io_pid = -1;
488
489static void kill_io_thread(void)
490{
491 if(io_pid != -1)
492 os_kill_process(io_pid, 1);
493}
494
495__uml_exitcall(kill_io_thread);
496
497static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
498{
499 char *file;
500 int fd;
501 int err;
502
503 __u32 version;
504 __u32 align;
505 char *backing_file;
506 time_t mtime;
507 unsigned long long size;
508 int sector_size;
509 int bitmap_offset;
510
511 if (ubd_dev->file && ubd_dev->cow.file) {
512 file = ubd_dev->cow.file;
513
514 goto out;
515 }
516
517 fd = os_open_file(ubd_dev->file, global_openflags, 0);
518 if (fd < 0)
519 return fd;
520
521 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
522 &mtime, &size, §or_size, &align, &bitmap_offset);
523 os_close_file(fd);
524
525 if(err == -EINVAL)
526 file = ubd_dev->file;
527 else
528 file = backing_file;
529
530out:
531 return os_file_size(file, size_out);
532}
533
/*
 * Seek fd to 'offset' and read 'len' bytes into buf.
 * Returns 0 on success or the negative error from the seek or the read.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
548
549static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
550{
551 unsigned long modtime;
552 unsigned long long actual;
553 int err;
554
555 err = os_file_modtime(file, &modtime);
556 if (err < 0) {
557 printk(KERN_ERR "Failed to get modification time of backing "
558 "file \"%s\", err = %d\n", file, -err);
559 return err;
560 }
561
562 err = os_file_size(file, &actual);
563 if (err < 0) {
564 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
565 "err = %d\n", file, -err);
566 return err;
567 }
568
569 if (actual != size) {
570 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
571 * the typecast.*/
572 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
573 "vs backing file\n", (unsigned long long) size, actual);
574 return -EINVAL;
575 }
576 if (modtime != mtime) {
577 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
578 "backing file\n", mtime, modtime);
579 return -EINVAL;
580 }
581 return 0;
582}
583
584static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
585{
586 struct uml_stat buf1, buf2;
587 int err;
588
589 if (from_cmdline == NULL)
590 return 0;
591 if (!strcmp(from_cmdline, from_cow))
592 return 0;
593
594 err = os_stat_file(from_cmdline, &buf1);
595 if (err < 0) {
596 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
597 -err);
598 return 0;
599 }
600 err = os_stat_file(from_cow, &buf2);
601 if (err < 0) {
602 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
603 -err);
604 return 1;
605 }
606 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
607 return 0;
608
609 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
610 "\"%s\" specified in COW header of \"%s\"\n",
611 from_cmdline, from_cow, cow);
612 return 1;
613}
614
615static int open_ubd_file(char *file, struct openflags *openflags, int shared,
616 char **backing_file_out, int *bitmap_offset_out,
617 unsigned long *bitmap_len_out, int *data_offset_out,
618 int *create_cow_out)
619{
620 time_t mtime;
621 unsigned long long size;
622 __u32 version, align;
623 char *backing_file;
624 int fd, err, sectorsize, asked_switch, mode = 0644;
625
626 fd = os_open_file(file, *openflags, mode);
627 if (fd < 0) {
628 if ((fd == -ENOENT) && (create_cow_out != NULL))
629 *create_cow_out = 1;
630 if (!openflags->w ||
631 ((fd != -EROFS) && (fd != -EACCES)))
632 return fd;
633 openflags->w = 0;
634 fd = os_open_file(file, *openflags, mode);
635 if (fd < 0)
636 return fd;
637 }
638
639 if (shared)
640 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
641 else {
642 err = os_lock_file(fd, openflags->w);
643 if (err < 0) {
644 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
645 file, -err);
646 goto out_close;
647 }
648 }
649
650 /* Successful return case! */
651 if (backing_file_out == NULL)
652 return fd;
653
654 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
655 &size, §orsize, &align, bitmap_offset_out);
656 if (err && (*backing_file_out != NULL)) {
657 printk(KERN_ERR "Failed to read COW header from COW file "
658 "\"%s\", errno = %d\n", file, -err);
659 goto out_close;
660 }
661 if (err)
662 return fd;
663
664 asked_switch = path_requires_switch(*backing_file_out, backing_file,
665 file);
666
667 /* Allow switching only if no mismatch. */
668 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
669 mtime)) {
670 printk(KERN_ERR "Switching backing file to '%s'\n",
671 *backing_file_out);
672 err = write_cow_header(file, fd, *backing_file_out,
673 sectorsize, align, &size);
674 if (err) {
675 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
676 goto out_close;
677 }
678 } else {
679 *backing_file_out = backing_file;
680 err = backing_file_mismatch(*backing_file_out, size, mtime);
681 if (err)
682 goto out_close;
683 }
684
685 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
686 bitmap_len_out, data_offset_out);
687
688 return fd;
689 out_close:
690 os_close_file(fd);
691 return err;
692}
693
694static int create_cow_file(char *cow_file, char *backing_file,
695 struct openflags flags,
696 int sectorsize, int alignment, int *bitmap_offset_out,
697 unsigned long *bitmap_len_out, int *data_offset_out)
698{
699 int err, fd;
700
701 flags.c = 1;
702 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
703 if (fd < 0) {
704 err = fd;
705 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
706 cow_file, -err);
707 goto out;
708 }
709
710 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
711 bitmap_offset_out, bitmap_len_out,
712 data_offset_out);
713 if (!err)
714 return fd;
715 os_close_file(fd);
716 out:
717 return err;
718}
719
720static void ubd_close_dev(struct ubd *ubd_dev)
721{
722 os_close_file(ubd_dev->fd);
723 if(ubd_dev->cow.file == NULL)
724 return;
725
726 os_close_file(ubd_dev->cow.fd);
727 vfree(ubd_dev->cow.bitmap);
728 ubd_dev->cow.bitmap = NULL;
729}
730
731static int ubd_open_dev(struct ubd *ubd_dev)
732{
733 struct openflags flags;
734 char **back_ptr;
735 int err, create_cow, *create_ptr;
736 int fd;
737
738 ubd_dev->openflags = ubd_dev->boot_openflags;
739 create_cow = 0;
740 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
741 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
742
743 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
744 back_ptr, &ubd_dev->cow.bitmap_offset,
745 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
746 create_ptr);
747
748 if((fd == -ENOENT) && create_cow){
749 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
750 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
751 &ubd_dev->cow.bitmap_offset,
752 &ubd_dev->cow.bitmap_len,
753 &ubd_dev->cow.data_offset);
754 if(fd >= 0){
755 printk(KERN_INFO "Creating \"%s\" as COW file for "
756 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
757 }
758 }
759
760 if(fd < 0){
761 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
762 -fd);
763 return fd;
764 }
765 ubd_dev->fd = fd;
766
767 if(ubd_dev->cow.file != NULL){
768 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
769
770 err = -ENOMEM;
771 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
772 if(ubd_dev->cow.bitmap == NULL){
773 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
774 goto error;
775 }
776 flush_tlb_kernel_vm();
777
778 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
779 ubd_dev->cow.bitmap_offset,
780 ubd_dev->cow.bitmap_len);
781 if(err < 0)
782 goto error;
783
784 flags = ubd_dev->openflags;
785 flags.w = 0;
786 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
787 NULL, NULL, NULL, NULL);
788 if(err < 0) goto error;
789 ubd_dev->cow.fd = err;
790 }
791 return 0;
792 error:
793 os_close_file(ubd_dev->fd);
794 return err;
795}
796
797static void ubd_device_release(struct device *dev)
798{
799 struct ubd *ubd_dev = dev_get_drvdata(dev);
800
801 blk_cleanup_queue(ubd_dev->queue);
802 *ubd_dev = ((struct ubd) DEFAULT_UBD);
803}
804
805static int ubd_disk_register(int major, u64 size, int unit,
806 struct gendisk **disk_out)
807{
808 struct gendisk *disk;
809
810 disk = alloc_disk(1 << UBD_SHIFT);
811 if(disk == NULL)
812 return -ENOMEM;
813
814 disk->major = major;
815 disk->first_minor = unit << UBD_SHIFT;
816 disk->fops = &ubd_blops;
817 set_capacity(disk, size / 512);
818 if (major == UBD_MAJOR)
819 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
820 else
821 sprintf(disk->disk_name, "ubd_fake%d", unit);
822
823 /* sysfs register (not for ide fake devices) */
824 if (major == UBD_MAJOR) {
825 ubd_devs[unit].pdev.id = unit;
826 ubd_devs[unit].pdev.name = DRIVER_NAME;
827 ubd_devs[unit].pdev.dev.release = ubd_device_release;
828 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
829 platform_device_register(&ubd_devs[unit].pdev);
830 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
831 }
832
833 disk->private_data = &ubd_devs[unit];
834 disk->queue = ubd_devs[unit].queue;
835 add_disk(disk);
836
837 *disk_out = disk;
838 return 0;
839}
840
/* Round n up to the next multiple of 512 (1 << 9) bytes. */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
842
843static int ubd_add(int n, char **error_out)
844{
845 struct ubd *ubd_dev = &ubd_devs[n];
846 int err = 0;
847
848 if(ubd_dev->file == NULL)
849 goto out;
850
851 err = ubd_file_size(ubd_dev, &ubd_dev->size);
852 if(err < 0){
853 *error_out = "Couldn't determine size of device's file";
854 goto out;
855 }
856
857 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
858
859 INIT_LIST_HEAD(&ubd_dev->restart);
860 sg_init_table(ubd_dev->sg, MAX_SG);
861
862 err = -ENOMEM;
863 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
864 if (ubd_dev->queue == NULL) {
865 *error_out = "Failed to initialize device queue";
866 goto out;
867 }
868 ubd_dev->queue->queuedata = ubd_dev;
869
870 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
871 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
872 if(err){
873 *error_out = "Failed to register device";
874 goto out_cleanup;
875 }
876
877 if (fake_major != UBD_MAJOR)
878 ubd_disk_register(fake_major, ubd_dev->size, n,
879 &fake_gendisk[n]);
880
881 /*
882 * Perhaps this should also be under the "if (fake_major)" above
883 * using the fake_disk->disk_name
884 */
885 if (fake_ide)
886 make_ide_entries(ubd_gendisk[n]->disk_name);
887
888 err = 0;
889out:
890 return err;
891
892out_cleanup:
893 blk_cleanup_queue(ubd_dev->queue);
894 goto out;
895}
896
897static int ubd_config(char *str, char **error_out)
898{
899 int n, ret;
900
901 /* This string is possibly broken up and stored, so it's only
902 * freed if ubd_setup_common fails, or if only general options
903 * were set.
904 */
905 str = kstrdup(str, GFP_KERNEL);
906 if (str == NULL) {
907 *error_out = "Failed to allocate memory";
908 return -ENOMEM;
909 }
910
911 ret = ubd_setup_common(str, &n, error_out);
912 if (ret)
913 goto err_free;
914
915 if (n == -1) {
916 ret = 0;
917 goto err_free;
918 }
919
920 mutex_lock(&ubd_lock);
921 ret = ubd_add(n, error_out);
922 if (ret)
923 ubd_devs[n].file = NULL;
924 mutex_unlock(&ubd_lock);
925
926out:
927 return ret;
928
929err_free:
930 kfree(str);
931 goto out;
932}
933
934static int ubd_get_config(char *name, char *str, int size, char **error_out)
935{
936 struct ubd *ubd_dev;
937 int n, len = 0;
938
939 n = parse_unit(&name);
940 if((n >= MAX_DEV) || (n < 0)){
941 *error_out = "ubd_get_config : device number out of range";
942 return -1;
943 }
944
945 ubd_dev = &ubd_devs[n];
946 mutex_lock(&ubd_lock);
947
948 if(ubd_dev->file == NULL){
949 CONFIG_CHUNK(str, size, len, "", 1);
950 goto out;
951 }
952
953 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
954
955 if(ubd_dev->cow.file != NULL){
956 CONFIG_CHUNK(str, size, len, ",", 0);
957 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
958 }
959 else CONFIG_CHUNK(str, size, len, "", 1);
960
961 out:
962 mutex_unlock(&ubd_lock);
963 return len;
964}
965
966static int ubd_id(char **str, int *start_out, int *end_out)
967{
968 int n;
969
970 n = parse_unit(str);
971 *start_out = 0;
972 *end_out = MAX_DEV - 1;
973 return n;
974}
975
976static int ubd_remove(int n, char **error_out)
977{
978 struct gendisk *disk = ubd_gendisk[n];
979 struct ubd *ubd_dev;
980 int err = -ENODEV;
981
982 mutex_lock(&ubd_lock);
983
984 ubd_dev = &ubd_devs[n];
985
986 if(ubd_dev->file == NULL)
987 goto out;
988
989 /* you cannot remove a open disk */
990 err = -EBUSY;
991 if(ubd_dev->count > 0)
992 goto out;
993
994 ubd_gendisk[n] = NULL;
995 if(disk != NULL){
996 del_gendisk(disk);
997 put_disk(disk);
998 }
999
1000 if(fake_gendisk[n] != NULL){
1001 del_gendisk(fake_gendisk[n]);
1002 put_disk(fake_gendisk[n]);
1003 fake_gendisk[n] = NULL;
1004 }
1005
1006 err = 0;
1007 platform_device_unregister(&ubd_dev->pdev);
1008out:
1009 mutex_unlock(&ubd_lock);
1010 return err;
1011}
1012
1013/* All these are called by mconsole in process context and without
1014 * ubd-specific locks. The structure itself is const except for .list.
1015 */
1016static struct mc_device ubd_mc = {
1017 .list = LIST_HEAD_INIT(ubd_mc.list),
1018 .name = "ubd",
1019 .config = ubd_config,
1020 .get_config = ubd_get_config,
1021 .id = ubd_id,
1022 .remove = ubd_remove,
1023};
1024
1025static int __init ubd_mc_init(void)
1026{
1027 mconsole_register_dev(&ubd_mc);
1028 return 0;
1029}
1030
1031__initcall(ubd_mc_init);
1032
1033static int __init ubd0_init(void)
1034{
1035 struct ubd *ubd_dev = &ubd_devs[0];
1036
1037 mutex_lock(&ubd_lock);
1038 if(ubd_dev->file == NULL)
1039 ubd_dev->file = "root_fs";
1040 mutex_unlock(&ubd_lock);
1041
1042 return 0;
1043}
1044
1045__initcall(ubd0_init);
1046
1047/* Used in ubd_init, which is an initcall */
1048static struct platform_driver ubd_driver = {
1049 .driver = {
1050 .name = DRIVER_NAME,
1051 },
1052};
1053
1054static int __init ubd_init(void)
1055{
1056 char *error;
1057 int i, err;
1058
1059 if (register_blkdev(UBD_MAJOR, "ubd"))
1060 return -1;
1061
1062 if (fake_major != UBD_MAJOR) {
1063 char name[sizeof("ubd_nnn\0")];
1064
1065 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1066 if (register_blkdev(fake_major, "ubd"))
1067 return -1;
1068 }
1069 platform_driver_register(&ubd_driver);
1070 mutex_lock(&ubd_lock);
1071 for (i = 0; i < MAX_DEV; i++){
1072 err = ubd_add(i, &error);
1073 if(err)
1074 printk(KERN_ERR "Failed to initialize ubd device %d :"
1075 "%s\n", i, error);
1076 }
1077 mutex_unlock(&ubd_lock);
1078 return 0;
1079}
1080
1081late_initcall(ubd_init);
1082
1083static int __init ubd_driver_init(void){
1084 unsigned long stack;
1085 int err;
1086
1087 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1088 if(global_openflags.s){
1089 printk(KERN_INFO "ubd: Synchronous mode\n");
1090 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1091 * enough. So use anyway the io thread. */
1092 }
1093 stack = alloc_stack(0, 0);
1094 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1095 &thread_fd);
1096 if(io_pid < 0){
1097 printk(KERN_ERR
1098 "ubd : Failed to start I/O thread (errno = %d) - "
1099 "falling back to synchronous I/O\n", -io_pid);
1100 io_pid = -1;
1101 return 0;
1102 }
1103 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1104 0, "ubd", ubd_devs);
1105 if(err != 0)
1106 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1107 return 0;
1108}
1109
1110device_initcall(ubd_driver_init);
1111
1112static int ubd_open(struct block_device *bdev, fmode_t mode)
1113{
1114 struct gendisk *disk = bdev->bd_disk;
1115 struct ubd *ubd_dev = disk->private_data;
1116 int err = 0;
1117
1118 mutex_lock(&ubd_mutex);
1119 if(ubd_dev->count == 0){
1120 err = ubd_open_dev(ubd_dev);
1121 if(err){
1122 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1123 disk->disk_name, ubd_dev->file, -err);
1124 goto out;
1125 }
1126 }
1127 ubd_dev->count++;
1128 set_disk_ro(disk, !ubd_dev->openflags.w);
1129
1130 /* This should no more be needed. And it didn't work anyway to exclude
1131 * read-write remounting of filesystems.*/
1132 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1133 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1134 err = -EROFS;
1135 }*/
1136out:
1137 mutex_unlock(&ubd_mutex);
1138 return err;
1139}
1140
1141static int ubd_release(struct gendisk *disk, fmode_t mode)
1142{
1143 struct ubd *ubd_dev = disk->private_data;
1144
1145 mutex_lock(&ubd_mutex);
1146 if(--ubd_dev->count == 0)
1147 ubd_close_dev(ubd_dev);
1148 mutex_unlock(&ubd_mutex);
1149 return 0;
1150}
1151
1152static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1153 __u64 *cow_offset, unsigned long *bitmap,
1154 __u64 bitmap_offset, unsigned long *bitmap_words,
1155 __u64 bitmap_len)
1156{
1157 __u64 sector = io_offset >> 9;
1158 int i, update_bitmap = 0;
1159
1160 for(i = 0; i < length >> 9; i++){
1161 if(cow_mask != NULL)
1162 ubd_set_bit(i, (unsigned char *) cow_mask);
1163 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1164 continue;
1165
1166 update_bitmap = 1;
1167 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1168 }
1169
1170 if(!update_bitmap)
1171 return;
1172
1173 *cow_offset = sector / (sizeof(unsigned long) * 8);
1174
1175 /* This takes care of the case where we're exactly at the end of the
1176 * device, and *cow_offset + 1 is off the end. So, just back it up
1177 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1178 * for the original diagnosis.
1179 */
1180 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1181 sizeof(unsigned long)) - 1))
1182 (*cow_offset)--;
1183
1184 bitmap_words[0] = bitmap[*cow_offset];
1185 bitmap_words[1] = bitmap[*cow_offset + 1];
1186
1187 *cow_offset *= sizeof(unsigned long);
1188 *cow_offset += bitmap_offset;
1189}
1190
1191static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1192 __u64 bitmap_offset, __u64 bitmap_len)
1193{
1194 __u64 sector = req->offset >> 9;
1195 int i;
1196
1197 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1198 panic("Operation too long");
1199
1200 if(req->op == UBD_READ) {
1201 for(i = 0; i < req->length >> 9; i++){
1202 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1203 ubd_set_bit(i, (unsigned char *)
1204 &req->sector_mask);
1205 }
1206 }
1207 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1208 &req->cow_offset, bitmap, bitmap_offset,
1209 req->bitmap_words, bitmap_len);
1210}
1211
1212/* Called with dev->lock held */
1213static void prepare_request(struct request *req, struct io_thread_req *io_req,
1214 unsigned long long offset, int page_offset,
1215 int len, struct page *page)
1216{
1217 struct gendisk *disk = req->rq_disk;
1218 struct ubd *ubd_dev = disk->private_data;
1219
1220 io_req->req = req;
1221 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1222 ubd_dev->fd;
1223 io_req->fds[1] = ubd_dev->fd;
1224 io_req->cow_offset = -1;
1225 io_req->offset = offset;
1226 io_req->length = len;
1227 io_req->error = 0;
1228 io_req->sector_mask = 0;
1229
1230 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1231 io_req->offsets[0] = 0;
1232 io_req->offsets[1] = ubd_dev->cow.data_offset;
1233 io_req->buffer = page_address(page) + page_offset;
1234 io_req->sectorsize = 1 << 9;
1235
1236 if(ubd_dev->cow.file != NULL)
1237 cowify_req(io_req, ubd_dev->cow.bitmap,
1238 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1239
1240}
1241
1242/* Called with dev->lock held */
1243static void do_ubd_request(struct request_queue *q)
1244{
1245 struct io_thread_req *io_req;
1246 struct request *req;
1247 int n;
1248
1249 while(1){
1250 struct ubd *dev = q->queuedata;
1251 if(dev->end_sg == 0){
1252 struct request *req = blk_fetch_request(q);
1253 if(req == NULL)
1254 return;
1255
1256 dev->request = req;
1257 dev->rq_pos = blk_rq_pos(req);
1258 dev->start_sg = 0;
1259 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1260 }
1261
1262 req = dev->request;
1263 while(dev->start_sg < dev->end_sg){
1264 struct scatterlist *sg = &dev->sg[dev->start_sg];
1265
1266 io_req = kmalloc(sizeof(struct io_thread_req),
1267 GFP_ATOMIC);
1268 if(io_req == NULL){
1269 if(list_empty(&dev->restart))
1270 list_add(&dev->restart, &restart);
1271 return;
1272 }
1273 prepare_request(req, io_req,
1274 (unsigned long long)dev->rq_pos << 9,
1275 sg->offset, sg->length, sg_page(sg));
1276
1277 n = os_write_file(thread_fd, &io_req,
1278 sizeof(struct io_thread_req *));
1279 if(n != sizeof(struct io_thread_req *)){
1280 if(n != -EAGAIN)
1281 printk("write to io thread failed, "
1282 "errno = %d\n", -n);
1283 else if(list_empty(&dev->restart))
1284 list_add(&dev->restart, &restart);
1285 kfree(io_req);
1286 return;
1287 }
1288
1289 dev->rq_pos += sg->length >> 9;
1290 dev->start_sg++;
1291 }
1292 dev->end_sg = 0;
1293 dev->request = NULL;
1294 }
1295}
1296
1297static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1298{
1299 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1300
1301 geo->heads = 128;
1302 geo->sectors = 32;
1303 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1304 return 0;
1305}
1306
1307static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1308 unsigned int cmd, unsigned long arg)
1309{
1310 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1311 u16 ubd_id[ATA_ID_WORDS];
1312
1313 switch (cmd) {
1314 struct cdrom_volctrl volume;
1315 case HDIO_GET_IDENTITY:
1316 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1317 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1318 ubd_id[ATA_ID_HEADS] = 128;
1319 ubd_id[ATA_ID_SECTORS] = 32;
1320 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1321 sizeof(ubd_id)))
1322 return -EFAULT;
1323 return 0;
1324
1325 case CDROMVOLREAD:
1326 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1327 return -EFAULT;
1328 volume.channel0 = 255;
1329 volume.channel1 = 255;
1330 volume.channel2 = 255;
1331 volume.channel3 = 255;
1332 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1333 return -EFAULT;
1334 return 0;
1335 }
1336 return -EINVAL;
1337}
1338
1339static int update_bitmap(struct io_thread_req *req)
1340{
1341 int n;
1342
1343 if(req->cow_offset == -1)
1344 return 0;
1345
1346 n = os_seek_file(req->fds[1], req->cow_offset);
1347 if(n < 0){
1348 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1349 return 1;
1350 }
1351
1352 n = os_write_file(req->fds[1], &req->bitmap_words,
1353 sizeof(req->bitmap_words));
1354 if(n != sizeof(req->bitmap_words)){
1355 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1356 req->fds[1]);
1357 return 1;
1358 }
1359
1360 return 0;
1361}
1362
1363static void do_io(struct io_thread_req *req)
1364{
1365 char *buf;
1366 unsigned long len;
1367 int n, nsectors, start, end, bit;
1368 int err;
1369 __u64 off;
1370
1371 nsectors = req->length / req->sectorsize;
1372 start = 0;
1373 do {
1374 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1375 end = start;
1376 while((end < nsectors) &&
1377 (ubd_test_bit(end, (unsigned char *)
1378 &req->sector_mask) == bit))
1379 end++;
1380
1381 off = req->offset + req->offsets[bit] +
1382 start * req->sectorsize;
1383 len = (end - start) * req->sectorsize;
1384 buf = &req->buffer[start * req->sectorsize];
1385
1386 err = os_seek_file(req->fds[bit], off);
1387 if(err < 0){
1388 printk("do_io - lseek failed : err = %d\n", -err);
1389 req->error = 1;
1390 return;
1391 }
1392 if(req->op == UBD_READ){
1393 n = 0;
1394 do {
1395 buf = &buf[n];
1396 len -= n;
1397 n = os_read_file(req->fds[bit], buf, len);
1398 if (n < 0) {
1399 printk("do_io - read failed, err = %d "
1400 "fd = %d\n", -n, req->fds[bit]);
1401 req->error = 1;
1402 return;
1403 }
1404 } while((n < len) && (n != 0));
1405 if (n < len) memset(&buf[n], 0, len - n);
1406 } else {
1407 n = os_write_file(req->fds[bit], buf, len);
1408 if(n != len){
1409 printk("do_io - write failed err = %d "
1410 "fd = %d\n", -n, req->fds[bit]);
1411 req->error = 1;
1412 return;
1413 }
1414 }
1415
1416 start = end;
1417 } while(start < nsectors);
1418
1419 req->error = update_bitmap(req);
1420}
1421
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 * io_thread() reads request pointers from this descriptor and writes them
 * back to it once the request has been processed.
 */
int kernel_fd = -1;

/* Only changed by the io thread, which increments it once per request.
 * XXX: currently unused (nothing in this part of the file reads it).
 */
static int io_count = 0;
1429
1430int io_thread(void *arg)
1431{
1432 struct io_thread_req *req;
1433 int n;
1434
1435 ignore_sigwinch_sig();
1436 while(1){
1437 n = os_read_file(kernel_fd, &req,
1438 sizeof(struct io_thread_req *));
1439 if(n != sizeof(struct io_thread_req *)){
1440 if(n < 0)
1441 printk("io_thread - read failed, fd = %d, "
1442 "err = %d\n", kernel_fd, -n);
1443 else {
1444 printk("io_thread - short read, fd = %d, "
1445 "length = %d\n", kernel_fd, n);
1446 }
1447 continue;
1448 }
1449 io_count++;
1450 do_io(req);
1451 n = os_write_file(kernel_fd, &req,
1452 sizeof(struct io_thread_req *));
1453 if(n != sizeof(struct io_thread_req *))
1454 printk("io_thread - write failed, fd = %d, err = %d\n",
1455 kernel_fd, -n);
1456 }
1457
1458 return 0;
1459}
1/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define UBD_SHIFT 4
21
22#include "linux/kernel.h"
23#include "linux/module.h"
24#include "linux/blkdev.h"
25#include "linux/ata.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/cdrom.h"
29#include "linux/proc_fs.h"
30#include "linux/seq_file.h"
31#include "linux/ctype.h"
32#include "linux/capability.h"
33#include "linux/mm.h"
34#include "linux/slab.h"
35#include "linux/vmalloc.h"
36#include "linux/mutex.h"
37#include "linux/blkpg.h"
38#include "linux/genhd.h"
39#include "linux/spinlock.h"
40#include "linux/platform_device.h"
41#include "linux/scatterlist.h"
42#include "asm/segment.h"
43#include "asm/uaccess.h"
44#include "asm/irq.h"
45#include "asm/types.h"
46#include "asm/tlbflush.h"
47#include "mem_user.h"
48#include "kern_util.h"
49#include "kern.h"
50#include "mconsole_kern.h"
51#include "init.h"
52#include "irq_user.h"
53#include "irq_kern.h"
54#include "ubd_user.h"
55#include "os.h"
56#include "mem.h"
57#include "mem_kern.h"
58#include "cow.h"
59
/* Direction of a request handed to the I/O thread */
enum ubd_req { UBD_READ, UBD_WRITE };

/*
 * One unit of work passed (by pointer) between the request function and the
 * I/O thread.
 */
struct io_thread_req {
	/* block-layer request this piece of I/O belongs to */
	struct request *req;
	/* UBD_READ or UBD_WRITE */
	enum ubd_req op;
	/* file descriptors, indexed by a sector's bit in sector_mask */
	int fds[2];
	/* extra file offset added for each of the two fds */
	unsigned long offsets[2];
	/* byte offset of the transfer on the device */
	unsigned long long offset;
	/* byte length of the transfer */
	unsigned long length;
	/* memory the data is read into or written from */
	char *buffer;
	/* sector size in bytes used to split the transfer */
	int sectorsize;
	/* one bit per sector of the transfer; selects the fds/offsets slot */
	unsigned long sector_mask;
	/* file offset at which bitmap_words are written; -1 means no update */
	unsigned long long cow_offset;
	/* bitmap words written out after the data transfer */
	unsigned long bitmap_words[2];
	/* set non-zero by the I/O thread when the transfer failed */
	int error;
};
76
/*
 * Return 1 if bit number 'bit' is set in the byte array 'data', else 0.
 * Bits are numbered from the least significant bit of data[0].
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	unsigned char byte = data[bit / width];
	int shift = bit % width;

	return ((byte >> shift) & 1) ? 1 : 0;
}
87
/*
 * Set bit number 'bit' in the byte array 'data'.
 * Bits are numbered from the least significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	unsigned char *byte = &data[bit / width];

	*byte |= 1 << (bit % width);
}
98/*End stuff from ubd_user.h*/
99
/* Name shared by the platform driver and the per-unit platform devices */
#define DRIVER_NAME "uml-blkdev"

/* ubd_lock guards the driver-wide tables (see the "Protected by ubd_lock"
 * notes below). */
static DEFINE_MUTEX(ubd_lock);
static DEFINE_MUTEX(ubd_mutex);	/* replaces BKL, might not be needed */

/* Block device operations, defined further down in the file */
static int ubd_open(struct block_device *bdev, fmode_t mode);
static int ubd_release(struct gendisk *disk, fmode_t mode);
static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of ubd units; valid unit numbers are 0 .. MAX_DEV - 1 */
#define MAX_DEV (16)
112
/* Operations table installed as disk->fops by ubd_disk_register() */
static const struct block_device_operations ubd_blops = {
	.owner		= THIS_MODULE,
	.open		= ubd_open,
	.release	= ubd_release,
	.ioctl		= ubd_ioctl,
	.getgeo		= ubd_getgeo,
};
120
/* Protected by ubd_lock */
/* fake_major stays UBD_MAJOR until a "ubd=<major>" option assigns an extra
 * major; the two arrays hold the disks registered per unit under the real
 * and the extra major. */
static int fake_major = UBD_MAJOR;
static struct gendisk *ubd_gendisk[MAX_DEV];
static struct gendisk *fake_gendisk[MAX_DEV];

/* Default open flags; the two variants differ only in the .s flag, which
 * is set when CONFIG_BLK_DEV_UBD_SYNC is enabled. */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif
/* Starting flags for every device option; "ubd=sync" updates them */
static struct openflags global_openflags = OPEN_FLAGS;
134
/* Copy-on-write state of one ubd device */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap (vmalloc'ed in ubd_open_dev) */
	unsigned long *bitmap;
	/* size passed to vmalloc/read for the bitmap */
	unsigned long bitmap_len;
	/* file offset the bitmap is read from */
	int bitmap_offset;
	/* offset added to data accesses that go to the COW file */
	int data_offset;
};

/* Size of the per-device scatterlist and the queue's segment limit */
#define MAX_SG 64
147
/* Per-unit state of the driver; one entry of ubd_devs[] */
struct ubd {
	/* link on the global restart list of devices with stalled requests */
	struct list_head restart;
	/* name (and fd, below) of the file opened for writing, either the
	 * backing or the cow file. */
	char *file;
	/* a device with count > 0 cannot be removed (see ubd_remove) */
	int count;
	int fd;
	/* device size in bytes, rounded up to a multiple of 512 in ubd_add */
	__u64 size;
	/* flags parsed from the option string */
	struct openflags boot_openflags;
	/* flags actually used for the open; reset from boot_openflags */
	struct openflags openflags;
	/* 'c' flag: skip locking the file on the host */
	unsigned shared:1;
	/* 'd' flag: do not look for a COW header */
	unsigned no_cow:1;
	struct cow cow;
	struct platform_device pdev;
	struct request_queue *queue;
	/* lock handed to blk_init_queue() */
	spinlock_t lock;
	/* scatterlist of the request currently being submitted */
	struct scatterlist sg[MAX_SG];
	/* request currently being submitted, or NULL */
	struct request *request;
	/* next and one-past-last scatterlist index still to submit */
	int start_sg, end_sg;
	/* sector position of the next scatterlist entry */
	sector_t rq_pos;
};
169
/* Initial value of struct cow: no backing file, no fd, no bitmap */
#define DEFAULT_COW { \
	.file =			NULL, \
	.fd =			-1,	\
	.bitmap =		NULL, \
	.bitmap_offset =	0, \
	.data_offset =		0, \
}

/* Initial value of struct ubd: an unconfigured unit (file == NULL).
 * Also used by ubd_device_release() to reset a unit. */
#define DEFAULT_UBD { \
	.file =			NULL, \
	.count =		0, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
	.no_cow =		0, \
	.shared =		0, \
	.cow =			DEFAULT_COW, \
	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
	.request =		NULL, \
	.start_sg =		0, \
	.end_sg =		0, \
	.rq_pos =		0, \
}

/* Protected by ubd_lock */
static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
197
/* Only changed by fake_ide_setup which is a setup */
static int fake_ide = 0;
/* /proc "ide" and "ide/ide0" directories, created by make_proc_ide() */
static struct proc_dir_entry *proc_ide_root = NULL;
static struct proc_dir_entry *proc_ide = NULL;
202
203static void make_proc_ide(void)
204{
205 proc_ide_root = proc_mkdir("ide", NULL);
206 proc_ide = proc_mkdir("ide0", proc_ide_root);
207}
208
/* seq_file show callback for the fake "media" entry: always prints "disk" */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	static const char media_type[] = "disk\n";

	seq_puts(m, media_type);
	return 0;
}
214
215static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
216{
217 return single_open(file, fake_ide_media_proc_show, NULL);
218}
219
/* File operations of the fake "media" proc entry (single_open seq_file) */
static const struct file_operations fake_ide_media_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= fake_ide_media_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
227
228static void make_ide_entries(const char *dev_name)
229{
230 struct proc_dir_entry *dir, *ent;
231 char name[64];
232
233 if(proc_ide_root == NULL) make_proc_ide();
234
235 dir = proc_mkdir(dev_name, proc_ide);
236 if(!dir) return;
237
238 ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
239 if(!ent) return;
240 snprintf(name, sizeof(name), "ide0/%s", dev_name);
241 proc_symlink(dev_name, proc_ide_root, name);
242}
243
244static int fake_ide_setup(char *str)
245{
246 fake_ide = 1;
247 return 1;
248}
249
250__setup("fake_ide", fake_ide_setup);
251
252__uml_help(fake_ide_setup,
253"fake_ide\n"
254" Create ide0 entries that map onto ubd devices.\n\n"
255);
256
/*
 * Parse a unit specifier at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single lower-case letter, 'a' being
 * unit 0.  On success *ptr is advanced past the specifier and the unit
 * number is returned; otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cursor = *ptr, *after;
	int unit;

	if (isdigit(*cursor)) {
		unit = simple_strtoul(cursor, &after, 0);
		if (after == cursor)
			return -1;
		*ptr = after;
		return unit;
	}

	if ((*cursor >= 'a') && (*cursor <= 'z')) {
		unit = *cursor - 'a';
		*ptr = cursor + 1;
		return unit;
	}

	return -1;
}
275
276/* If *index_out == -1 at exit, the passed option was a general one;
277 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
278 * should not be freed on exit.
279 */
280static int ubd_setup_common(char *str, int *index_out, char **error_out)
281{
282 struct ubd *ubd_dev;
283 struct openflags flags = global_openflags;
284 char *backing_file;
285 int n, err = 0, i;
286
287 if(index_out) *index_out = -1;
288 n = *str;
289 if(n == '='){
290 char *end;
291 int major;
292
293 str++;
294 if(!strcmp(str, "sync")){
295 global_openflags = of_sync(global_openflags);
296 goto out1;
297 }
298
299 err = -EINVAL;
300 major = simple_strtoul(str, &end, 0);
301 if((*end != '\0') || (end == str)){
302 *error_out = "Didn't parse major number";
303 goto out1;
304 }
305
306 mutex_lock(&ubd_lock);
307 if (fake_major != UBD_MAJOR) {
308 *error_out = "Can't assign a fake major twice";
309 goto out1;
310 }
311
312 fake_major = major;
313
314 printk(KERN_INFO "Setting extra ubd major number to %d\n",
315 major);
316 err = 0;
317 out1:
318 mutex_unlock(&ubd_lock);
319 return err;
320 }
321
322 n = parse_unit(&str);
323 if(n < 0){
324 *error_out = "Couldn't parse device number";
325 return -EINVAL;
326 }
327 if(n >= MAX_DEV){
328 *error_out = "Device number out of range";
329 return 1;
330 }
331
332 err = -EBUSY;
333 mutex_lock(&ubd_lock);
334
335 ubd_dev = &ubd_devs[n];
336 if(ubd_dev->file != NULL){
337 *error_out = "Device is already configured";
338 goto out;
339 }
340
341 if (index_out)
342 *index_out = n;
343
344 err = -EINVAL;
345 for (i = 0; i < sizeof("rscd="); i++) {
346 switch (*str) {
347 case 'r':
348 flags.w = 0;
349 break;
350 case 's':
351 flags.s = 1;
352 break;
353 case 'd':
354 ubd_dev->no_cow = 1;
355 break;
356 case 'c':
357 ubd_dev->shared = 1;
358 break;
359 case '=':
360 str++;
361 goto break_loop;
362 default:
363 *error_out = "Expected '=' or flag letter "
364 "(r, s, c, or d)";
365 goto out;
366 }
367 str++;
368 }
369
370 if (*str == '=')
371 *error_out = "Too many flags specified";
372 else
373 *error_out = "Missing '='";
374 goto out;
375
376break_loop:
377 backing_file = strchr(str, ',');
378
379 if (backing_file == NULL)
380 backing_file = strchr(str, ':');
381
382 if(backing_file != NULL){
383 if(ubd_dev->no_cow){
384 *error_out = "Can't specify both 'd' and a cow file";
385 goto out;
386 }
387 else {
388 *backing_file = '\0';
389 backing_file++;
390 }
391 }
392 err = 0;
393 ubd_dev->file = str;
394 ubd_dev->cow.file = backing_file;
395 ubd_dev->boot_openflags = flags;
396out:
397 mutex_unlock(&ubd_lock);
398 return err;
399}
400
401static int ubd_setup(char *str)
402{
403 char *error;
404 int err;
405
406 err = ubd_setup_common(str, NULL, &error);
407 if(err)
408 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
409 "%s\n", str, error);
410 return 1;
411}
412
413__setup("ubd", ubd_setup);
414__uml_help(ubd_setup,
415"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
416" This is used to associate a device with a file in the underlying\n"
417" filesystem. When specifying two filenames, the first one is the\n"
418" COW name and the second is the backing file name. As separator you can\n"
419" use either a ':' or a ',': the first one allows writing things like;\n"
420" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
421" while with a ',' the shell would not expand the 2nd '~'.\n"
422" When using only one filename, UML will detect whether to treat it like\n"
423" a COW file or a backing file. To override this detection, add the 'd'\n"
424" flag:\n"
425" ubd0d=BackingFile\n"
426" Usually, there is a filesystem in the file, but \n"
427" that's not required. Swap devices containing swap files can be\n"
428" specified like this. Also, a file which doesn't contain a\n"
429" filesystem can have its contents read in the virtual \n"
430" machine by running 'dd' on the device. <n> must be in the range\n"
431" 0 to 7. Appending an 'r' to the number will cause that device\n"
432" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
433" an 's' will cause data to be written to disk on the host immediately.\n"
434" 'c' will cause the device to be treated as being shared between multiple\n"
435" UMLs and file locking will be turned off - this is appropriate for a\n"
436" cluster filesystem and inappropriate at almost all other times.\n\n"
437);
438
439static int udb_setup(char *str)
440{
441 printk("udb%s specified on command line is almost certainly a ubd -> "
442 "udb TYPO\n", str);
443 return 1;
444}
445
446__setup("udb", udb_setup);
447__uml_help(udb_setup,
448"udb\n"
449" This option is here solely to catch ubd -> udb typos, which can be\n"
450" to impossible to catch visually unless you specifically look for\n"
451" them. The only result of any option starting with 'udb' is an error\n"
452" in the boot output.\n\n"
453);
454
/* Request function of the device queues, defined further down */
static void do_ubd_request(struct request_queue * q);

/* Only changed by ubd_init, which is an initcall. */
/* Descriptor used to exchange request pointers with the I/O thread */
static int thread_fd = -1;
/* Devices whose request submission has to be retried by ubd_handler */
static LIST_HEAD(restart);
460
461/* XXX - move this inside ubd_intr. */
462/* Called without dev->lock held, and only in interrupt context. */
463static void ubd_handler(void)
464{
465 struct io_thread_req *req;
466 struct ubd *ubd;
467 struct list_head *list, *next_ele;
468 unsigned long flags;
469 int n;
470
471 while(1){
472 n = os_read_file(thread_fd, &req,
473 sizeof(struct io_thread_req *));
474 if(n != sizeof(req)){
475 if(n == -EAGAIN)
476 break;
477 printk(KERN_ERR "spurious interrupt in ubd_handler, "
478 "err = %d\n", -n);
479 return;
480 }
481
482 blk_end_request(req->req, 0, req->length);
483 kfree(req);
484 }
485 reactivate_fd(thread_fd, UBD_IRQ);
486
487 list_for_each_safe(list, next_ele, &restart){
488 ubd = container_of(list, struct ubd, restart);
489 list_del_init(&ubd->restart);
490 spin_lock_irqsave(&ubd->lock, flags);
491 do_ubd_request(ubd->queue);
492 spin_unlock_irqrestore(&ubd->lock, flags);
493 }
494}
495
496static irqreturn_t ubd_intr(int irq, void *dev)
497{
498 ubd_handler();
499 return IRQ_HANDLED;
500}
501
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
512
513static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
514{
515 char *file;
516
517 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
518 return os_file_size(file, size_out);
519}
520
/*
 * Read len bytes from file position offset of fd into buf.
 * Returns 0 unless the seek or the read reports an error (negative value),
 * in which case that value is returned.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
535
536static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
537{
538 unsigned long modtime;
539 unsigned long long actual;
540 int err;
541
542 err = os_file_modtime(file, &modtime);
543 if (err < 0) {
544 printk(KERN_ERR "Failed to get modification time of backing "
545 "file \"%s\", err = %d\n", file, -err);
546 return err;
547 }
548
549 err = os_file_size(file, &actual);
550 if (err < 0) {
551 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
552 "err = %d\n", file, -err);
553 return err;
554 }
555
556 if (actual != size) {
557 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
558 * the typecast.*/
559 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
560 "vs backing file\n", (unsigned long long) size, actual);
561 return -EINVAL;
562 }
563 if (modtime != mtime) {
564 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
565 "backing file\n", mtime, modtime);
566 return -EINVAL;
567 }
568 return 0;
569}
570
571static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
572{
573 struct uml_stat buf1, buf2;
574 int err;
575
576 if (from_cmdline == NULL)
577 return 0;
578 if (!strcmp(from_cmdline, from_cow))
579 return 0;
580
581 err = os_stat_file(from_cmdline, &buf1);
582 if (err < 0) {
583 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
584 -err);
585 return 0;
586 }
587 err = os_stat_file(from_cow, &buf2);
588 if (err < 0) {
589 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
590 -err);
591 return 1;
592 }
593 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
594 return 0;
595
596 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
597 "\"%s\" specified in COW header of \"%s\"\n",
598 from_cmdline, from_cow, cow);
599 return 1;
600}
601
602static int open_ubd_file(char *file, struct openflags *openflags, int shared,
603 char **backing_file_out, int *bitmap_offset_out,
604 unsigned long *bitmap_len_out, int *data_offset_out,
605 int *create_cow_out)
606{
607 time_t mtime;
608 unsigned long long size;
609 __u32 version, align;
610 char *backing_file;
611 int fd, err, sectorsize, asked_switch, mode = 0644;
612
613 fd = os_open_file(file, *openflags, mode);
614 if (fd < 0) {
615 if ((fd == -ENOENT) && (create_cow_out != NULL))
616 *create_cow_out = 1;
617 if (!openflags->w ||
618 ((fd != -EROFS) && (fd != -EACCES)))
619 return fd;
620 openflags->w = 0;
621 fd = os_open_file(file, *openflags, mode);
622 if (fd < 0)
623 return fd;
624 }
625
626 if (shared)
627 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
628 else {
629 err = os_lock_file(fd, openflags->w);
630 if (err < 0) {
631 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
632 file, -err);
633 goto out_close;
634 }
635 }
636
637 /* Successful return case! */
638 if (backing_file_out == NULL)
639 return fd;
640
641 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
642 &size, §orsize, &align, bitmap_offset_out);
643 if (err && (*backing_file_out != NULL)) {
644 printk(KERN_ERR "Failed to read COW header from COW file "
645 "\"%s\", errno = %d\n", file, -err);
646 goto out_close;
647 }
648 if (err)
649 return fd;
650
651 asked_switch = path_requires_switch(*backing_file_out, backing_file,
652 file);
653
654 /* Allow switching only if no mismatch. */
655 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
656 mtime)) {
657 printk(KERN_ERR "Switching backing file to '%s'\n",
658 *backing_file_out);
659 err = write_cow_header(file, fd, *backing_file_out,
660 sectorsize, align, &size);
661 if (err) {
662 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
663 goto out_close;
664 }
665 } else {
666 *backing_file_out = backing_file;
667 err = backing_file_mismatch(*backing_file_out, size, mtime);
668 if (err)
669 goto out_close;
670 }
671
672 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
673 bitmap_len_out, data_offset_out);
674
675 return fd;
676 out_close:
677 os_close_file(fd);
678 return err;
679}
680
681static int create_cow_file(char *cow_file, char *backing_file,
682 struct openflags flags,
683 int sectorsize, int alignment, int *bitmap_offset_out,
684 unsigned long *bitmap_len_out, int *data_offset_out)
685{
686 int err, fd;
687
688 flags.c = 1;
689 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
690 if (fd < 0) {
691 err = fd;
692 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
693 cow_file, -err);
694 goto out;
695 }
696
697 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
698 bitmap_offset_out, bitmap_len_out,
699 data_offset_out);
700 if (!err)
701 return fd;
702 os_close_file(fd);
703 out:
704 return err;
705}
706
707static void ubd_close_dev(struct ubd *ubd_dev)
708{
709 os_close_file(ubd_dev->fd);
710 if(ubd_dev->cow.file == NULL)
711 return;
712
713 os_close_file(ubd_dev->cow.fd);
714 vfree(ubd_dev->cow.bitmap);
715 ubd_dev->cow.bitmap = NULL;
716}
717
718static int ubd_open_dev(struct ubd *ubd_dev)
719{
720 struct openflags flags;
721 char **back_ptr;
722 int err, create_cow, *create_ptr;
723 int fd;
724
725 ubd_dev->openflags = ubd_dev->boot_openflags;
726 create_cow = 0;
727 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
728 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
729
730 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
731 back_ptr, &ubd_dev->cow.bitmap_offset,
732 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
733 create_ptr);
734
735 if((fd == -ENOENT) && create_cow){
736 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
737 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
738 &ubd_dev->cow.bitmap_offset,
739 &ubd_dev->cow.bitmap_len,
740 &ubd_dev->cow.data_offset);
741 if(fd >= 0){
742 printk(KERN_INFO "Creating \"%s\" as COW file for "
743 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
744 }
745 }
746
747 if(fd < 0){
748 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
749 -fd);
750 return fd;
751 }
752 ubd_dev->fd = fd;
753
754 if(ubd_dev->cow.file != NULL){
755 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
756
757 err = -ENOMEM;
758 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
759 if(ubd_dev->cow.bitmap == NULL){
760 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
761 goto error;
762 }
763 flush_tlb_kernel_vm();
764
765 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
766 ubd_dev->cow.bitmap_offset,
767 ubd_dev->cow.bitmap_len);
768 if(err < 0)
769 goto error;
770
771 flags = ubd_dev->openflags;
772 flags.w = 0;
773 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
774 NULL, NULL, NULL, NULL);
775 if(err < 0) goto error;
776 ubd_dev->cow.fd = err;
777 }
778 return 0;
779 error:
780 os_close_file(ubd_dev->fd);
781 return err;
782}
783
784static void ubd_device_release(struct device *dev)
785{
786 struct ubd *ubd_dev = dev_get_drvdata(dev);
787
788 blk_cleanup_queue(ubd_dev->queue);
789 *ubd_dev = ((struct ubd) DEFAULT_UBD);
790}
791
792static int ubd_disk_register(int major, u64 size, int unit,
793 struct gendisk **disk_out)
794{
795 struct gendisk *disk;
796
797 disk = alloc_disk(1 << UBD_SHIFT);
798 if(disk == NULL)
799 return -ENOMEM;
800
801 disk->major = major;
802 disk->first_minor = unit << UBD_SHIFT;
803 disk->fops = &ubd_blops;
804 set_capacity(disk, size / 512);
805 if (major == UBD_MAJOR)
806 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
807 else
808 sprintf(disk->disk_name, "ubd_fake%d", unit);
809
810 /* sysfs register (not for ide fake devices) */
811 if (major == UBD_MAJOR) {
812 ubd_devs[unit].pdev.id = unit;
813 ubd_devs[unit].pdev.name = DRIVER_NAME;
814 ubd_devs[unit].pdev.dev.release = ubd_device_release;
815 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
816 platform_device_register(&ubd_devs[unit].pdev);
817 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
818 }
819
820 disk->private_data = &ubd_devs[unit];
821 disk->queue = ubd_devs[unit].queue;
822 add_disk(disk);
823
824 *disk_out = disk;
825 return 0;
826}
827
/* Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesized so that expressions can be passed safely, and the mask is
 * written as -(1 << 9) to avoid left-shifting a negative value. */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & -(1 << 9))
829
830static int ubd_add(int n, char **error_out)
831{
832 struct ubd *ubd_dev = &ubd_devs[n];
833 int err = 0;
834
835 if(ubd_dev->file == NULL)
836 goto out;
837
838 err = ubd_file_size(ubd_dev, &ubd_dev->size);
839 if(err < 0){
840 *error_out = "Couldn't determine size of device's file";
841 goto out;
842 }
843
844 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
845
846 INIT_LIST_HEAD(&ubd_dev->restart);
847 sg_init_table(ubd_dev->sg, MAX_SG);
848
849 err = -ENOMEM;
850 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
851 if (ubd_dev->queue == NULL) {
852 *error_out = "Failed to initialize device queue";
853 goto out;
854 }
855 ubd_dev->queue->queuedata = ubd_dev;
856
857 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
858 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
859 if(err){
860 *error_out = "Failed to register device";
861 goto out_cleanup;
862 }
863
864 if (fake_major != UBD_MAJOR)
865 ubd_disk_register(fake_major, ubd_dev->size, n,
866 &fake_gendisk[n]);
867
868 /*
869 * Perhaps this should also be under the "if (fake_major)" above
870 * using the fake_disk->disk_name
871 */
872 if (fake_ide)
873 make_ide_entries(ubd_gendisk[n]->disk_name);
874
875 err = 0;
876out:
877 return err;
878
879out_cleanup:
880 blk_cleanup_queue(ubd_dev->queue);
881 goto out;
882}
883
884static int ubd_config(char *str, char **error_out)
885{
886 int n, ret;
887
888 /* This string is possibly broken up and stored, so it's only
889 * freed if ubd_setup_common fails, or if only general options
890 * were set.
891 */
892 str = kstrdup(str, GFP_KERNEL);
893 if (str == NULL) {
894 *error_out = "Failed to allocate memory";
895 return -ENOMEM;
896 }
897
898 ret = ubd_setup_common(str, &n, error_out);
899 if (ret)
900 goto err_free;
901
902 if (n == -1) {
903 ret = 0;
904 goto err_free;
905 }
906
907 mutex_lock(&ubd_lock);
908 ret = ubd_add(n, error_out);
909 if (ret)
910 ubd_devs[n].file = NULL;
911 mutex_unlock(&ubd_lock);
912
913out:
914 return ret;
915
916err_free:
917 kfree(str);
918 goto out;
919}
920
921static int ubd_get_config(char *name, char *str, int size, char **error_out)
922{
923 struct ubd *ubd_dev;
924 int n, len = 0;
925
926 n = parse_unit(&name);
927 if((n >= MAX_DEV) || (n < 0)){
928 *error_out = "ubd_get_config : device number out of range";
929 return -1;
930 }
931
932 ubd_dev = &ubd_devs[n];
933 mutex_lock(&ubd_lock);
934
935 if(ubd_dev->file == NULL){
936 CONFIG_CHUNK(str, size, len, "", 1);
937 goto out;
938 }
939
940 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
941
942 if(ubd_dev->cow.file != NULL){
943 CONFIG_CHUNK(str, size, len, ",", 0);
944 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
945 }
946 else CONFIG_CHUNK(str, size, len, "", 1);
947
948 out:
949 mutex_unlock(&ubd_lock);
950 return len;
951}
952
953static int ubd_id(char **str, int *start_out, int *end_out)
954{
955 int n;
956
957 n = parse_unit(str);
958 *start_out = 0;
959 *end_out = MAX_DEV - 1;
960 return n;
961}
962
963static int ubd_remove(int n, char **error_out)
964{
965 struct gendisk *disk = ubd_gendisk[n];
966 struct ubd *ubd_dev;
967 int err = -ENODEV;
968
969 mutex_lock(&ubd_lock);
970
971 ubd_dev = &ubd_devs[n];
972
973 if(ubd_dev->file == NULL)
974 goto out;
975
976 /* you cannot remove a open disk */
977 err = -EBUSY;
978 if(ubd_dev->count > 0)
979 goto out;
980
981 ubd_gendisk[n] = NULL;
982 if(disk != NULL){
983 del_gendisk(disk);
984 put_disk(disk);
985 }
986
987 if(fake_gendisk[n] != NULL){
988 del_gendisk(fake_gendisk[n]);
989 put_disk(fake_gendisk[n]);
990 fake_gendisk[n] = NULL;
991 }
992
993 err = 0;
994 platform_device_unregister(&ubd_dev->pdev);
995out:
996 mutex_unlock(&ubd_lock);
997 return err;
998}
999
/* All these are called by mconsole in process context and without
 * ubd-specific locks. The structure itself is const except for .list.
 * It is registered with mconsole under the name "ubd" by ubd_mc_init().
 */
static struct mc_device ubd_mc = {
	.list		= LIST_HEAD_INIT(ubd_mc.list),
	.name		= "ubd",
	.config		= ubd_config,
	.get_config	= ubd_get_config,
	.id		= ubd_id,
	.remove		= ubd_remove,
};
1011
1012static int __init ubd_mc_init(void)
1013{
1014 mconsole_register_dev(&ubd_mc);
1015 return 0;
1016}
1017
1018__initcall(ubd_mc_init);
1019
1020static int __init ubd0_init(void)
1021{
1022 struct ubd *ubd_dev = &ubd_devs[0];
1023
1024 mutex_lock(&ubd_lock);
1025 if(ubd_dev->file == NULL)
1026 ubd_dev->file = "root_fs";
1027 mutex_unlock(&ubd_lock);
1028
1029 return 0;
1030}
1031
1032__initcall(ubd0_init);
1033
1034/* Used in ubd_init, which is an initcall */
1035static struct platform_driver ubd_driver = {
1036 .driver = {
1037 .name = DRIVER_NAME,
1038 },
1039};
1040
1041static int __init ubd_init(void)
1042{
1043 char *error;
1044 int i, err;
1045
1046 if (register_blkdev(UBD_MAJOR, "ubd"))
1047 return -1;
1048
1049 if (fake_major != UBD_MAJOR) {
1050 char name[sizeof("ubd_nnn\0")];
1051
1052 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1053 if (register_blkdev(fake_major, "ubd"))
1054 return -1;
1055 }
1056 platform_driver_register(&ubd_driver);
1057 mutex_lock(&ubd_lock);
1058 for (i = 0; i < MAX_DEV; i++){
1059 err = ubd_add(i, &error);
1060 if(err)
1061 printk(KERN_ERR "Failed to initialize ubd device %d :"
1062 "%s\n", i, error);
1063 }
1064 mutex_unlock(&ubd_lock);
1065 return 0;
1066}
1067
1068late_initcall(ubd_init);
1069
1070static int __init ubd_driver_init(void){
1071 unsigned long stack;
1072 int err;
1073
1074 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1075 if(global_openflags.s){
1076 printk(KERN_INFO "ubd: Synchronous mode\n");
1077 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1078 * enough. So use anyway the io thread. */
1079 }
1080 stack = alloc_stack(0, 0);
1081 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1082 &thread_fd);
1083 if(io_pid < 0){
1084 printk(KERN_ERR
1085 "ubd : Failed to start I/O thread (errno = %d) - "
1086 "falling back to synchronous I/O\n", -io_pid);
1087 io_pid = -1;
1088 return 0;
1089 }
1090 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1091 IRQF_DISABLED, "ubd", ubd_devs);
1092 if(err != 0)
1093 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1094 return 0;
1095}
1096
1097device_initcall(ubd_driver_init);
1098
1099static int ubd_open(struct block_device *bdev, fmode_t mode)
1100{
1101 struct gendisk *disk = bdev->bd_disk;
1102 struct ubd *ubd_dev = disk->private_data;
1103 int err = 0;
1104
1105 mutex_lock(&ubd_mutex);
1106 if(ubd_dev->count == 0){
1107 err = ubd_open_dev(ubd_dev);
1108 if(err){
1109 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1110 disk->disk_name, ubd_dev->file, -err);
1111 goto out;
1112 }
1113 }
1114 ubd_dev->count++;
1115 set_disk_ro(disk, !ubd_dev->openflags.w);
1116
1117 /* This should no more be needed. And it didn't work anyway to exclude
1118 * read-write remounting of filesystems.*/
1119 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1120 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1121 err = -EROFS;
1122 }*/
1123out:
1124 mutex_unlock(&ubd_mutex);
1125 return err;
1126}
1127
1128static int ubd_release(struct gendisk *disk, fmode_t mode)
1129{
1130 struct ubd *ubd_dev = disk->private_data;
1131
1132 mutex_lock(&ubd_mutex);
1133 if(--ubd_dev->count == 0)
1134 ubd_close_dev(ubd_dev);
1135 mutex_unlock(&ubd_mutex);
1136 return 0;
1137}
1138
1139static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1140 __u64 *cow_offset, unsigned long *bitmap,
1141 __u64 bitmap_offset, unsigned long *bitmap_words,
1142 __u64 bitmap_len)
1143{
1144 __u64 sector = io_offset >> 9;
1145 int i, update_bitmap = 0;
1146
1147 for(i = 0; i < length >> 9; i++){
1148 if(cow_mask != NULL)
1149 ubd_set_bit(i, (unsigned char *) cow_mask);
1150 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1151 continue;
1152
1153 update_bitmap = 1;
1154 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1155 }
1156
1157 if(!update_bitmap)
1158 return;
1159
1160 *cow_offset = sector / (sizeof(unsigned long) * 8);
1161
1162 /* This takes care of the case where we're exactly at the end of the
1163 * device, and *cow_offset + 1 is off the end. So, just back it up
1164 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1165 * for the original diagnosis.
1166 */
1167 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1168 sizeof(unsigned long)) - 1))
1169 (*cow_offset)--;
1170
1171 bitmap_words[0] = bitmap[*cow_offset];
1172 bitmap_words[1] = bitmap[*cow_offset + 1];
1173
1174 *cow_offset *= sizeof(unsigned long);
1175 *cow_offset += bitmap_offset;
1176}
1177
1178static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1179 __u64 bitmap_offset, __u64 bitmap_len)
1180{
1181 __u64 sector = req->offset >> 9;
1182 int i;
1183
1184 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1185 panic("Operation too long");
1186
1187 if(req->op == UBD_READ) {
1188 for(i = 0; i < req->length >> 9; i++){
1189 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1190 ubd_set_bit(i, (unsigned char *)
1191 &req->sector_mask);
1192 }
1193 }
1194 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1195 &req->cow_offset, bitmap, bitmap_offset,
1196 req->bitmap_words, bitmap_len);
1197}
1198
1199/* Called with dev->lock held */
1200static void prepare_request(struct request *req, struct io_thread_req *io_req,
1201 unsigned long long offset, int page_offset,
1202 int len, struct page *page)
1203{
1204 struct gendisk *disk = req->rq_disk;
1205 struct ubd *ubd_dev = disk->private_data;
1206
1207 io_req->req = req;
1208 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1209 ubd_dev->fd;
1210 io_req->fds[1] = ubd_dev->fd;
1211 io_req->cow_offset = -1;
1212 io_req->offset = offset;
1213 io_req->length = len;
1214 io_req->error = 0;
1215 io_req->sector_mask = 0;
1216
1217 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1218 io_req->offsets[0] = 0;
1219 io_req->offsets[1] = ubd_dev->cow.data_offset;
1220 io_req->buffer = page_address(page) + page_offset;
1221 io_req->sectorsize = 1 << 9;
1222
1223 if(ubd_dev->cow.file != NULL)
1224 cowify_req(io_req, ubd_dev->cow.bitmap,
1225 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1226
1227}
1228
1229/* Called with dev->lock held */
1230static void do_ubd_request(struct request_queue *q)
1231{
1232 struct io_thread_req *io_req;
1233 struct request *req;
1234 int n;
1235
1236 while(1){
1237 struct ubd *dev = q->queuedata;
1238 if(dev->end_sg == 0){
1239 struct request *req = blk_fetch_request(q);
1240 if(req == NULL)
1241 return;
1242
1243 dev->request = req;
1244 dev->rq_pos = blk_rq_pos(req);
1245 dev->start_sg = 0;
1246 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1247 }
1248
1249 req = dev->request;
1250 while(dev->start_sg < dev->end_sg){
1251 struct scatterlist *sg = &dev->sg[dev->start_sg];
1252
1253 io_req = kmalloc(sizeof(struct io_thread_req),
1254 GFP_ATOMIC);
1255 if(io_req == NULL){
1256 if(list_empty(&dev->restart))
1257 list_add(&dev->restart, &restart);
1258 return;
1259 }
1260 prepare_request(req, io_req,
1261 (unsigned long long)dev->rq_pos << 9,
1262 sg->offset, sg->length, sg_page(sg));
1263
1264 n = os_write_file(thread_fd, &io_req,
1265 sizeof(struct io_thread_req *));
1266 if(n != sizeof(struct io_thread_req *)){
1267 if(n != -EAGAIN)
1268 printk("write to io thread failed, "
1269 "errno = %d\n", -n);
1270 else if(list_empty(&dev->restart))
1271 list_add(&dev->restart, &restart);
1272 kfree(io_req);
1273 return;
1274 }
1275
1276 dev->rq_pos += sg->length >> 9;
1277 dev->start_sg++;
1278 }
1279 dev->end_sg = 0;
1280 dev->request = NULL;
1281 }
1282}
1283
1284static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1285{
1286 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1287
1288 geo->heads = 128;
1289 geo->sectors = 32;
1290 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1291 return 0;
1292}
1293
1294static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1295 unsigned int cmd, unsigned long arg)
1296{
1297 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1298 u16 ubd_id[ATA_ID_WORDS];
1299
1300 switch (cmd) {
1301 struct cdrom_volctrl volume;
1302 case HDIO_GET_IDENTITY:
1303 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1304 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1305 ubd_id[ATA_ID_HEADS] = 128;
1306 ubd_id[ATA_ID_SECTORS] = 32;
1307 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1308 sizeof(ubd_id)))
1309 return -EFAULT;
1310 return 0;
1311
1312 case CDROMVOLREAD:
1313 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1314 return -EFAULT;
1315 volume.channel0 = 255;
1316 volume.channel1 = 255;
1317 volume.channel2 = 255;
1318 volume.channel3 = 255;
1319 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1320 return -EFAULT;
1321 return 0;
1322 }
1323 return -EINVAL;
1324}
1325
1326static int update_bitmap(struct io_thread_req *req)
1327{
1328 int n;
1329
1330 if(req->cow_offset == -1)
1331 return 0;
1332
1333 n = os_seek_file(req->fds[1], req->cow_offset);
1334 if(n < 0){
1335 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1336 return 1;
1337 }
1338
1339 n = os_write_file(req->fds[1], &req->bitmap_words,
1340 sizeof(req->bitmap_words));
1341 if(n != sizeof(req->bitmap_words)){
1342 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1343 req->fds[1]);
1344 return 1;
1345 }
1346
1347 return 0;
1348}
1349
1350static void do_io(struct io_thread_req *req)
1351{
1352 char *buf;
1353 unsigned long len;
1354 int n, nsectors, start, end, bit;
1355 int err;
1356 __u64 off;
1357
1358 nsectors = req->length / req->sectorsize;
1359 start = 0;
1360 do {
1361 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1362 end = start;
1363 while((end < nsectors) &&
1364 (ubd_test_bit(end, (unsigned char *)
1365 &req->sector_mask) == bit))
1366 end++;
1367
1368 off = req->offset + req->offsets[bit] +
1369 start * req->sectorsize;
1370 len = (end - start) * req->sectorsize;
1371 buf = &req->buffer[start * req->sectorsize];
1372
1373 err = os_seek_file(req->fds[bit], off);
1374 if(err < 0){
1375 printk("do_io - lseek failed : err = %d\n", -err);
1376 req->error = 1;
1377 return;
1378 }
1379 if(req->op == UBD_READ){
1380 n = 0;
1381 do {
1382 buf = &buf[n];
1383 len -= n;
1384 n = os_read_file(req->fds[bit], buf, len);
1385 if (n < 0) {
1386 printk("do_io - read failed, err = %d "
1387 "fd = %d\n", -n, req->fds[bit]);
1388 req->error = 1;
1389 return;
1390 }
1391 } while((n < len) && (n != 0));
1392 if (n < len) memset(&buf[n], 0, len - n);
1393 } else {
1394 n = os_write_file(req->fds[bit], buf, len);
1395 if(n != len){
1396 printk("do_io - write failed err = %d "
1397 "fd = %d\n", -n, req->fds[bit]);
1398 req->error = 1;
1399 return;
1400 }
1401 }
1402
1403 start = end;
1404 } while(start < nsectors);
1405
1406 req->error = update_bitmap(req);
1407}
1408
/*
 * Descriptor the io thread reads requests from and writes completions to.
 * Per the original note it is changed in start_io_thread, which is
 * serialized by running only from an initcall (this file calls it from
 * ubd_driver_init).
 */
int kernel_fd = -1;

/*
 * Only changed by the io thread, which increments it once per request.
 * XXX: nothing in this part of the file reads it.
 */
static int io_count;
1416
1417int io_thread(void *arg)
1418{
1419 struct io_thread_req *req;
1420 int n;
1421
1422 ignore_sigwinch_sig();
1423 while(1){
1424 n = os_read_file(kernel_fd, &req,
1425 sizeof(struct io_thread_req *));
1426 if(n != sizeof(struct io_thread_req *)){
1427 if(n < 0)
1428 printk("io_thread - read failed, fd = %d, "
1429 "err = %d\n", kernel_fd, -n);
1430 else {
1431 printk("io_thread - short read, fd = %d, "
1432 "length = %d\n", kernel_fd, n);
1433 }
1434 continue;
1435 }
1436 io_count++;
1437 do_io(req);
1438 n = os_write_file(kernel_fd, &req,
1439 sizeof(struct io_thread_req *));
1440 if(n != sizeof(struct io_thread_req *))
1441 printk("io_thread - write failed, fd = %d, err = %d\n",
1442 kernel_fd, -n);
1443 }
1444
1445 return 0;
1446}