Loading...
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2018 Cambridge Greys Ltd
4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
6 */
7
8/* 2001-09-28...2002-04-17
9 * Partition stuff by James_McMechan@hotmail.com
10 * old style ubd by setting UBD_SHIFT to 0
11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
12 * partitions have changed in 2.5
13 * 2003-01-29 more tinkering for 2.5.59-1
14 * This should now address the sysfs problems and has
15 * the symlink for devfs to allow for booting with
16 * the common /dev/ubd/discX/... names rather than
17 * only /dev/ubdN/discN this version also has lots of
18 * clean ups preparing for ubd-many.
19 * James McMechan
20 */
21
22#define UBD_SHIFT 4
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
27#include <linux/blk-mq.h>
28#include <linux/ata.h>
29#include <linux/hdreg.h>
30#include <linux/major.h>
31#include <linux/cdrom.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/ctype.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <linux/platform_device.h>
38#include <linux/scatterlist.h>
39#include <asm/tlbflush.h>
40#include <kern_util.h>
41#include "mconsole_kern.h"
42#include <init.h>
43#include <irq_kern.h>
44#include "ubd.h"
45#include <os.h>
46#include "cow.h"
47
48/* Max request size is determined by sector mask - 32K */
49#define UBD_MAX_REQUEST (8 * sizeof(long))
50
/*
 * One data segment of a request handed to the I/O thread.
 * ubd_map_req() fills buffer/length; the COW helpers fill the rest.
 */
struct io_desc {
	/* Segment data, or NULL for discard / write-zeroes requests */
	char *buffer;
	/* Segment length in bytes */
	unsigned long length;
	/* One bit per sector of this segment, set by cowify_req() */
	unsigned long sector_mask;
	/* File offset of the bitmap words to write back (see cowify_bitmap) */
	unsigned long long cow_offset;
	/* Copy of the two bitmap words starting at cow_offset */
	unsigned long bitmap_words[2];
};
58
59struct io_thread_req {
60 struct request *req;
61 int fds[2];
62 unsigned long offsets[2];
63 unsigned long long offset;
64 int sectorsize;
65 int error;
66
67 int desc_cnt;
68 /* io_desc has to be the last element of the struct */
69 struct io_desc io_desc[];
70};
71
72
73static struct io_thread_req * (*irq_req_buffer)[];
74static struct io_thread_req *irq_remainder;
75static int irq_remainder_size;
76
77static struct io_thread_req * (*io_req_buffer)[];
78static struct io_thread_req *io_remainder;
79static int io_remainder_size;
80
81
82
/*
 * Return 1 if bit number 'bit' is set in the byte array 'data', else 0.
 * Bits are numbered LSB-first within each byte.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(*data) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	if (data[index] & (1 << shift))
		return 1;
	return 0;
}
93
/*
 * Set bit number 'bit' in the byte array 'data'.
 * Bits are numbered LSB-first within each byte.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(*data) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	data[index] |= (1 << shift);
}
104/*End stuff from ubd_user.h*/
105
106#define DRIVER_NAME "uml-blkdev"
107
108static DEFINE_MUTEX(ubd_lock);
109static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
110
111static int ubd_open(struct block_device *bdev, fmode_t mode);
112static void ubd_release(struct gendisk *disk, fmode_t mode);
113static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
114 unsigned int cmd, unsigned long arg);
115static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
116
117#define MAX_DEV (16)
118
119static const struct block_device_operations ubd_blops = {
120 .owner = THIS_MODULE,
121 .open = ubd_open,
122 .release = ubd_release,
123 .ioctl = ubd_ioctl,
124 .compat_ioctl = blkdev_compat_ptr_ioctl,
125 .getgeo = ubd_getgeo,
126};
127
128/* Protected by ubd_lock */
129static struct gendisk *ubd_gendisk[MAX_DEV];
130
131#ifdef CONFIG_BLK_DEV_UBD_SYNC
132#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
133 .cl = 1 })
134#else
135#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
136 .cl = 1 })
137#endif
138static struct openflags global_openflags = OPEN_FLAGS;
139
/* Copy-on-write state of a device that sits on top of a backing file. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* In-memory copy of the COW bitmap, vmalloc'd in ubd_open_dev() */
	unsigned long *bitmap;
	/* Size of the bitmap in bytes (amount allocated and read) */
	unsigned long bitmap_len;
	/* Offset of the bitmap inside the COW file */
	int bitmap_offset;
	/* Filled in alongside bitmap_len when the COW file is opened */
	int data_offset;
};
150
151#define MAX_SG 64
152
153struct ubd {
154 /* name (and fd, below) of the file opened for writing, either the
155 * backing or the cow file. */
156 char *file;
157 char *serial;
158 int count;
159 int fd;
160 __u64 size;
161 struct openflags boot_openflags;
162 struct openflags openflags;
163 unsigned shared:1;
164 unsigned no_cow:1;
165 unsigned no_trim:1;
166 struct cow cow;
167 struct platform_device pdev;
168 struct request_queue *queue;
169 struct blk_mq_tag_set tag_set;
170 spinlock_t lock;
171};
172
173#define DEFAULT_COW { \
174 .file = NULL, \
175 .fd = -1, \
176 .bitmap = NULL, \
177 .bitmap_offset = 0, \
178 .data_offset = 0, \
179}
180
181#define DEFAULT_UBD { \
182 .file = NULL, \
183 .serial = NULL, \
184 .count = 0, \
185 .fd = -1, \
186 .size = -1, \
187 .boot_openflags = OPEN_FLAGS, \
188 .openflags = OPEN_FLAGS, \
189 .no_cow = 0, \
190 .no_trim = 0, \
191 .shared = 0, \
192 .cow = DEFAULT_COW, \
193 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
194}
195
196/* Protected by ubd_lock */
197static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
198
199static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
200 const struct blk_mq_queue_data *bd);
201
/*
 * Handler for the obsolete "fake_ide" option: it only warns that the
 * option no longer exists. Returns 1 so the option counts as consumed.
 */
static int fake_ide_setup(char *str)
{
	pr_warn("The fake_ide option has been removed\n");

	return 1;
}
207__setup("fake_ide", fake_ide_setup);
208
209__uml_help(fake_ide_setup,
210"fake_ide\n"
211" Obsolete stub.\n\n"
212);
213
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul) or a single lowercase letter ('a' is unit 0).
 *
 * On success *ptr is advanced past the designator and the unit number is
 * returned. Otherwise -1 is returned and *ptr is left untouched.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *after;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &after, 0);
		if (after == cur)
			return -1;
		*ptr = after;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'z'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
232
233/* If *index_out == -1 at exit, the passed option was a general one;
234 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
235 * should not be freed on exit.
236 */
237static int ubd_setup_common(char *str, int *index_out, char **error_out)
238{
239 struct ubd *ubd_dev;
240 struct openflags flags = global_openflags;
241 char *file, *backing_file, *serial;
242 int n, err = 0, i;
243
244 if(index_out) *index_out = -1;
245 n = *str;
246 if(n == '='){
247 str++;
248 if(!strcmp(str, "sync")){
249 global_openflags = of_sync(global_openflags);
250 return err;
251 }
252
253 pr_warn("fake major not supported any more\n");
254 return 0;
255 }
256
257 n = parse_unit(&str);
258 if(n < 0){
259 *error_out = "Couldn't parse device number";
260 return -EINVAL;
261 }
262 if(n >= MAX_DEV){
263 *error_out = "Device number out of range";
264 return 1;
265 }
266
267 err = -EBUSY;
268 mutex_lock(&ubd_lock);
269
270 ubd_dev = &ubd_devs[n];
271 if(ubd_dev->file != NULL){
272 *error_out = "Device is already configured";
273 goto out;
274 }
275
276 if (index_out)
277 *index_out = n;
278
279 err = -EINVAL;
280 for (i = 0; i < sizeof("rscdt="); i++) {
281 switch (*str) {
282 case 'r':
283 flags.w = 0;
284 break;
285 case 's':
286 flags.s = 1;
287 break;
288 case 'd':
289 ubd_dev->no_cow = 1;
290 break;
291 case 'c':
292 ubd_dev->shared = 1;
293 break;
294 case 't':
295 ubd_dev->no_trim = 1;
296 break;
297 case '=':
298 str++;
299 goto break_loop;
300 default:
301 *error_out = "Expected '=' or flag letter "
302 "(r, s, c, t or d)";
303 goto out;
304 }
305 str++;
306 }
307
308 if (*str == '=')
309 *error_out = "Too many flags specified";
310 else
311 *error_out = "Missing '='";
312 goto out;
313
314break_loop:
315 file = strsep(&str, ",:");
316 if (*file == '\0')
317 file = NULL;
318
319 backing_file = strsep(&str, ",:");
320 if (backing_file && *backing_file == '\0')
321 backing_file = NULL;
322
323 serial = strsep(&str, ",:");
324 if (serial && *serial == '\0')
325 serial = NULL;
326
327 if (backing_file && ubd_dev->no_cow) {
328 *error_out = "Can't specify both 'd' and a cow file";
329 goto out;
330 }
331
332 err = 0;
333 ubd_dev->file = file;
334 ubd_dev->cow.file = backing_file;
335 ubd_dev->serial = serial;
336 ubd_dev->boot_openflags = flags;
337out:
338 mutex_unlock(&ubd_lock);
339 return err;
340}
341
342static int ubd_setup(char *str)
343{
344 char *error;
345 int err;
346
347 err = ubd_setup_common(str, NULL, &error);
348 if(err)
349 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
350 "%s\n", str, error);
351 return 1;
352}
353
354__setup("ubd", ubd_setup);
355__uml_help(ubd_setup,
356"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
357" This is used to associate a device with a file in the underlying\n"
358" filesystem. When specifying two filenames, the first one is the\n"
359" COW name and the second is the backing file name. As separator you can\n"
360" use either a ':' or a ',': the first one allows writing things like;\n"
361" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
362" while with a ',' the shell would not expand the 2nd '~'.\n"
363" When using only one filename, UML will detect whether to treat it like\n"
364" a COW file or a backing file. To override this detection, add the 'd'\n"
365" flag:\n"
366" ubd0d=BackingFile\n"
367" Usually, there is a filesystem in the file, but \n"
368" that's not required. Swap devices containing swap files can be\n"
369" specified like this. Also, a file which doesn't contain a\n"
370" filesystem can have its contents read in the virtual \n"
371" machine by running 'dd' on the device. <n> must be in the range\n"
372" 0 to 7. Appending an 'r' to the number will cause that device\n"
373" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
374" an 's' will cause data to be written to disk on the host immediately.\n"
375" 'c' will cause the device to be treated as being shared between multiple\n"
376" UMLs and file locking will be turned off - this is appropriate for a\n"
377" cluster filesystem and inappropriate at almost all other times.\n\n"
378" 't' will disable trim/discard support on the device (enabled by default).\n\n"
379" An optional device serial number can be exposed using the serial parameter\n"
380" on the cmdline which is exposed as a sysfs entry. This is particularly\n"
381" useful when a unique number should be given to the device. Note when\n"
382" specifying a label, the filename2 must be also presented. It can be\n"
383" an empty string, in which case the backing file is not used:\n"
384" ubd0=File,,Serial\n"
385);
386
/*
 * Catches the common "udb" misspelling of "ubd" on the command line and
 * logs a hint. Returns 1 so the option counts as consumed.
 */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n", str);

	return 1;
}
393
394__setup("udb", udb_setup);
395__uml_help(udb_setup,
396"udb\n"
397" This option is here solely to catch ubd -> udb typos, which can be\n"
398" to impossible to catch visually unless you specifically look for\n"
399" them. The only result of any option starting with 'udb' is an error\n"
400" in the boot output.\n\n"
401);
402
/* Only changed by ubd_driver_init, which is an initcall. */
404static int thread_fd = -1;
405
406/* Function to read several request pointers at a time
407* handling fractional reads if (and as) needed
408*/
409
410static int bulk_req_safe_read(
411 int fd,
412 struct io_thread_req * (*request_buffer)[],
413 struct io_thread_req **remainder,
414 int *remainder_size,
415 int max_recs
416 )
417{
418 int n = 0;
419 int res = 0;
420
421 if (*remainder_size > 0) {
422 memmove(
423 (char *) request_buffer,
424 (char *) remainder, *remainder_size
425 );
426 n = *remainder_size;
427 }
428
429 res = os_read_file(
430 fd,
431 ((char *) request_buffer) + *remainder_size,
432 sizeof(struct io_thread_req *)*max_recs
433 - *remainder_size
434 );
435 if (res > 0) {
436 n += res;
437 if ((n % sizeof(struct io_thread_req *)) > 0) {
438 /*
439 * Read somehow returned not a multiple of dword
440 * theoretically possible, but never observed in the
441 * wild, so read routine must be able to handle it
442 */
443 *remainder_size = n % sizeof(struct io_thread_req *);
444 WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
445 memmove(
446 remainder,
447 ((char *) request_buffer) +
448 (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
449 *remainder_size
450 );
451 n = n - *remainder_size;
452 }
453 } else {
454 n = res;
455 }
456 return n;
457}
458
459/* Called without dev->lock held, and only in interrupt context. */
460static void ubd_handler(void)
461{
462 int n;
463 int count;
464
465 while(1){
466 n = bulk_req_safe_read(
467 thread_fd,
468 irq_req_buffer,
469 &irq_remainder,
470 &irq_remainder_size,
471 UBD_REQ_BUFFER_SIZE
472 );
473 if (n < 0) {
474 if(n == -EAGAIN)
475 break;
476 printk(KERN_ERR "spurious interrupt in ubd_handler, "
477 "err = %d\n", -n);
478 return;
479 }
480 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
481 struct io_thread_req *io_req = (*irq_req_buffer)[count];
482
483 if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
484 blk_queue_max_discard_sectors(io_req->req->q, 0);
485 blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
486 }
487 blk_mq_end_request(io_req->req, io_req->error);
488 kfree(io_req);
489 }
490 }
491}
492
493static irqreturn_t ubd_intr(int irq, void *dev)
494{
495 ubd_handler();
496 return IRQ_HANDLED;
497}
498
/* Only changed by ubd_driver_init, which is an initcall. */
500static int io_pid = -1;
501
502static void kill_io_thread(void)
503{
504 if(io_pid != -1)
505 os_kill_process(io_pid, 1);
506}
507
508__uml_exitcall(kill_io_thread);
509
510static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
511{
512 char *file;
513 int fd;
514 int err;
515
516 __u32 version;
517 __u32 align;
518 char *backing_file;
519 time64_t mtime;
520 unsigned long long size;
521 int sector_size;
522 int bitmap_offset;
523
524 if (ubd_dev->file && ubd_dev->cow.file) {
525 file = ubd_dev->cow.file;
526
527 goto out;
528 }
529
530 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
531 if (fd < 0)
532 return fd;
533
534 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
535 &mtime, &size, §or_size, &align, &bitmap_offset);
536 os_close_file(fd);
537
538 if(err == -EINVAL)
539 file = ubd_dev->file;
540 else
541 file = backing_file;
542
543out:
544 return os_file_size(file, size_out);
545}
546
/*
 * Read len bytes of COW bitmap from fd at the given offset into buf.
 * Returns 0 when os_pread_file() does not report an error, else the
 * negative error it returned.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return (ret < 0) ? ret : 0;
}
557
558static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
559{
560 time64_t modtime;
561 unsigned long long actual;
562 int err;
563
564 err = os_file_modtime(file, &modtime);
565 if (err < 0) {
566 printk(KERN_ERR "Failed to get modification time of backing "
567 "file \"%s\", err = %d\n", file, -err);
568 return err;
569 }
570
571 err = os_file_size(file, &actual);
572 if (err < 0) {
573 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
574 "err = %d\n", file, -err);
575 return err;
576 }
577
578 if (actual != size) {
579 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
580 * the typecast.*/
581 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
582 "vs backing file\n", (unsigned long long) size, actual);
583 return -EINVAL;
584 }
585 if (modtime != mtime) {
586 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
587 "backing file\n", mtime, modtime);
588 return -EINVAL;
589 }
590 return 0;
591}
592
593static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
594{
595 struct uml_stat buf1, buf2;
596 int err;
597
598 if (from_cmdline == NULL)
599 return 0;
600 if (!strcmp(from_cmdline, from_cow))
601 return 0;
602
603 err = os_stat_file(from_cmdline, &buf1);
604 if (err < 0) {
605 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
606 -err);
607 return 0;
608 }
609 err = os_stat_file(from_cow, &buf2);
610 if (err < 0) {
611 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
612 -err);
613 return 1;
614 }
615 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
616 return 0;
617
618 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
619 "\"%s\" specified in COW header of \"%s\"\n",
620 from_cmdline, from_cow, cow);
621 return 1;
622}
623
624static int open_ubd_file(char *file, struct openflags *openflags, int shared,
625 char **backing_file_out, int *bitmap_offset_out,
626 unsigned long *bitmap_len_out, int *data_offset_out,
627 int *create_cow_out)
628{
629 time64_t mtime;
630 unsigned long long size;
631 __u32 version, align;
632 char *backing_file;
633 int fd, err, sectorsize, asked_switch, mode = 0644;
634
635 fd = os_open_file(file, *openflags, mode);
636 if (fd < 0) {
637 if ((fd == -ENOENT) && (create_cow_out != NULL))
638 *create_cow_out = 1;
639 if (!openflags->w ||
640 ((fd != -EROFS) && (fd != -EACCES)))
641 return fd;
642 openflags->w = 0;
643 fd = os_open_file(file, *openflags, mode);
644 if (fd < 0)
645 return fd;
646 }
647
648 if (shared)
649 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
650 else {
651 err = os_lock_file(fd, openflags->w);
652 if (err < 0) {
653 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
654 file, -err);
655 goto out_close;
656 }
657 }
658
659 /* Successful return case! */
660 if (backing_file_out == NULL)
661 return fd;
662
663 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
664 &size, §orsize, &align, bitmap_offset_out);
665 if (err && (*backing_file_out != NULL)) {
666 printk(KERN_ERR "Failed to read COW header from COW file "
667 "\"%s\", errno = %d\n", file, -err);
668 goto out_close;
669 }
670 if (err)
671 return fd;
672
673 asked_switch = path_requires_switch(*backing_file_out, backing_file,
674 file);
675
676 /* Allow switching only if no mismatch. */
677 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
678 mtime)) {
679 printk(KERN_ERR "Switching backing file to '%s'\n",
680 *backing_file_out);
681 err = write_cow_header(file, fd, *backing_file_out,
682 sectorsize, align, &size);
683 if (err) {
684 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
685 goto out_close;
686 }
687 } else {
688 *backing_file_out = backing_file;
689 err = backing_file_mismatch(*backing_file_out, size, mtime);
690 if (err)
691 goto out_close;
692 }
693
694 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
695 bitmap_len_out, data_offset_out);
696
697 return fd;
698 out_close:
699 os_close_file(fd);
700 return err;
701}
702
703static int create_cow_file(char *cow_file, char *backing_file,
704 struct openflags flags,
705 int sectorsize, int alignment, int *bitmap_offset_out,
706 unsigned long *bitmap_len_out, int *data_offset_out)
707{
708 int err, fd;
709
710 flags.c = 1;
711 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
712 if (fd < 0) {
713 err = fd;
714 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
715 cow_file, -err);
716 goto out;
717 }
718
719 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
720 bitmap_offset_out, bitmap_len_out,
721 data_offset_out);
722 if (!err)
723 return fd;
724 os_close_file(fd);
725 out:
726 return err;
727}
728
729static void ubd_close_dev(struct ubd *ubd_dev)
730{
731 os_close_file(ubd_dev->fd);
732 if(ubd_dev->cow.file == NULL)
733 return;
734
735 os_close_file(ubd_dev->cow.fd);
736 vfree(ubd_dev->cow.bitmap);
737 ubd_dev->cow.bitmap = NULL;
738}
739
740static int ubd_open_dev(struct ubd *ubd_dev)
741{
742 struct openflags flags;
743 char **back_ptr;
744 int err, create_cow, *create_ptr;
745 int fd;
746
747 ubd_dev->openflags = ubd_dev->boot_openflags;
748 create_cow = 0;
749 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
750 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
751
752 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
753 back_ptr, &ubd_dev->cow.bitmap_offset,
754 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
755 create_ptr);
756
757 if((fd == -ENOENT) && create_cow){
758 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
759 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
760 &ubd_dev->cow.bitmap_offset,
761 &ubd_dev->cow.bitmap_len,
762 &ubd_dev->cow.data_offset);
763 if(fd >= 0){
764 printk(KERN_INFO "Creating \"%s\" as COW file for "
765 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
766 }
767 }
768
769 if(fd < 0){
770 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
771 -fd);
772 return fd;
773 }
774 ubd_dev->fd = fd;
775
776 if(ubd_dev->cow.file != NULL){
777 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
778
779 err = -ENOMEM;
780 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
781 if(ubd_dev->cow.bitmap == NULL){
782 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
783 goto error;
784 }
785 flush_tlb_kernel_vm();
786
787 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
788 ubd_dev->cow.bitmap_offset,
789 ubd_dev->cow.bitmap_len);
790 if(err < 0)
791 goto error;
792
793 flags = ubd_dev->openflags;
794 flags.w = 0;
795 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
796 NULL, NULL, NULL, NULL);
797 if(err < 0) goto error;
798 ubd_dev->cow.fd = err;
799 }
800 if (ubd_dev->no_trim == 0) {
801 ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
802 blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
803 blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
804 }
805 blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
806 return 0;
807 error:
808 os_close_file(ubd_dev->fd);
809 return err;
810}
811
812static void ubd_device_release(struct device *dev)
813{
814 struct ubd *ubd_dev = dev_get_drvdata(dev);
815
816 blk_mq_free_tag_set(&ubd_dev->tag_set);
817 *ubd_dev = ((struct ubd) DEFAULT_UBD);
818}
819
820static ssize_t serial_show(struct device *dev,
821 struct device_attribute *attr, char *buf)
822{
823 struct gendisk *disk = dev_to_disk(dev);
824 struct ubd *ubd_dev = disk->private_data;
825
826 if (!ubd_dev)
827 return 0;
828
829 return sprintf(buf, "%s", ubd_dev->serial);
830}
831
832static DEVICE_ATTR_RO(serial);
833
834static struct attribute *ubd_attrs[] = {
835 &dev_attr_serial.attr,
836 NULL,
837};
838
839static umode_t ubd_attrs_are_visible(struct kobject *kobj,
840 struct attribute *a, int n)
841{
842 return a->mode;
843}
844
845static const struct attribute_group ubd_attr_group = {
846 .attrs = ubd_attrs,
847 .is_visible = ubd_attrs_are_visible,
848};
849
850static const struct attribute_group *ubd_attr_groups[] = {
851 &ubd_attr_group,
852 NULL,
853};
854
855static int ubd_disk_register(int major, u64 size, int unit,
856 struct gendisk *disk)
857{
858 disk->major = major;
859 disk->first_minor = unit << UBD_SHIFT;
860 disk->minors = 1 << UBD_SHIFT;
861 disk->fops = &ubd_blops;
862 set_capacity(disk, size / 512);
863 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
864
865 ubd_devs[unit].pdev.id = unit;
866 ubd_devs[unit].pdev.name = DRIVER_NAME;
867 ubd_devs[unit].pdev.dev.release = ubd_device_release;
868 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
869 platform_device_register(&ubd_devs[unit].pdev);
870
871 disk->private_data = &ubd_devs[unit];
872 disk->queue = ubd_devs[unit].queue;
873 return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
874}
875
876#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
877
878static const struct blk_mq_ops ubd_mq_ops = {
879 .queue_rq = ubd_queue_rq,
880};
881
882static int ubd_add(int n, char **error_out)
883{
884 struct ubd *ubd_dev = &ubd_devs[n];
885 struct gendisk *disk;
886 int err = 0;
887
888 if(ubd_dev->file == NULL)
889 goto out;
890
891 err = ubd_file_size(ubd_dev, &ubd_dev->size);
892 if(err < 0){
893 *error_out = "Couldn't determine size of device's file";
894 goto out;
895 }
896
897 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
898
899 ubd_dev->tag_set.ops = &ubd_mq_ops;
900 ubd_dev->tag_set.queue_depth = 64;
901 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
902 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
903 ubd_dev->tag_set.driver_data = ubd_dev;
904 ubd_dev->tag_set.nr_hw_queues = 1;
905
906 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
907 if (err)
908 goto out;
909
910 disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev);
911 if (IS_ERR(disk)) {
912 err = PTR_ERR(disk);
913 goto out_cleanup_tags;
914 }
915 ubd_dev->queue = disk->queue;
916
917 blk_queue_write_cache(ubd_dev->queue, true, false);
918 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
919 blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
920 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
921 if (err)
922 goto out_cleanup_disk;
923
924 ubd_gendisk[n] = disk;
925 return 0;
926
927out_cleanup_disk:
928 put_disk(disk);
929out_cleanup_tags:
930 blk_mq_free_tag_set(&ubd_dev->tag_set);
931out:
932 return err;
933}
934
935static int ubd_config(char *str, char **error_out)
936{
937 int n, ret;
938
939 /* This string is possibly broken up and stored, so it's only
940 * freed if ubd_setup_common fails, or if only general options
941 * were set.
942 */
943 str = kstrdup(str, GFP_KERNEL);
944 if (str == NULL) {
945 *error_out = "Failed to allocate memory";
946 return -ENOMEM;
947 }
948
949 ret = ubd_setup_common(str, &n, error_out);
950 if (ret)
951 goto err_free;
952
953 if (n == -1) {
954 ret = 0;
955 goto err_free;
956 }
957
958 mutex_lock(&ubd_lock);
959 ret = ubd_add(n, error_out);
960 if (ret)
961 ubd_devs[n].file = NULL;
962 mutex_unlock(&ubd_lock);
963
964out:
965 return ret;
966
967err_free:
968 kfree(str);
969 goto out;
970}
971
972static int ubd_get_config(char *name, char *str, int size, char **error_out)
973{
974 struct ubd *ubd_dev;
975 int n, len = 0;
976
977 n = parse_unit(&name);
978 if((n >= MAX_DEV) || (n < 0)){
979 *error_out = "ubd_get_config : device number out of range";
980 return -1;
981 }
982
983 ubd_dev = &ubd_devs[n];
984 mutex_lock(&ubd_lock);
985
986 if(ubd_dev->file == NULL){
987 CONFIG_CHUNK(str, size, len, "", 1);
988 goto out;
989 }
990
991 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
992
993 if(ubd_dev->cow.file != NULL){
994 CONFIG_CHUNK(str, size, len, ",", 0);
995 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
996 }
997 else CONFIG_CHUNK(str, size, len, "", 1);
998
999 out:
1000 mutex_unlock(&ubd_lock);
1001 return len;
1002}
1003
1004static int ubd_id(char **str, int *start_out, int *end_out)
1005{
1006 int n;
1007
1008 n = parse_unit(str);
1009 *start_out = 0;
1010 *end_out = MAX_DEV - 1;
1011 return n;
1012}
1013
1014static int ubd_remove(int n, char **error_out)
1015{
1016 struct gendisk *disk = ubd_gendisk[n];
1017 struct ubd *ubd_dev;
1018 int err = -ENODEV;
1019
1020 mutex_lock(&ubd_lock);
1021
1022 ubd_dev = &ubd_devs[n];
1023
1024 if(ubd_dev->file == NULL)
1025 goto out;
1026
1027 /* you cannot remove a open disk */
1028 err = -EBUSY;
1029 if(ubd_dev->count > 0)
1030 goto out;
1031
1032 ubd_gendisk[n] = NULL;
1033 if(disk != NULL){
1034 del_gendisk(disk);
1035 put_disk(disk);
1036 }
1037
1038 err = 0;
1039 platform_device_unregister(&ubd_dev->pdev);
1040out:
1041 mutex_unlock(&ubd_lock);
1042 return err;
1043}
1044
1045/* All these are called by mconsole in process context and without
1046 * ubd-specific locks. The structure itself is const except for .list.
1047 */
1048static struct mc_device ubd_mc = {
1049 .list = LIST_HEAD_INIT(ubd_mc.list),
1050 .name = "ubd",
1051 .config = ubd_config,
1052 .get_config = ubd_get_config,
1053 .id = ubd_id,
1054 .remove = ubd_remove,
1055};
1056
1057static int __init ubd_mc_init(void)
1058{
1059 mconsole_register_dev(&ubd_mc);
1060 return 0;
1061}
1062
1063__initcall(ubd_mc_init);
1064
1065static int __init ubd0_init(void)
1066{
1067 struct ubd *ubd_dev = &ubd_devs[0];
1068
1069 mutex_lock(&ubd_lock);
1070 if(ubd_dev->file == NULL)
1071 ubd_dev->file = "root_fs";
1072 mutex_unlock(&ubd_lock);
1073
1074 return 0;
1075}
1076
1077__initcall(ubd0_init);
1078
1079/* Used in ubd_init, which is an initcall */
1080static struct platform_driver ubd_driver = {
1081 .driver = {
1082 .name = DRIVER_NAME,
1083 },
1084};
1085
1086static int __init ubd_init(void)
1087{
1088 char *error;
1089 int i, err;
1090
1091 if (register_blkdev(UBD_MAJOR, "ubd"))
1092 return -1;
1093
1094 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1095 sizeof(struct io_thread_req *),
1096 GFP_KERNEL
1097 );
1098 irq_remainder = 0;
1099
1100 if (irq_req_buffer == NULL) {
1101 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1102 return -1;
1103 }
1104 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1105 sizeof(struct io_thread_req *),
1106 GFP_KERNEL
1107 );
1108
1109 io_remainder = 0;
1110
1111 if (io_req_buffer == NULL) {
1112 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1113 return -1;
1114 }
1115 platform_driver_register(&ubd_driver);
1116 mutex_lock(&ubd_lock);
1117 for (i = 0; i < MAX_DEV; i++){
1118 err = ubd_add(i, &error);
1119 if(err)
1120 printk(KERN_ERR "Failed to initialize ubd device %d :"
1121 "%s\n", i, error);
1122 }
1123 mutex_unlock(&ubd_lock);
1124 return 0;
1125}
1126
1127late_initcall(ubd_init);
1128
1129static int __init ubd_driver_init(void){
1130 unsigned long stack;
1131 int err;
1132
1133 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1134 if(global_openflags.s){
1135 printk(KERN_INFO "ubd: Synchronous mode\n");
1136 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1137 * enough. So use anyway the io thread. */
1138 }
1139 stack = alloc_stack(0, 0);
1140 io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
1141 if(io_pid < 0){
1142 printk(KERN_ERR
1143 "ubd : Failed to start I/O thread (errno = %d) - "
1144 "falling back to synchronous I/O\n", -io_pid);
1145 io_pid = -1;
1146 return 0;
1147 }
1148 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1149 0, "ubd", ubd_devs);
1150 if(err < 0)
1151 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1152 return 0;
1153}
1154
1155device_initcall(ubd_driver_init);
1156
1157static int ubd_open(struct block_device *bdev, fmode_t mode)
1158{
1159 struct gendisk *disk = bdev->bd_disk;
1160 struct ubd *ubd_dev = disk->private_data;
1161 int err = 0;
1162
1163 mutex_lock(&ubd_mutex);
1164 if(ubd_dev->count == 0){
1165 err = ubd_open_dev(ubd_dev);
1166 if(err){
1167 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1168 disk->disk_name, ubd_dev->file, -err);
1169 goto out;
1170 }
1171 }
1172 ubd_dev->count++;
1173 set_disk_ro(disk, !ubd_dev->openflags.w);
1174
1175 /* This should no more be needed. And it didn't work anyway to exclude
1176 * read-write remounting of filesystems.*/
1177 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1178 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1179 err = -EROFS;
1180 }*/
1181out:
1182 mutex_unlock(&ubd_mutex);
1183 return err;
1184}
1185
1186static void ubd_release(struct gendisk *disk, fmode_t mode)
1187{
1188 struct ubd *ubd_dev = disk->private_data;
1189
1190 mutex_lock(&ubd_mutex);
1191 if(--ubd_dev->count == 0)
1192 ubd_close_dev(ubd_dev);
1193 mutex_unlock(&ubd_mutex);
1194}
1195
1196static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1197 __u64 *cow_offset, unsigned long *bitmap,
1198 __u64 bitmap_offset, unsigned long *bitmap_words,
1199 __u64 bitmap_len)
1200{
1201 __u64 sector = io_offset >> SECTOR_SHIFT;
1202 int i, update_bitmap = 0;
1203
1204 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1205 if(cow_mask != NULL)
1206 ubd_set_bit(i, (unsigned char *) cow_mask);
1207 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1208 continue;
1209
1210 update_bitmap = 1;
1211 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1212 }
1213
1214 if(!update_bitmap)
1215 return;
1216
1217 *cow_offset = sector / (sizeof(unsigned long) * 8);
1218
1219 /* This takes care of the case where we're exactly at the end of the
1220 * device, and *cow_offset + 1 is off the end. So, just back it up
1221 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1222 * for the original diagnosis.
1223 */
1224 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1225 sizeof(unsigned long)) - 1))
1226 (*cow_offset)--;
1227
1228 bitmap_words[0] = bitmap[*cow_offset];
1229 bitmap_words[1] = bitmap[*cow_offset + 1];
1230
1231 *cow_offset *= sizeof(unsigned long);
1232 *cow_offset += bitmap_offset;
1233}
1234
1235static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1236 unsigned long offset, unsigned long *bitmap,
1237 __u64 bitmap_offset, __u64 bitmap_len)
1238{
1239 __u64 sector = offset >> SECTOR_SHIFT;
1240 int i;
1241
1242 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
1243 panic("Operation too long");
1244
1245 if (req_op(req->req) == REQ_OP_READ) {
1246 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
1247 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1248 ubd_set_bit(i, (unsigned char *)
1249 &segment->sector_mask);
1250 }
1251 } else {
1252 cowify_bitmap(offset, segment->length, &segment->sector_mask,
1253 &segment->cow_offset, bitmap, bitmap_offset,
1254 segment->bitmap_words, bitmap_len);
1255 }
1256}
1257
1258static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1259 struct request *req)
1260{
1261 struct bio_vec bvec;
1262 struct req_iterator iter;
1263 int i = 0;
1264 unsigned long byte_offset = io_req->offset;
1265 enum req_op op = req_op(req);
1266
1267 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1268 io_req->io_desc[0].buffer = NULL;
1269 io_req->io_desc[0].length = blk_rq_bytes(req);
1270 } else {
1271 rq_for_each_segment(bvec, req, iter) {
1272 BUG_ON(i >= io_req->desc_cnt);
1273
1274 io_req->io_desc[i].buffer = bvec_virt(&bvec);
1275 io_req->io_desc[i].length = bvec.bv_len;
1276 i++;
1277 }
1278 }
1279
1280 if (dev->cow.file) {
1281 for (i = 0; i < io_req->desc_cnt; i++) {
1282 cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1283 dev->cow.bitmap, dev->cow.bitmap_offset,
1284 dev->cow.bitmap_len);
1285 byte_offset += io_req->io_desc[i].length;
1286 }
1287
1288 }
1289}
1290
1291static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1292 int desc_cnt)
1293{
1294 struct io_thread_req *io_req;
1295 int i;
1296
1297 io_req = kmalloc(sizeof(*io_req) +
1298 (desc_cnt * sizeof(struct io_desc)),
1299 GFP_ATOMIC);
1300 if (!io_req)
1301 return NULL;
1302
1303 io_req->req = req;
1304 if (dev->cow.file)
1305 io_req->fds[0] = dev->cow.fd;
1306 else
1307 io_req->fds[0] = dev->fd;
1308 io_req->error = 0;
1309 io_req->sectorsize = SECTOR_SIZE;
1310 io_req->fds[1] = dev->fd;
1311 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1312 io_req->offsets[0] = 0;
1313 io_req->offsets[1] = dev->cow.data_offset;
1314
1315 for (i = 0 ; i < desc_cnt; i++) {
1316 io_req->io_desc[i].sector_mask = 0;
1317 io_req->io_desc[i].cow_offset = -1;
1318 }
1319
1320 return io_req;
1321}
1322
1323static int ubd_submit_request(struct ubd *dev, struct request *req)
1324{
1325 int segs = 0;
1326 struct io_thread_req *io_req;
1327 int ret;
1328 enum req_op op = req_op(req);
1329
1330 if (op == REQ_OP_FLUSH)
1331 segs = 0;
1332 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1333 segs = 1;
1334 else
1335 segs = blk_rq_nr_phys_segments(req);
1336
1337 io_req = ubd_alloc_req(dev, req, segs);
1338 if (!io_req)
1339 return -ENOMEM;
1340
1341 io_req->desc_cnt = segs;
1342 if (segs)
1343 ubd_map_req(dev, io_req, req);
1344
1345 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1346 if (ret != sizeof(io_req)) {
1347 if (ret != -EAGAIN)
1348 pr_err("write to io thread failed: %d\n", -ret);
1349 kfree(io_req);
1350 }
1351 return ret;
1352}
1353
1354static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1355 const struct blk_mq_queue_data *bd)
1356{
1357 struct ubd *ubd_dev = hctx->queue->queuedata;
1358 struct request *req = bd->rq;
1359 int ret = 0, res = BLK_STS_OK;
1360
1361 blk_mq_start_request(req);
1362
1363 spin_lock_irq(&ubd_dev->lock);
1364
1365 switch (req_op(req)) {
1366 case REQ_OP_FLUSH:
1367 case REQ_OP_READ:
1368 case REQ_OP_WRITE:
1369 case REQ_OP_DISCARD:
1370 case REQ_OP_WRITE_ZEROES:
1371 ret = ubd_submit_request(ubd_dev, req);
1372 break;
1373 default:
1374 WARN_ON_ONCE(1);
1375 res = BLK_STS_NOTSUPP;
1376 }
1377
1378 spin_unlock_irq(&ubd_dev->lock);
1379
1380 if (ret < 0) {
1381 if (ret == -ENOMEM)
1382 res = BLK_STS_RESOURCE;
1383 else
1384 res = BLK_STS_DEV_RESOURCE;
1385 }
1386
1387 return res;
1388}
1389
1390static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1391{
1392 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1393
1394 geo->heads = 128;
1395 geo->sectors = 32;
1396 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1397 return 0;
1398}
1399
1400static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1401 unsigned int cmd, unsigned long arg)
1402{
1403 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1404 u16 ubd_id[ATA_ID_WORDS];
1405
1406 switch (cmd) {
1407 struct cdrom_volctrl volume;
1408 case HDIO_GET_IDENTITY:
1409 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1410 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1411 ubd_id[ATA_ID_HEADS] = 128;
1412 ubd_id[ATA_ID_SECTORS] = 32;
1413 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1414 sizeof(ubd_id)))
1415 return -EFAULT;
1416 return 0;
1417
1418 case CDROMVOLREAD:
1419 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1420 return -EFAULT;
1421 volume.channel0 = 255;
1422 volume.channel1 = 255;
1423 volume.channel2 = 255;
1424 volume.channel3 = 255;
1425 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1426 return -EFAULT;
1427 return 0;
1428 }
1429 return -EINVAL;
1430}
1431
1432static int map_error(int error_code)
1433{
1434 switch (error_code) {
1435 case 0:
1436 return BLK_STS_OK;
1437 case ENOSYS:
1438 case EOPNOTSUPP:
1439 return BLK_STS_NOTSUPP;
1440 case ENOSPC:
1441 return BLK_STS_NOSPC;
1442 }
1443 return BLK_STS_IOERR;
1444}
1445
1446/*
1447 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1448 *
1449 * The following functions are part of UML hypervisor code.
1450 * All functions from here onwards are executed as a helper
1451 * thread and are not allowed to execute any kernel functions.
1452 *
1453 * Any communication must occur strictly via shared memory and IPC.
1454 *
1455 * Do not add printks, locks, kernel memory operations, etc - it
1456 * will result in unpredictable behaviour and/or crashes.
1457 */
1458
1459static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1460{
1461 int n;
1462
1463 if (segment->cow_offset == -1)
1464 return map_error(0);
1465
1466 n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1467 sizeof(segment->bitmap_words), segment->cow_offset);
1468 if (n != sizeof(segment->bitmap_words))
1469 return map_error(-n);
1470
1471 return map_error(0);
1472}
1473
1474static void do_io(struct io_thread_req *req, struct io_desc *desc)
1475{
1476 char *buf = NULL;
1477 unsigned long len;
1478 int n, nsectors, start, end, bit;
1479 __u64 off;
1480
1481 /* FLUSH is really a special case, we cannot "case" it with others */
1482
1483 if (req_op(req->req) == REQ_OP_FLUSH) {
1484 /* fds[0] is always either the rw image or our cow file */
1485 req->error = map_error(-os_sync_file(req->fds[0]));
1486 return;
1487 }
1488
1489 nsectors = desc->length / req->sectorsize;
1490 start = 0;
1491 do {
1492 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
1493 end = start;
1494 while((end < nsectors) &&
1495 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
1496 end++;
1497
1498 off = req->offset + req->offsets[bit] +
1499 start * req->sectorsize;
1500 len = (end - start) * req->sectorsize;
1501 if (desc->buffer != NULL)
1502 buf = &desc->buffer[start * req->sectorsize];
1503
1504 switch (req_op(req->req)) {
1505 case REQ_OP_READ:
1506 n = 0;
1507 do {
1508 buf = &buf[n];
1509 len -= n;
1510 n = os_pread_file(req->fds[bit], buf, len, off);
1511 if (n < 0) {
1512 req->error = map_error(-n);
1513 return;
1514 }
1515 } while((n < len) && (n != 0));
1516 if (n < len) memset(&buf[n], 0, len - n);
1517 break;
1518 case REQ_OP_WRITE:
1519 n = os_pwrite_file(req->fds[bit], buf, len, off);
1520 if(n != len){
1521 req->error = map_error(-n);
1522 return;
1523 }
1524 break;
1525 case REQ_OP_DISCARD:
1526 n = os_falloc_punch(req->fds[bit], off, len);
1527 if (n) {
1528 req->error = map_error(-n);
1529 return;
1530 }
1531 break;
1532 case REQ_OP_WRITE_ZEROES:
1533 n = os_falloc_zeroes(req->fds[bit], off, len);
1534 if (n) {
1535 req->error = map_error(-n);
1536 return;
1537 }
1538 break;
1539 default:
1540 WARN_ON_ONCE(1);
1541 req->error = BLK_STS_NOTSUPP;
1542 return;
1543 }
1544
1545 start = end;
1546 } while(start < nsectors);
1547
1548 req->offset += len;
1549 req->error = update_bitmap(req, desc);
1550}
1551
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 *
 * This is the descriptor io_thread() reads request pointers from and writes
 * completed request pointers back to.
 */
int kernel_fd = -1;

/* Only changed by the io thread. XXX: currently unused.
 * Incremented once per request processed in io_thread().
 */
static int io_count;
1559
1560int io_thread(void *arg)
1561{
1562 int n, count, written, res;
1563
1564 os_fix_helper_signals();
1565
1566 while(1){
1567 n = bulk_req_safe_read(
1568 kernel_fd,
1569 io_req_buffer,
1570 &io_remainder,
1571 &io_remainder_size,
1572 UBD_REQ_BUFFER_SIZE
1573 );
1574 if (n <= 0) {
1575 if (n == -EAGAIN)
1576 ubd_read_poll(-1);
1577
1578 continue;
1579 }
1580
1581 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1582 struct io_thread_req *req = (*io_req_buffer)[count];
1583 int i;
1584
1585 io_count++;
1586 for (i = 0; !req->error && i < req->desc_cnt; i++)
1587 do_io(req, &(req->io_desc[i]));
1588
1589 }
1590
1591 written = 0;
1592
1593 do {
1594 res = os_write_file(kernel_fd,
1595 ((char *) io_req_buffer) + written,
1596 n - written);
1597 if (res >= 0) {
1598 written += res;
1599 }
1600 if (written < n) {
1601 ubd_write_poll(-1);
1602 }
1603 } while (written < n);
1604 }
1605
1606 return 0;
1607}
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2018 Cambridge Greys Ltd
4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
6 */
7
8/* 2001-09-28...2002-04-17
9 * Partition stuff by James_McMechan@hotmail.com
10 * old style ubd by setting UBD_SHIFT to 0
11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
12 * partitions have changed in 2.5
13 * 2003-01-29 more tinkering for 2.5.59-1
14 * This should now address the sysfs problems and has
15 * the symlink for devfs to allow for booting with
16 * the common /dev/ubd/discX/... names rather than
17 * only /dev/ubdN/discN this version also has lots of
18 * clean ups preparing for ubd-many.
19 * James McMechan
20 */
21
22#define UBD_SHIFT 4
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
27#include <linux/blk-mq.h>
28#include <linux/ata.h>
29#include <linux/hdreg.h>
30#include <linux/major.h>
31#include <linux/cdrom.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/ctype.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <linux/platform_device.h>
38#include <linux/scatterlist.h>
39#include <kern_util.h>
40#include "mconsole_kern.h"
41#include <init.h>
42#include <irq_kern.h>
43#include "ubd.h"
44#include <os.h>
45#include "cow.h"
46
47/* Max request size is determined by sector mask - 32K */
48#define UBD_MAX_REQUEST (8 * sizeof(long))
49
/*
 * One piece of a request as handed to the I/O thread: one segment of a
 * read/write, or the whole range of a discard / write-zeroes request.
 */
struct io_desc {
	/* data buffer; NULL for discard and write-zeroes */
	char *buffer;
	/* length of this piece in bytes */
	unsigned long length;
	/* one bit per sector of this piece; the bit selects fds[]/offsets[] */
	unsigned long sector_mask;
	/* file offset for bitmap_words; -1 means no bitmap write is pending */
	unsigned long long cow_offset;
	/* the two COW bitmap words to write at cow_offset */
	unsigned long bitmap_words[2];
};
57
/*
 * A request as passed (by pointer) between the kernel side and the I/O
 * thread.  Allocated with room for desc_cnt trailing descriptors.
 */
struct io_thread_req {
	/* the block-layer request this belongs to */
	struct request *req;
	/* host file descriptors, indexed by a sector's sector_mask bit */
	int fds[2];
	/* per-fd byte offset added to the I/O position, same indexing as fds */
	unsigned long offsets[2];
	/* byte offset on the device of the descriptor being processed */
	unsigned long long offset;
	/* sector size in bytes used to split descriptors into sectors */
	int sectorsize;
	/* block-layer status of the request; 0 while no error occurred */
	int error;

	/* number of valid entries in io_desc[] */
	int desc_cnt;
	/* io_desc has to be the last element of the struct */
	struct io_desc io_desc[];
};
70
71
/*
 * Buffers for bulk_req_safe_read().  The irq_* set is used by the interrupt
 * handler (ubd_intr), the io_* set by the I/O thread.  Each *_remainder holds
 * the bytes of a partially received request pointer and *_remainder_size how
 * many of them are valid.
 */
static struct io_thread_req * (*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

static struct io_thread_req * (*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
79
80
81
/*
 * Test bit number @bit in the byte array @data (bit 0 is the least
 * significant bit of data[0]).  Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 idx = bit / width;
	int shift = bit % width;

	return !!(data[idx] & (1 << shift));
}
92
/*
 * Set bit number @bit in the byte array @data (bit 0 is the least
 * significant bit of data[0]).  Other bits are left untouched.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 idx = bit / width;
	int shift = bit % width;

	data[idx] |= (1 << shift);
}
103/*End stuff from ubd_user.h*/
104
/* Name given to the platform device registered for each ubd device. */
#define DRIVER_NAME "uml-blkdev"

/* Serializes access to the ubd_devs[] configuration table. */
static DEFINE_MUTEX(ubd_lock);

static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of entries in ubd_devs[]; valid unit numbers are 0 .. MAX_DEV - 1. */
#define MAX_DEV (16)

/* Block-device operations installed on every ubd disk. */
static const struct block_device_operations ubd_blops = {
	.owner = THIS_MODULE,
	.ioctl = ubd_ioctl,
	.compat_ioctl = blkdev_compat_ptr_ioctl,
	.getgeo = ubd_getgeo,
};
121
/*
 * Default host open flags: read/write.  The two variants differ only in .s,
 * which is set when CONFIG_BLK_DEV_UBD_SYNC is enabled.
 */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif
/* Flags every newly configured device starts from; "ubd=sync" changes them. */
static struct openflags global_openflags = OPEN_FLAGS;
130
/* Copy-on-write state of a device; unused when cow.file is NULL. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap (vmalloc'ed in ubd_open_dev) */
	unsigned long *bitmap;
	/* size used to allocate and read the bitmap */
	unsigned long bitmap_len;
	/* offset of the bitmap inside the COW file */
	int bitmap_offset;
	/* data offset reported by cow_sizes()/init_cow_file() */
	int data_offset;
};
141
/* Maximum number of segments per request advertised to the block layer. */
#define MAX_SG 64

/* Per-device state; one entry of ubd_devs[]. */
struct ubd {
	/* name (and fd, below) of the file opened for writing, either the
	 * backing or the cow file. */
	char *file;
	/* optional serial string from the command line, shown via sysfs */
	char *serial;
	int fd;
	/* device size in bytes */
	__u64 size;
	/* flags parsed from the command line */
	struct openflags boot_openflags;
	/* flags in effect for the current open; reset from boot_openflags */
	struct openflags openflags;
	/* 'c' flag: do not lock the file on the host */
	unsigned shared:1;
	/* 'd' flag: do not look for a COW header / backing file */
	unsigned no_cow:1;
	/* 't' flag: do not advertise discard / write-zeroes */
	unsigned no_trim:1;
	struct cow cow;
	struct platform_device pdev;
	struct gendisk *disk;
	struct blk_mq_tag_set tag_set;
	spinlock_t lock;
};
162
/* Initializer for an unconfigured struct cow: no file, no fd, no bitmap. */
#define DEFAULT_COW { \
	.file = NULL, \
	.fd = -1, \
	.bitmap = NULL, \
	.bitmap_offset = 0, \
	.data_offset = 0, \
}

/*
 * Initializer for an unconfigured struct ubd.  A NULL .file is what marks a
 * slot as "not configured"; ubd_device_release() resets a slot to this.
 */
#define DEFAULT_UBD { \
	.file = NULL, \
	.serial = NULL, \
	.fd = -1, \
	.size = -1, \
	.boot_openflags = OPEN_FLAGS, \
	.openflags = OPEN_FLAGS, \
	.no_cow = 0, \
	.no_trim = 0, \
	.shared = 0, \
	.cow = DEFAULT_COW, \
	.lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
}
184
/* Protected by ubd_lock.  Indexed by unit number; all slots start unconfigured. */
static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };

/* Forward declaration: referenced by ubd_mq_ops. */
static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *bd);
190
191static int fake_ide_setup(char *str)
192{
193 pr_warn("The fake_ide option has been removed\n");
194 return 1;
195}
196__setup("fake_ide", fake_ide_setup);
197
198__uml_help(fake_ide_setup,
199"fake_ide\n"
200" Obsolete stub.\n\n"
201);
202
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single lower-case letter, 'a' being
 * unit 0.  On success *ptr is advanced past the designator and the unit
 * number is returned; otherwise -1 is returned and *ptr is left unchanged.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if (*cur < 'a' || *cur > 'z')
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
221
222/* If *index_out == -1 at exit, the passed option was a general one;
223 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
224 * should not be freed on exit.
225 */
226static int ubd_setup_common(char *str, int *index_out, char **error_out)
227{
228 struct ubd *ubd_dev;
229 struct openflags flags = global_openflags;
230 char *file, *backing_file, *serial;
231 int n, err = 0, i;
232
233 if(index_out) *index_out = -1;
234 n = *str;
235 if(n == '='){
236 str++;
237 if(!strcmp(str, "sync")){
238 global_openflags = of_sync(global_openflags);
239 return err;
240 }
241
242 pr_warn("fake major not supported any more\n");
243 return 0;
244 }
245
246 n = parse_unit(&str);
247 if(n < 0){
248 *error_out = "Couldn't parse device number";
249 return -EINVAL;
250 }
251 if(n >= MAX_DEV){
252 *error_out = "Device number out of range";
253 return 1;
254 }
255
256 err = -EBUSY;
257 mutex_lock(&ubd_lock);
258
259 ubd_dev = &ubd_devs[n];
260 if(ubd_dev->file != NULL){
261 *error_out = "Device is already configured";
262 goto out;
263 }
264
265 if (index_out)
266 *index_out = n;
267
268 err = -EINVAL;
269 for (i = 0; i < sizeof("rscdt="); i++) {
270 switch (*str) {
271 case 'r':
272 flags.w = 0;
273 break;
274 case 's':
275 flags.s = 1;
276 break;
277 case 'd':
278 ubd_dev->no_cow = 1;
279 break;
280 case 'c':
281 ubd_dev->shared = 1;
282 break;
283 case 't':
284 ubd_dev->no_trim = 1;
285 break;
286 case '=':
287 str++;
288 goto break_loop;
289 default:
290 *error_out = "Expected '=' or flag letter "
291 "(r, s, c, t or d)";
292 goto out;
293 }
294 str++;
295 }
296
297 if (*str == '=')
298 *error_out = "Too many flags specified";
299 else
300 *error_out = "Missing '='";
301 goto out;
302
303break_loop:
304 file = strsep(&str, ",:");
305 if (*file == '\0')
306 file = NULL;
307
308 backing_file = strsep(&str, ",:");
309 if (backing_file && *backing_file == '\0')
310 backing_file = NULL;
311
312 serial = strsep(&str, ",:");
313 if (serial && *serial == '\0')
314 serial = NULL;
315
316 if (backing_file && ubd_dev->no_cow) {
317 *error_out = "Can't specify both 'd' and a cow file";
318 goto out;
319 }
320
321 err = 0;
322 ubd_dev->file = file;
323 ubd_dev->cow.file = backing_file;
324 ubd_dev->serial = serial;
325 ubd_dev->boot_openflags = flags;
326out:
327 mutex_unlock(&ubd_lock);
328 return err;
329}
330
331static int ubd_setup(char *str)
332{
333 char *error;
334 int err;
335
336 err = ubd_setup_common(str, NULL, &error);
337 if(err)
338 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
339 "%s\n", str, error);
340 return 1;
341}
342
343__setup("ubd", ubd_setup);
344__uml_help(ubd_setup,
345"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
346" This is used to associate a device with a file in the underlying\n"
347" filesystem. When specifying two filenames, the first one is the\n"
348" COW name and the second is the backing file name. As separator you can\n"
349" use either a ':' or a ',': the first one allows writing things like;\n"
350" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
351" while with a ',' the shell would not expand the 2nd '~'.\n"
352" When using only one filename, UML will detect whether to treat it like\n"
353" a COW file or a backing file. To override this detection, add the 'd'\n"
354" flag:\n"
355" ubd0d=BackingFile\n"
356" Usually, there is a filesystem in the file, but \n"
357" that's not required. Swap devices containing swap files can be\n"
358" specified like this. Also, a file which doesn't contain a\n"
359" filesystem can have its contents read in the virtual \n"
360" machine by running 'dd' on the device. <n> must be in the range\n"
361" 0 to 7. Appending an 'r' to the number will cause that device\n"
362" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
363" an 's' will cause data to be written to disk on the host immediately.\n"
364" 'c' will cause the device to be treated as being shared between multiple\n"
365" UMLs and file locking will be turned off - this is appropriate for a\n"
366" cluster filesystem and inappropriate at almost all other times.\n\n"
367" 't' will disable trim/discard support on the device (enabled by default).\n\n"
368" An optional device serial number can be exposed using the serial parameter\n"
369" on the cmdline which is exposed as a sysfs entry. This is particularly\n"
370" useful when a unique number should be given to the device. Note when\n"
371" specifying a label, the filename2 must be also presented. It can be\n"
372" an empty string, in which case the backing file is not used:\n"
373" ubd0=File,,Serial\n"
374);
375
376static int udb_setup(char *str)
377{
378 printk("udb%s specified on command line is almost certainly a ubd -> "
379 "udb TYPO\n", str);
380 return 1;
381}
382
383__setup("udb", udb_setup);
384__uml_help(udb_setup,
385"udb\n"
386" This option is here solely to catch ubd -> udb typos, which can be\n"
387" to impossible to catch visually unless you specifically look for\n"
388" them. The only result of any option starting with 'udb' is an error\n"
389" in the boot output.\n\n"
390);
391
/* Only changed by ubd_init, which is an initcall.
 * Kernel-side descriptor of the channel to the I/O thread: completed request
 * pointers are read from it in ubd_intr().
 */
static int thread_fd = -1;
394
/*
 * Read several request pointers at a time from @fd, handling fractional
 * reads if (and as) needed.
 *
 * @request_buffer: destination, room for @max_recs pointers
 * @remainder:      storage for the bytes of a partially received pointer
 * @remainder_size: number of valid bytes in @remainder (in/out)
 * @max_recs:       capacity of @request_buffer in pointers
 *
 * Bytes left over from a previous call are placed in front of the newly read
 * data.  Returns the number of bytes of COMPLETE pointers now in
 * @request_buffer (possibly 0 if only a fragment has arrived so far), or the
 * non-positive result of os_read_file() when nothing was read; in that case
 * a pending remainder is kept for the next call.
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	const int rec_size = sizeof(struct io_thread_req *);
	char *buf = (char *) request_buffer;
	int carried = 0;
	int total, tail;
	int res;

	if (*remainder_size > 0) {
		memmove(buf, (char *) remainder, *remainder_size);
		carried = *remainder_size;
	}

	res = os_read_file(fd, buf + carried, rec_size * max_recs - carried);
	if (res <= 0)
		return res;

	total = carried + res;
	tail = total % rec_size;

	/*
	 * Read somehow returned not a multiple of dword
	 * theoretically possible, but never observed in the
	 * wild, so read routine must be able to handle it
	 */
	WARN(tail > 0, "UBD IPC read returned a partial result");
	if (tail > 0)
		memmove(remainder, buf + (total - tail), tail);

	/*
	 * The carried-over bytes have been consumed, so the remainder size
	 * must be updated even when the data is aligned again; otherwise the
	 * next call would prepend stale bytes.
	 */
	*remainder_size = tail;

	return total - tail;
}
447
448static void ubd_end_request(struct io_thread_req *io_req)
449{
450 if (io_req->error == BLK_STS_NOTSUPP) {
451 if (req_op(io_req->req) == REQ_OP_DISCARD)
452 blk_queue_disable_discard(io_req->req->q);
453 else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES)
454 blk_queue_disable_write_zeroes(io_req->req->q);
455 }
456 blk_mq_end_request(io_req->req, io_req->error);
457 kfree(io_req);
458}
459
460static irqreturn_t ubd_intr(int irq, void *dev)
461{
462 int len, i;
463
464 while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer,
465 &irq_remainder, &irq_remainder_size,
466 UBD_REQ_BUFFER_SIZE)) >= 0) {
467 for (i = 0; i < len / sizeof(struct io_thread_req *); i++)
468 ubd_end_request((*irq_req_buffer)[i]);
469 }
470
471 if (len < 0 && len != -EAGAIN)
472 pr_err("spurious interrupt in %s, err = %d\n", __func__, len);
473 return IRQ_HANDLED;
474}
475
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was recorded in io_pid. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
486
487static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
488{
489 char *file;
490 int fd;
491 int err;
492
493 __u32 version;
494 __u32 align;
495 char *backing_file;
496 time64_t mtime;
497 unsigned long long size;
498 int sector_size;
499 int bitmap_offset;
500
501 if (ubd_dev->file && ubd_dev->cow.file) {
502 file = ubd_dev->cow.file;
503
504 goto out;
505 }
506
507 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
508 if (fd < 0)
509 return fd;
510
511 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
512 &mtime, &size, §or_size, &align, &bitmap_offset);
513 os_close_file(fd);
514
515 if(err == -EINVAL)
516 file = ubd_dev->file;
517 else
518 file = backing_file;
519
520out:
521 return os_file_size(file, size_out);
522}
523
/*
 * Read @len bytes of COW bitmap from @fd at @offset into @buf.
 * Returns 0 on success or the negative error from os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int n = os_pread_file(fd, buf, len, offset);

	return (n < 0) ? n : 0;
}
534
535static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
536{
537 time64_t modtime;
538 unsigned long long actual;
539 int err;
540
541 err = os_file_modtime(file, &modtime);
542 if (err < 0) {
543 printk(KERN_ERR "Failed to get modification time of backing "
544 "file \"%s\", err = %d\n", file, -err);
545 return err;
546 }
547
548 err = os_file_size(file, &actual);
549 if (err < 0) {
550 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
551 "err = %d\n", file, -err);
552 return err;
553 }
554
555 if (actual != size) {
556 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
557 * the typecast.*/
558 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
559 "vs backing file\n", (unsigned long long) size, actual);
560 return -EINVAL;
561 }
562 if (modtime != mtime) {
563 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
564 "backing file\n", mtime, modtime);
565 return -EINVAL;
566 }
567 return 0;
568}
569
570static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
571{
572 struct uml_stat buf1, buf2;
573 int err;
574
575 if (from_cmdline == NULL)
576 return 0;
577 if (!strcmp(from_cmdline, from_cow))
578 return 0;
579
580 err = os_stat_file(from_cmdline, &buf1);
581 if (err < 0) {
582 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
583 -err);
584 return 0;
585 }
586 err = os_stat_file(from_cow, &buf2);
587 if (err < 0) {
588 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
589 -err);
590 return 1;
591 }
592 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
593 return 0;
594
595 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
596 "\"%s\" specified in COW header of \"%s\"\n",
597 from_cmdline, from_cow, cow);
598 return 1;
599}
600
601static int open_ubd_file(char *file, struct openflags *openflags, int shared,
602 char **backing_file_out, int *bitmap_offset_out,
603 unsigned long *bitmap_len_out, int *data_offset_out,
604 int *create_cow_out)
605{
606 time64_t mtime;
607 unsigned long long size;
608 __u32 version, align;
609 char *backing_file;
610 int fd, err, sectorsize, asked_switch, mode = 0644;
611
612 fd = os_open_file(file, *openflags, mode);
613 if (fd < 0) {
614 if ((fd == -ENOENT) && (create_cow_out != NULL))
615 *create_cow_out = 1;
616 if (!openflags->w ||
617 ((fd != -EROFS) && (fd != -EACCES)))
618 return fd;
619 openflags->w = 0;
620 fd = os_open_file(file, *openflags, mode);
621 if (fd < 0)
622 return fd;
623 }
624
625 if (shared)
626 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
627 else {
628 err = os_lock_file(fd, openflags->w);
629 if (err < 0) {
630 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
631 file, -err);
632 goto out_close;
633 }
634 }
635
636 /* Successful return case! */
637 if (backing_file_out == NULL)
638 return fd;
639
640 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
641 &size, §orsize, &align, bitmap_offset_out);
642 if (err && (*backing_file_out != NULL)) {
643 printk(KERN_ERR "Failed to read COW header from COW file "
644 "\"%s\", errno = %d\n", file, -err);
645 goto out_close;
646 }
647 if (err)
648 return fd;
649
650 asked_switch = path_requires_switch(*backing_file_out, backing_file,
651 file);
652
653 /* Allow switching only if no mismatch. */
654 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
655 mtime)) {
656 printk(KERN_ERR "Switching backing file to '%s'\n",
657 *backing_file_out);
658 err = write_cow_header(file, fd, *backing_file_out,
659 sectorsize, align, &size);
660 if (err) {
661 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
662 goto out_close;
663 }
664 } else {
665 *backing_file_out = backing_file;
666 err = backing_file_mismatch(*backing_file_out, size, mtime);
667 if (err)
668 goto out_close;
669 }
670
671 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
672 bitmap_len_out, data_offset_out);
673
674 return fd;
675 out_close:
676 os_close_file(fd);
677 return err;
678}
679
680static int create_cow_file(char *cow_file, char *backing_file,
681 struct openflags flags,
682 int sectorsize, int alignment, int *bitmap_offset_out,
683 unsigned long *bitmap_len_out, int *data_offset_out)
684{
685 int err, fd;
686
687 flags.c = 1;
688 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
689 if (fd < 0) {
690 err = fd;
691 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
692 cow_file, -err);
693 goto out;
694 }
695
696 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
697 bitmap_offset_out, bitmap_len_out,
698 data_offset_out);
699 if (!err)
700 return fd;
701 os_close_file(fd);
702 out:
703 return err;
704}
705
706static void ubd_close_dev(struct ubd *ubd_dev)
707{
708 os_close_file(ubd_dev->fd);
709 if(ubd_dev->cow.file == NULL)
710 return;
711
712 os_close_file(ubd_dev->cow.fd);
713 vfree(ubd_dev->cow.bitmap);
714 ubd_dev->cow.bitmap = NULL;
715}
716
717static int ubd_open_dev(struct ubd *ubd_dev)
718{
719 struct openflags flags;
720 char **back_ptr;
721 int err, create_cow, *create_ptr;
722 int fd;
723
724 ubd_dev->openflags = ubd_dev->boot_openflags;
725 create_cow = 0;
726 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
727 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
728
729 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
730 back_ptr, &ubd_dev->cow.bitmap_offset,
731 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
732 create_ptr);
733
734 if((fd == -ENOENT) && create_cow){
735 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
736 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
737 &ubd_dev->cow.bitmap_offset,
738 &ubd_dev->cow.bitmap_len,
739 &ubd_dev->cow.data_offset);
740 if(fd >= 0){
741 printk(KERN_INFO "Creating \"%s\" as COW file for "
742 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
743 }
744 }
745
746 if(fd < 0){
747 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
748 -fd);
749 return fd;
750 }
751 ubd_dev->fd = fd;
752
753 if(ubd_dev->cow.file != NULL){
754 err = -ENOMEM;
755 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
756 if(ubd_dev->cow.bitmap == NULL){
757 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
758 goto error;
759 }
760
761 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
762 ubd_dev->cow.bitmap_offset,
763 ubd_dev->cow.bitmap_len);
764 if(err < 0)
765 goto error;
766
767 flags = ubd_dev->openflags;
768 flags.w = 0;
769 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
770 NULL, NULL, NULL, NULL);
771 if(err < 0) goto error;
772 ubd_dev->cow.fd = err;
773 }
774 return 0;
775 error:
776 os_close_file(ubd_dev->fd);
777 return err;
778}
779
780static void ubd_device_release(struct device *dev)
781{
782 struct ubd *ubd_dev = container_of(dev, struct ubd, pdev.dev);
783
784 blk_mq_free_tag_set(&ubd_dev->tag_set);
785 *ubd_dev = ((struct ubd) DEFAULT_UBD);
786}
787
788static ssize_t serial_show(struct device *dev,
789 struct device_attribute *attr, char *buf)
790{
791 struct gendisk *disk = dev_to_disk(dev);
792 struct ubd *ubd_dev = disk->private_data;
793
794 if (!ubd_dev)
795 return 0;
796
797 return sprintf(buf, "%s", ubd_dev->serial);
798}
799
800static DEVICE_ATTR_RO(serial);
801
/* sysfs attributes attached to every ubd disk. */
static struct attribute *ubd_attrs[] = {
	&dev_attr_serial.attr,
	NULL,
};

/* Visibility callback: every attribute is shown with its declared mode. */
static umode_t ubd_attrs_are_visible(struct kobject *kobj,
				     struct attribute *a, int n)
{
	return a->mode;
}

static const struct attribute_group ubd_attr_group = {
	.attrs = ubd_attrs,
	.is_visible = ubd_attrs_are_visible,
};

/* NULL-terminated group list passed to device_add_disk(). */
static const struct attribute_group *ubd_attr_groups[] = {
	&ubd_attr_group,
	NULL,
};
822
/* Round n up to the next multiple of SECTOR_SIZE (a power of two). */
#define ROUND_BLOCK(n) (((n) + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
824
/* blk-mq operations for ubd; only request queueing is provided. */
static const struct blk_mq_ops ubd_mq_ops = {
	.queue_rq = ubd_queue_rq,
};
828
829static int ubd_add(int n, char **error_out)
830{
831 struct ubd *ubd_dev = &ubd_devs[n];
832 struct queue_limits lim = {
833 .max_segments = MAX_SG,
834 .seg_boundary_mask = PAGE_SIZE - 1,
835 .features = BLK_FEAT_WRITE_CACHE,
836 };
837 struct gendisk *disk;
838 int err = 0;
839
840 if(ubd_dev->file == NULL)
841 goto out;
842
843 if (ubd_dev->cow.file)
844 lim.max_hw_sectors = 8 * sizeof(long);
845 if (!ubd_dev->no_trim) {
846 lim.max_hw_discard_sectors = UBD_MAX_REQUEST;
847 lim.max_write_zeroes_sectors = UBD_MAX_REQUEST;
848 }
849
850 err = ubd_file_size(ubd_dev, &ubd_dev->size);
851 if(err < 0){
852 *error_out = "Couldn't determine size of device's file";
853 goto out;
854 }
855
856 err = ubd_open_dev(ubd_dev);
857 if (err) {
858 pr_err("ubd%c: Can't open \"%s\": errno = %d\n",
859 'a' + n, ubd_dev->file, -err);
860 goto out;
861 }
862
863 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
864
865 ubd_dev->tag_set.ops = &ubd_mq_ops;
866 ubd_dev->tag_set.queue_depth = 64;
867 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
868 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
869 ubd_dev->tag_set.driver_data = ubd_dev;
870 ubd_dev->tag_set.nr_hw_queues = 1;
871
872 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
873 if (err)
874 goto out_close;
875
876 disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev);
877 if (IS_ERR(disk)) {
878 err = PTR_ERR(disk);
879 goto out_cleanup_tags;
880 }
881
882 disk->major = UBD_MAJOR;
883 disk->first_minor = n << UBD_SHIFT;
884 disk->minors = 1 << UBD_SHIFT;
885 disk->fops = &ubd_blops;
886 set_capacity(disk, ubd_dev->size / 512);
887 sprintf(disk->disk_name, "ubd%c", 'a' + n);
888 disk->private_data = ubd_dev;
889 set_disk_ro(disk, !ubd_dev->openflags.w);
890
891 ubd_dev->pdev.id = n;
892 ubd_dev->pdev.name = DRIVER_NAME;
893 ubd_dev->pdev.dev.release = ubd_device_release;
894 dev_set_drvdata(&ubd_dev->pdev.dev, ubd_dev);
895 platform_device_register(&ubd_dev->pdev);
896
897 err = device_add_disk(&ubd_dev->pdev.dev, disk, ubd_attr_groups);
898 if (err)
899 goto out_cleanup_disk;
900
901 ubd_dev->disk = disk;
902
903 return 0;
904
905out_cleanup_disk:
906 put_disk(disk);
907out_cleanup_tags:
908 blk_mq_free_tag_set(&ubd_dev->tag_set);
909out_close:
910 ubd_close_dev(ubd_dev);
911out:
912 return err;
913}
914
915static int ubd_config(char *str, char **error_out)
916{
917 int n, ret;
918
919 /* This string is possibly broken up and stored, so it's only
920 * freed if ubd_setup_common fails, or if only general options
921 * were set.
922 */
923 str = kstrdup(str, GFP_KERNEL);
924 if (str == NULL) {
925 *error_out = "Failed to allocate memory";
926 return -ENOMEM;
927 }
928
929 ret = ubd_setup_common(str, &n, error_out);
930 if (ret)
931 goto err_free;
932
933 if (n == -1) {
934 ret = 0;
935 goto err_free;
936 }
937
938 mutex_lock(&ubd_lock);
939 ret = ubd_add(n, error_out);
940 if (ret)
941 ubd_devs[n].file = NULL;
942 mutex_unlock(&ubd_lock);
943
944out:
945 return ret;
946
947err_free:
948 kfree(str);
949 goto out;
950}
951
952static int ubd_get_config(char *name, char *str, int size, char **error_out)
953{
954 struct ubd *ubd_dev;
955 int n, len = 0;
956
957 n = parse_unit(&name);
958 if((n >= MAX_DEV) || (n < 0)){
959 *error_out = "ubd_get_config : device number out of range";
960 return -1;
961 }
962
963 ubd_dev = &ubd_devs[n];
964 mutex_lock(&ubd_lock);
965
966 if(ubd_dev->file == NULL){
967 CONFIG_CHUNK(str, size, len, "", 1);
968 goto out;
969 }
970
971 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
972
973 if(ubd_dev->cow.file != NULL){
974 CONFIG_CHUNK(str, size, len, ",", 0);
975 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
976 }
977 else CONFIG_CHUNK(str, size, len, "", 1);
978
979 out:
980 mutex_unlock(&ubd_lock);
981 return len;
982}
983
984static int ubd_id(char **str, int *start_out, int *end_out)
985{
986 int n;
987
988 n = parse_unit(str);
989 *start_out = 0;
990 *end_out = MAX_DEV - 1;
991 return n;
992}
993
994static int ubd_remove(int n, char **error_out)
995{
996 struct ubd *ubd_dev;
997 int err = -ENODEV;
998
999 mutex_lock(&ubd_lock);
1000
1001 ubd_dev = &ubd_devs[n];
1002
1003 if(ubd_dev->file == NULL)
1004 goto out;
1005
1006 if (ubd_dev->disk) {
1007 /* you cannot remove a open disk */
1008 err = -EBUSY;
1009 if (disk_openers(ubd_dev->disk))
1010 goto out;
1011
1012 del_gendisk(ubd_dev->disk);
1013 ubd_close_dev(ubd_dev);
1014 put_disk(ubd_dev->disk);
1015 }
1016
1017 err = 0;
1018 platform_device_unregister(&ubd_dev->pdev);
1019out:
1020 mutex_unlock(&ubd_lock);
1021 return err;
1022}
1023
1024/* All these are called by mconsole in process context and without
1025 * ubd-specific locks. The structure itself is const except for .list.
1026 */
1027static struct mc_device ubd_mc = {
1028 .list = LIST_HEAD_INIT(ubd_mc.list),
1029 .name = "ubd",
1030 .config = ubd_config,
1031 .get_config = ubd_get_config,
1032 .id = ubd_id,
1033 .remove = ubd_remove,
1034};
1035
1036static int __init ubd_mc_init(void)
1037{
1038 mconsole_register_dev(&ubd_mc);
1039 return 0;
1040}
1041
1042__initcall(ubd_mc_init);
1043
1044static int __init ubd0_init(void)
1045{
1046 struct ubd *ubd_dev = &ubd_devs[0];
1047
1048 mutex_lock(&ubd_lock);
1049 if(ubd_dev->file == NULL)
1050 ubd_dev->file = "root_fs";
1051 mutex_unlock(&ubd_lock);
1052
1053 return 0;
1054}
1055
1056__initcall(ubd0_init);
1057
1058/* Used in ubd_init, which is an initcall */
1059static struct platform_driver ubd_driver = {
1060 .driver = {
1061 .name = DRIVER_NAME,
1062 },
1063};
1064
1065static int __init ubd_init(void)
1066{
1067 char *error;
1068 int i, err;
1069
1070 if (register_blkdev(UBD_MAJOR, "ubd"))
1071 return -1;
1072
1073 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1074 sizeof(struct io_thread_req *),
1075 GFP_KERNEL
1076 );
1077 irq_remainder = 0;
1078
1079 if (irq_req_buffer == NULL) {
1080 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1081 return -ENOMEM;
1082 }
1083 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1084 sizeof(struct io_thread_req *),
1085 GFP_KERNEL
1086 );
1087
1088 io_remainder = 0;
1089
1090 if (io_req_buffer == NULL) {
1091 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1092 return -ENOMEM;
1093 }
1094 platform_driver_register(&ubd_driver);
1095 mutex_lock(&ubd_lock);
1096 for (i = 0; i < MAX_DEV; i++){
1097 err = ubd_add(i, &error);
1098 if(err)
1099 printk(KERN_ERR "Failed to initialize ubd device %d :"
1100 "%s\n", i, error);
1101 }
1102 mutex_unlock(&ubd_lock);
1103 return 0;
1104}
1105
1106late_initcall(ubd_init);
1107
1108static int __init ubd_driver_init(void){
1109 unsigned long stack;
1110 int err;
1111
1112 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1113 if(global_openflags.s){
1114 printk(KERN_INFO "ubd: Synchronous mode\n");
1115 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1116 * enough. So use anyway the io thread. */
1117 }
1118 stack = alloc_stack(0, 0);
1119 io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
1120 if(io_pid < 0){
1121 printk(KERN_ERR
1122 "ubd : Failed to start I/O thread (errno = %d) - "
1123 "falling back to synchronous I/O\n", -io_pid);
1124 io_pid = -1;
1125 return 0;
1126 }
1127 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1128 0, "ubd", ubd_devs);
1129 if(err < 0)
1130 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1131 return 0;
1132}
1133
1134device_initcall(ubd_driver_init);
1135
1136static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1137 __u64 *cow_offset, unsigned long *bitmap,
1138 __u64 bitmap_offset, unsigned long *bitmap_words,
1139 __u64 bitmap_len)
1140{
1141 __u64 sector = io_offset >> SECTOR_SHIFT;
1142 int i, update_bitmap = 0;
1143
1144 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1145 if(cow_mask != NULL)
1146 ubd_set_bit(i, (unsigned char *) cow_mask);
1147 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1148 continue;
1149
1150 update_bitmap = 1;
1151 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1152 }
1153
1154 if(!update_bitmap)
1155 return;
1156
1157 *cow_offset = sector / (sizeof(unsigned long) * 8);
1158
1159 /* This takes care of the case where we're exactly at the end of the
1160 * device, and *cow_offset + 1 is off the end. So, just back it up
1161 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1162 * for the original diagnosis.
1163 */
1164 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1165 sizeof(unsigned long)) - 1))
1166 (*cow_offset)--;
1167
1168 bitmap_words[0] = bitmap[*cow_offset];
1169 bitmap_words[1] = bitmap[*cow_offset + 1];
1170
1171 *cow_offset *= sizeof(unsigned long);
1172 *cow_offset += bitmap_offset;
1173}
1174
1175static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1176 unsigned long offset, unsigned long *bitmap,
1177 __u64 bitmap_offset, __u64 bitmap_len)
1178{
1179 __u64 sector = offset >> SECTOR_SHIFT;
1180 int i;
1181
1182 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
1183 panic("Operation too long");
1184
1185 if (req_op(req->req) == REQ_OP_READ) {
1186 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
1187 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1188 ubd_set_bit(i, (unsigned char *)
1189 &segment->sector_mask);
1190 }
1191 } else {
1192 cowify_bitmap(offset, segment->length, &segment->sector_mask,
1193 &segment->cow_offset, bitmap, bitmap_offset,
1194 segment->bitmap_words, bitmap_len);
1195 }
1196}
1197
1198static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1199 struct request *req)
1200{
1201 struct bio_vec bvec;
1202 struct req_iterator iter;
1203 int i = 0;
1204 unsigned long byte_offset = io_req->offset;
1205 enum req_op op = req_op(req);
1206
1207 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1208 io_req->io_desc[0].buffer = NULL;
1209 io_req->io_desc[0].length = blk_rq_bytes(req);
1210 } else {
1211 rq_for_each_segment(bvec, req, iter) {
1212 BUG_ON(i >= io_req->desc_cnt);
1213
1214 io_req->io_desc[i].buffer = bvec_virt(&bvec);
1215 io_req->io_desc[i].length = bvec.bv_len;
1216 i++;
1217 }
1218 }
1219
1220 if (dev->cow.file) {
1221 for (i = 0; i < io_req->desc_cnt; i++) {
1222 cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1223 dev->cow.bitmap, dev->cow.bitmap_offset,
1224 dev->cow.bitmap_len);
1225 byte_offset += io_req->io_desc[i].length;
1226 }
1227
1228 }
1229}
1230
1231static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1232 int desc_cnt)
1233{
1234 struct io_thread_req *io_req;
1235 int i;
1236
1237 io_req = kmalloc(sizeof(*io_req) +
1238 (desc_cnt * sizeof(struct io_desc)),
1239 GFP_ATOMIC);
1240 if (!io_req)
1241 return NULL;
1242
1243 io_req->req = req;
1244 if (dev->cow.file)
1245 io_req->fds[0] = dev->cow.fd;
1246 else
1247 io_req->fds[0] = dev->fd;
1248 io_req->error = 0;
1249 io_req->sectorsize = SECTOR_SIZE;
1250 io_req->fds[1] = dev->fd;
1251 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1252 io_req->offsets[0] = 0;
1253 io_req->offsets[1] = dev->cow.data_offset;
1254
1255 for (i = 0 ; i < desc_cnt; i++) {
1256 io_req->io_desc[i].sector_mask = 0;
1257 io_req->io_desc[i].cow_offset = -1;
1258 }
1259
1260 return io_req;
1261}
1262
1263static int ubd_submit_request(struct ubd *dev, struct request *req)
1264{
1265 int segs = 0;
1266 struct io_thread_req *io_req;
1267 int ret;
1268 enum req_op op = req_op(req);
1269
1270 if (op == REQ_OP_FLUSH)
1271 segs = 0;
1272 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1273 segs = 1;
1274 else
1275 segs = blk_rq_nr_phys_segments(req);
1276
1277 io_req = ubd_alloc_req(dev, req, segs);
1278 if (!io_req)
1279 return -ENOMEM;
1280
1281 io_req->desc_cnt = segs;
1282 if (segs)
1283 ubd_map_req(dev, io_req, req);
1284
1285 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1286 if (ret != sizeof(io_req)) {
1287 if (ret != -EAGAIN)
1288 pr_err("write to io thread failed: %d\n", -ret);
1289 kfree(io_req);
1290 }
1291 return ret;
1292}
1293
1294static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1295 const struct blk_mq_queue_data *bd)
1296{
1297 struct ubd *ubd_dev = hctx->queue->queuedata;
1298 struct request *req = bd->rq;
1299 int ret = 0, res = BLK_STS_OK;
1300
1301 blk_mq_start_request(req);
1302
1303 spin_lock_irq(&ubd_dev->lock);
1304
1305 switch (req_op(req)) {
1306 case REQ_OP_FLUSH:
1307 case REQ_OP_READ:
1308 case REQ_OP_WRITE:
1309 case REQ_OP_DISCARD:
1310 case REQ_OP_WRITE_ZEROES:
1311 ret = ubd_submit_request(ubd_dev, req);
1312 break;
1313 default:
1314 WARN_ON_ONCE(1);
1315 res = BLK_STS_NOTSUPP;
1316 }
1317
1318 spin_unlock_irq(&ubd_dev->lock);
1319
1320 if (ret < 0) {
1321 if (ret == -ENOMEM)
1322 res = BLK_STS_RESOURCE;
1323 else
1324 res = BLK_STS_DEV_RESOURCE;
1325 }
1326
1327 return res;
1328}
1329
1330static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1331{
1332 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1333
1334 geo->heads = 128;
1335 geo->sectors = 32;
1336 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1337 return 0;
1338}
1339
1340static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1341 unsigned int cmd, unsigned long arg)
1342{
1343 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1344 u16 ubd_id[ATA_ID_WORDS];
1345
1346 switch (cmd) {
1347 struct cdrom_volctrl volume;
1348 case HDIO_GET_IDENTITY:
1349 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1350 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1351 ubd_id[ATA_ID_HEADS] = 128;
1352 ubd_id[ATA_ID_SECTORS] = 32;
1353 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1354 sizeof(ubd_id)))
1355 return -EFAULT;
1356 return 0;
1357
1358 case CDROMVOLREAD:
1359 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1360 return -EFAULT;
1361 volume.channel0 = 255;
1362 volume.channel1 = 255;
1363 volume.channel2 = 255;
1364 volume.channel3 = 255;
1365 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1366 return -EFAULT;
1367 return 0;
1368 }
1369 return -EINVAL;
1370}
1371
1372static int map_error(int error_code)
1373{
1374 switch (error_code) {
1375 case 0:
1376 return BLK_STS_OK;
1377 case ENOSYS:
1378 case EOPNOTSUPP:
1379 return BLK_STS_NOTSUPP;
1380 case ENOSPC:
1381 return BLK_STS_NOSPC;
1382 }
1383 return BLK_STS_IOERR;
1384}
1385
1386/*
1387 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1388 *
1389 * The following functions are part of UML hypervisor code.
1390 * All functions from here onwards are executed as a helper
1391 * thread and are not allowed to execute any kernel functions.
1392 *
1393 * Any communication must occur strictly via shared memory and IPC.
1394 *
1395 * Do not add printks, locks, kernel memory operations, etc - it
1396 * will result in unpredictable behaviour and/or crashes.
1397 */
1398
1399static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1400{
1401 int n;
1402
1403 if (segment->cow_offset == -1)
1404 return map_error(0);
1405
1406 n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1407 sizeof(segment->bitmap_words), segment->cow_offset);
1408 if (n != sizeof(segment->bitmap_words))
1409 return map_error(-n);
1410
1411 return map_error(0);
1412}
1413
1414static void do_io(struct io_thread_req *req, struct io_desc *desc)
1415{
1416 char *buf = NULL;
1417 unsigned long len;
1418 int n, nsectors, start, end, bit;
1419 __u64 off;
1420
1421 /* FLUSH is really a special case, we cannot "case" it with others */
1422
1423 if (req_op(req->req) == REQ_OP_FLUSH) {
1424 /* fds[0] is always either the rw image or our cow file */
1425 req->error = map_error(-os_sync_file(req->fds[0]));
1426 return;
1427 }
1428
1429 nsectors = desc->length / req->sectorsize;
1430 start = 0;
1431 do {
1432 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
1433 end = start;
1434 while((end < nsectors) &&
1435 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
1436 end++;
1437
1438 off = req->offset + req->offsets[bit] +
1439 start * req->sectorsize;
1440 len = (end - start) * req->sectorsize;
1441 if (desc->buffer != NULL)
1442 buf = &desc->buffer[start * req->sectorsize];
1443
1444 switch (req_op(req->req)) {
1445 case REQ_OP_READ:
1446 n = 0;
1447 do {
1448 buf = &buf[n];
1449 len -= n;
1450 n = os_pread_file(req->fds[bit], buf, len, off);
1451 if (n < 0) {
1452 req->error = map_error(-n);
1453 return;
1454 }
1455 } while((n < len) && (n != 0));
1456 if (n < len) memset(&buf[n], 0, len - n);
1457 break;
1458 case REQ_OP_WRITE:
1459 n = os_pwrite_file(req->fds[bit], buf, len, off);
1460 if(n != len){
1461 req->error = map_error(-n);
1462 return;
1463 }
1464 break;
1465 case REQ_OP_DISCARD:
1466 n = os_falloc_punch(req->fds[bit], off, len);
1467 if (n) {
1468 req->error = map_error(-n);
1469 return;
1470 }
1471 break;
1472 case REQ_OP_WRITE_ZEROES:
1473 n = os_falloc_zeroes(req->fds[bit], off, len);
1474 if (n) {
1475 req->error = map_error(-n);
1476 return;
1477 }
1478 break;
1479 default:
1480 WARN_ON_ONCE(1);
1481 req->error = BLK_STS_NOTSUPP;
1482 return;
1483 }
1484
1485 start = end;
1486 } while(start < nsectors);
1487
1488 req->offset += len;
1489 req->error = update_bitmap(req, desc);
1490}
1491
/*
 * File descriptor io_thread() reads requests from and writes completed
 * requests back to.  Per the original note it is changed in
 * start_io_thread(); in this file start_io_thread() is called only from
 * the ubd_driver_init() initcall, which serializes that update.
 */
int kernel_fd = -1;

/*
 * Only changed by the io thread (incremented once per request in
 * io_thread()).  XXX: currently unused.
 */
static int io_count;
1499
1500int io_thread(void *arg)
1501{
1502 int n, count, written, res;
1503
1504 os_set_pdeathsig();
1505 os_fix_helper_signals();
1506
1507 while(1){
1508 n = bulk_req_safe_read(
1509 kernel_fd,
1510 io_req_buffer,
1511 &io_remainder,
1512 &io_remainder_size,
1513 UBD_REQ_BUFFER_SIZE
1514 );
1515 if (n <= 0) {
1516 if (n == -EAGAIN)
1517 ubd_read_poll(-1);
1518
1519 continue;
1520 }
1521
1522 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1523 struct io_thread_req *req = (*io_req_buffer)[count];
1524 int i;
1525
1526 io_count++;
1527 for (i = 0; !req->error && i < req->desc_cnt; i++)
1528 do_io(req, &(req->io_desc[i]));
1529
1530 }
1531
1532 written = 0;
1533
1534 do {
1535 res = os_write_file(kernel_fd,
1536 ((char *) io_req_buffer) + written,
1537 n - written);
1538 if (res >= 0) {
1539 written += res;
1540 }
1541 if (written < n) {
1542 ubd_write_poll(-1);
1543 }
1544 } while (written < n);
1545 }
1546
1547 return 0;
1548}