Linux Audio

Check our new training course

Loading...
v3.5.6
 
   1/*
 
 
   2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
   3 * Licensed under the GPL
   4 */
   5
   6/* 2001-09-28...2002-04-17
   7 * Partition stuff by James_McMechan@hotmail.com
   8 * old style ubd by setting UBD_SHIFT to 0
   9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  10 * partitions have changed in 2.5
  11 * 2003-01-29 more tinkering for 2.5.59-1
  12 * This should now address the sysfs problems and has
  13 * the symlink for devfs to allow for booting with
  14 * the common /dev/ubd/discX/... names rather than
  15 * only /dev/ubdN/discN this version also has lots of
  16 * clean ups preparing for ubd-many.
  17 * James McMechan
  18 */
  19
  20#define UBD_SHIFT 4
  21
  22#include <linux/module.h>
  23#include <linux/init.h>
  24#include <linux/blkdev.h>
 
  25#include <linux/ata.h>
  26#include <linux/hdreg.h>
  27#include <linux/cdrom.h>
  28#include <linux/proc_fs.h>
  29#include <linux/seq_file.h>
  30#include <linux/ctype.h>
  31#include <linux/slab.h>
  32#include <linux/vmalloc.h>
  33#include <linux/platform_device.h>
  34#include <linux/scatterlist.h>
  35#include <asm/tlbflush.h>
  36#include "kern_util.h"
  37#include "mconsole_kern.h"
  38#include "init.h"
  39#include "irq_kern.h"
  40#include "ubd.h"
  41#include "os.h"
  42#include "cow.h"
  43
  44enum ubd_req { UBD_READ, UBD_WRITE };
 
  45
  46struct io_thread_req {
  47	struct request *req;
  48	enum ubd_req op;
  49	int fds[2];
  50	unsigned long offsets[2];
  51	unsigned long long offset;
  52	unsigned long length;
  53	char *buffer;
  54	int sectorsize;
  55	unsigned long sector_mask;
  56	unsigned long long cow_offset;
  57	unsigned long bitmap_words[2];
  58	int error;
  59};
  60
 
 
 
 
 
 
 
 
 
 
 
  61static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  62{
  63	__u64 n;
  64	int bits, off;
  65
  66	bits = sizeof(data[0]) * 8;
  67	n = bit / bits;
  68	off = bit % bits;
  69	return (data[n] & (1 << off)) != 0;
  70}
  71
  72static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  73{
  74	__u64 n;
  75	int bits, off;
  76
  77	bits = sizeof(data[0]) * 8;
  78	n = bit / bits;
  79	off = bit % bits;
  80	data[n] |= (1 << off);
  81}
  82/*End stuff from ubd_user.h*/
  83
  84#define DRIVER_NAME "uml-blkdev"
  85
  86static DEFINE_MUTEX(ubd_lock);
  87static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
  88
  89static int ubd_open(struct block_device *bdev, fmode_t mode);
  90static int ubd_release(struct gendisk *disk, fmode_t mode);
  91static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
  92		     unsigned int cmd, unsigned long arg);
  93static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
  94
  95#define MAX_DEV (16)
  96
  97static const struct block_device_operations ubd_blops = {
  98        .owner		= THIS_MODULE,
  99        .open		= ubd_open,
 100        .release	= ubd_release,
 101        .ioctl		= ubd_ioctl,
 
 102	.getgeo		= ubd_getgeo,
 103};
 104
 105/* Protected by ubd_lock */
 106static int fake_major = UBD_MAJOR;
 107static struct gendisk *ubd_gendisk[MAX_DEV];
 108static struct gendisk *fake_gendisk[MAX_DEV];
 109
 110#ifdef CONFIG_BLK_DEV_UBD_SYNC
 111#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 112					 .cl = 1 })
 113#else
 114#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
 115					 .cl = 1 })
 116#endif
 117static struct openflags global_openflags = OPEN_FLAGS;
 118
 119struct cow {
 120	/* backing file name */
 121	char *file;
 122	/* backing file fd */
 123	int fd;
 124	unsigned long *bitmap;
 125	unsigned long bitmap_len;
 126	int bitmap_offset;
 127	int data_offset;
 128};
 129
 130#define MAX_SG 64
 131
 132struct ubd {
 133	struct list_head restart;
 134	/* name (and fd, below) of the file opened for writing, either the
 135	 * backing or the cow file. */
 136	char *file;
 137	int count;
 138	int fd;
 139	__u64 size;
 140	struct openflags boot_openflags;
 141	struct openflags openflags;
 142	unsigned shared:1;
 143	unsigned no_cow:1;
 
 144	struct cow cow;
 145	struct platform_device pdev;
 146	struct request_queue *queue;
 
 147	spinlock_t lock;
 148	struct scatterlist sg[MAX_SG];
 149	struct request *request;
 150	int start_sg, end_sg;
 151	sector_t rq_pos;
 152};
 153
 154#define DEFAULT_COW { \
 155	.file =			NULL, \
 156	.fd =			-1,	\
 157	.bitmap =		NULL, \
 158	.bitmap_offset =	0, \
 159	.data_offset =		0, \
 160}
 161
 162#define DEFAULT_UBD { \
 163	.file = 		NULL, \
 164	.count =		0, \
 165	.fd =			-1, \
 166	.size =			-1, \
 167	.boot_openflags =	OPEN_FLAGS, \
 168	.openflags =		OPEN_FLAGS, \
 169	.no_cow =               0, \
 
 170	.shared =		0, \
 171	.cow =			DEFAULT_COW, \
 172	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
 173	.request =		NULL, \
 174	.start_sg =		0, \
 175	.end_sg =		0, \
 176	.rq_pos =		0, \
 177}
 178
 179/* Protected by ubd_lock */
 180static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 181
 182/* Only changed by fake_ide_setup which is a setup */
 183static int fake_ide = 0;
 184static struct proc_dir_entry *proc_ide_root = NULL;
 185static struct proc_dir_entry *proc_ide = NULL;
 186
 
 
 
 187static void make_proc_ide(void)
 188{
 189	proc_ide_root = proc_mkdir("ide", NULL);
 190	proc_ide = proc_mkdir("ide0", proc_ide_root);
 191}
 192
 193static int fake_ide_media_proc_show(struct seq_file *m, void *v)
 194{
 195	seq_puts(m, "disk\n");
 196	return 0;
 197}
 198
 199static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
 200{
 201	return single_open(file, fake_ide_media_proc_show, NULL);
 202}
 203
 204static const struct file_operations fake_ide_media_proc_fops = {
 205	.owner		= THIS_MODULE,
 206	.open		= fake_ide_media_proc_open,
 207	.read		= seq_read,
 208	.llseek		= seq_lseek,
 209	.release	= single_release,
 210};
 211
 212static void make_ide_entries(const char *dev_name)
 213{
 214	struct proc_dir_entry *dir, *ent;
 215	char name[64];
 216
 217	if(proc_ide_root == NULL) make_proc_ide();
 218
 219	dir = proc_mkdir(dev_name, proc_ide);
 220	if(!dir) return;
 221
 222	ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
 
 223	if(!ent) return;
 224	snprintf(name, sizeof(name), "ide0/%s", dev_name);
 225	proc_symlink(dev_name, proc_ide_root, name);
 226}
 227
 228static int fake_ide_setup(char *str)
 229{
 230	fake_ide = 1;
 231	return 1;
 232}
 233
 234__setup("fake_ide", fake_ide_setup);
 235
 236__uml_help(fake_ide_setup,
 237"fake_ide\n"
 238"    Create ide0 entries that map onto ubd devices.\n\n"
 239);
 240
 241static int parse_unit(char **ptr)
 242{
 243	char *str = *ptr, *end;
 244	int n = -1;
 245
 246	if(isdigit(*str)) {
 247		n = simple_strtoul(str, &end, 0);
 248		if(end == str)
 249			return -1;
 250		*ptr = end;
 251	}
 252	else if (('a' <= *str) && (*str <= 'z')) {
 253		n = *str - 'a';
 254		str++;
 255		*ptr = str;
 256	}
 257	return n;
 258}
 259
 260/* If *index_out == -1 at exit, the passed option was a general one;
 261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 262 * should not be freed on exit.
 263 */
 264static int ubd_setup_common(char *str, int *index_out, char **error_out)
 265{
 266	struct ubd *ubd_dev;
 267	struct openflags flags = global_openflags;
 268	char *backing_file;
 269	int n, err = 0, i;
 270
 271	if(index_out) *index_out = -1;
 272	n = *str;
 273	if(n == '='){
 274		char *end;
 275		int major;
 276
 277		str++;
 278		if(!strcmp(str, "sync")){
 279			global_openflags = of_sync(global_openflags);
 280			goto out1;
 281		}
 282
 283		err = -EINVAL;
 284		major = simple_strtoul(str, &end, 0);
 285		if((*end != '\0') || (end == str)){
 286			*error_out = "Didn't parse major number";
 287			goto out1;
 288		}
 289
 290		mutex_lock(&ubd_lock);
 291		if (fake_major != UBD_MAJOR) {
 292			*error_out = "Can't assign a fake major twice";
 293			goto out1;
 294		}
 295
 296		fake_major = major;
 297
 298		printk(KERN_INFO "Setting extra ubd major number to %d\n",
 299		       major);
 300		err = 0;
 301	out1:
 302		mutex_unlock(&ubd_lock);
 303		return err;
 304	}
 305
 306	n = parse_unit(&str);
 307	if(n < 0){
 308		*error_out = "Couldn't parse device number";
 309		return -EINVAL;
 310	}
 311	if(n >= MAX_DEV){
 312		*error_out = "Device number out of range";
 313		return 1;
 314	}
 315
 316	err = -EBUSY;
 317	mutex_lock(&ubd_lock);
 318
 319	ubd_dev = &ubd_devs[n];
 320	if(ubd_dev->file != NULL){
 321		*error_out = "Device is already configured";
 322		goto out;
 323	}
 324
 325	if (index_out)
 326		*index_out = n;
 327
 328	err = -EINVAL;
 329	for (i = 0; i < sizeof("rscd="); i++) {
 330		switch (*str) {
 331		case 'r':
 332			flags.w = 0;
 333			break;
 334		case 's':
 335			flags.s = 1;
 336			break;
 337		case 'd':
 338			ubd_dev->no_cow = 1;
 339			break;
 340		case 'c':
 341			ubd_dev->shared = 1;
 342			break;
 
 
 
 343		case '=':
 344			str++;
 345			goto break_loop;
 346		default:
 347			*error_out = "Expected '=' or flag letter "
 348				"(r, s, c, or d)";
 349			goto out;
 350		}
 351		str++;
 352	}
 353
 354	if (*str == '=')
 355		*error_out = "Too many flags specified";
 356	else
 357		*error_out = "Missing '='";
 358	goto out;
 359
 360break_loop:
 361	backing_file = strchr(str, ',');
 362
 363	if (backing_file == NULL)
 364		backing_file = strchr(str, ':');
 365
 366	if(backing_file != NULL){
 367		if(ubd_dev->no_cow){
 368			*error_out = "Can't specify both 'd' and a cow file";
 369			goto out;
 370		}
 371		else {
 372			*backing_file = '\0';
 373			backing_file++;
 374		}
 375	}
 376	err = 0;
 377	ubd_dev->file = str;
 378	ubd_dev->cow.file = backing_file;
 379	ubd_dev->boot_openflags = flags;
 380out:
 381	mutex_unlock(&ubd_lock);
 382	return err;
 383}
 384
 385static int ubd_setup(char *str)
 386{
 387	char *error;
 388	int err;
 389
 390	err = ubd_setup_common(str, NULL, &error);
 391	if(err)
 392		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 393		       "%s\n", str, error);
 394	return 1;
 395}
 396
 397__setup("ubd", ubd_setup);
 398__uml_help(ubd_setup,
 399"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
 400"    This is used to associate a device with a file in the underlying\n"
 401"    filesystem. When specifying two filenames, the first one is the\n"
 402"    COW name and the second is the backing file name. As separator you can\n"
 403"    use either a ':' or a ',': the first one allows writing things like;\n"
 404"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 405"    while with a ',' the shell would not expand the 2nd '~'.\n"
 406"    When using only one filename, UML will detect whether to treat it like\n"
 407"    a COW file or a backing file. To override this detection, add the 'd'\n"
 408"    flag:\n"
 409"	ubd0d=BackingFile\n"
 410"    Usually, there is a filesystem in the file, but \n"
 411"    that's not required. Swap devices containing swap files can be\n"
 412"    specified like this. Also, a file which doesn't contain a\n"
 413"    filesystem can have its contents read in the virtual \n"
 414"    machine by running 'dd' on the device. <n> must be in the range\n"
 415"    0 to 7. Appending an 'r' to the number will cause that device\n"
 416"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 417"    an 's' will cause data to be written to disk on the host immediately.\n"
 418"    'c' will cause the device to be treated as being shared between multiple\n"
 419"    UMLs and file locking will be turned off - this is appropriate for a\n"
 420"    cluster filesystem and inappropriate at almost all other times.\n\n"
 
 421);
 422
 423static int udb_setup(char *str)
 424{
 425	printk("udb%s specified on command line is almost certainly a ubd -> "
 426	       "udb TYPO\n", str);
 427	return 1;
 428}
 429
 430__setup("udb", udb_setup);
 431__uml_help(udb_setup,
 432"udb\n"
 433"    This option is here solely to catch ubd -> udb typos, which can be\n"
 434"    to impossible to catch visually unless you specifically look for\n"
 435"    them.  The only result of any option starting with 'udb' is an error\n"
 436"    in the boot output.\n\n"
 437);
 438
 439static void do_ubd_request(struct request_queue * q);
 440
 441/* Only changed by ubd_init, which is an initcall. */
 442static int thread_fd = -1;
 443static LIST_HEAD(restart);
 444
 445/* XXX - move this inside ubd_intr. */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 446/* Called without dev->lock held, and only in interrupt context. */
 447static void ubd_handler(void)
 448{
 449	struct io_thread_req *req;
 450	struct ubd *ubd;
 451	struct list_head *list, *next_ele;
 452	unsigned long flags;
 453	int n;
 
 454
 455	while(1){
 456		n = os_read_file(thread_fd, &req,
 457				 sizeof(struct io_thread_req *));
 458		if(n != sizeof(req)){
 
 
 
 
 
 459			if(n == -EAGAIN)
 460				break;
 461			printk(KERN_ERR "spurious interrupt in ubd_handler, "
 462			       "err = %d\n", -n);
 463			return;
 464		}
 
 
 465
 466		blk_end_request(req->req, 0, req->length);
 467		kfree(req);
 468	}
 469	reactivate_fd(thread_fd, UBD_IRQ);
 470
 471	list_for_each_safe(list, next_ele, &restart){
 472		ubd = container_of(list, struct ubd, restart);
 473		list_del_init(&ubd->restart);
 474		spin_lock_irqsave(&ubd->lock, flags);
 475		do_ubd_request(ubd->queue);
 476		spin_unlock_irqrestore(&ubd->lock, flags);
 
 
 477	}
 478}
 479
 480static irqreturn_t ubd_intr(int irq, void *dev)
 481{
 482	ubd_handler();
 483	return IRQ_HANDLED;
 484}
 485
 486/* Only changed by ubd_init, which is an initcall. */
 487static int io_pid = -1;
 488
 489static void kill_io_thread(void)
 490{
 491	if(io_pid != -1)
 492		os_kill_process(io_pid, 1);
 493}
 494
 495__uml_exitcall(kill_io_thread);
 496
 497static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 498{
 499	char *file;
 500	int fd;
 501	int err;
 502
 503	__u32 version;
 504	__u32 align;
 505	char *backing_file;
 506	time_t mtime;
 507	unsigned long long size;
 508	int sector_size;
 509	int bitmap_offset;
 510
 511	if (ubd_dev->file && ubd_dev->cow.file) {
 512		file = ubd_dev->cow.file;
 513
 514		goto out;
 515	}
 516
 517	fd = os_open_file(ubd_dev->file, global_openflags, 0);
 518	if (fd < 0)
 519		return fd;
 520
 521	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 522		&mtime, &size, &sector_size, &align, &bitmap_offset);
 523	os_close_file(fd);
 524
 525	if(err == -EINVAL)
 526		file = ubd_dev->file;
 527	else
 528		file = backing_file;
 529
 530out:
 531	return os_file_size(file, size_out);
 532}
 533
 534static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 535{
 536	int err;
 537
 538	err = os_seek_file(fd, offset);
 539	if (err < 0)
 540		return err;
 541
 542	err = os_read_file(fd, buf, len);
 543	if (err < 0)
 544		return err;
 545
 546	return 0;
 547}
 548
 549static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
 550{
 551	unsigned long modtime;
 552	unsigned long long actual;
 553	int err;
 554
 555	err = os_file_modtime(file, &modtime);
 556	if (err < 0) {
 557		printk(KERN_ERR "Failed to get modification time of backing "
 558		       "file \"%s\", err = %d\n", file, -err);
 559		return err;
 560	}
 561
 562	err = os_file_size(file, &actual);
 563	if (err < 0) {
 564		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 565		       "err = %d\n", file, -err);
 566		return err;
 567	}
 568
 569	if (actual != size) {
 570		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
 571		 * the typecast.*/
 572		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 573		       "vs backing file\n", (unsigned long long) size, actual);
 574		return -EINVAL;
 575	}
 576	if (modtime != mtime) {
 577		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
 578		       "backing file\n", mtime, modtime);
 579		return -EINVAL;
 580	}
 581	return 0;
 582}
 583
 584static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 585{
 586	struct uml_stat buf1, buf2;
 587	int err;
 588
 589	if (from_cmdline == NULL)
 590		return 0;
 591	if (!strcmp(from_cmdline, from_cow))
 592		return 0;
 593
 594	err = os_stat_file(from_cmdline, &buf1);
 595	if (err < 0) {
 596		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 597		       -err);
 598		return 0;
 599	}
 600	err = os_stat_file(from_cow, &buf2);
 601	if (err < 0) {
 602		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 603		       -err);
 604		return 1;
 605	}
 606	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 607		return 0;
 608
 609	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 610	       "\"%s\" specified in COW header of \"%s\"\n",
 611	       from_cmdline, from_cow, cow);
 612	return 1;
 613}
 614
 615static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 616		  char **backing_file_out, int *bitmap_offset_out,
 617		  unsigned long *bitmap_len_out, int *data_offset_out,
 618		  int *create_cow_out)
 619{
 620	time_t mtime;
 621	unsigned long long size;
 622	__u32 version, align;
 623	char *backing_file;
 624	int fd, err, sectorsize, asked_switch, mode = 0644;
 625
 626	fd = os_open_file(file, *openflags, mode);
 627	if (fd < 0) {
 628		if ((fd == -ENOENT) && (create_cow_out != NULL))
 629			*create_cow_out = 1;
 630		if (!openflags->w ||
 631		    ((fd != -EROFS) && (fd != -EACCES)))
 632			return fd;
 633		openflags->w = 0;
 634		fd = os_open_file(file, *openflags, mode);
 635		if (fd < 0)
 636			return fd;
 637	}
 638
 639	if (shared)
 640		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
 641	else {
 642		err = os_lock_file(fd, openflags->w);
 643		if (err < 0) {
 644			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
 645			       file, -err);
 646			goto out_close;
 647		}
 648	}
 649
 650	/* Successful return case! */
 651	if (backing_file_out == NULL)
 652		return fd;
 653
 654	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
 655			      &size, &sectorsize, &align, bitmap_offset_out);
 656	if (err && (*backing_file_out != NULL)) {
 657		printk(KERN_ERR "Failed to read COW header from COW file "
 658		       "\"%s\", errno = %d\n", file, -err);
 659		goto out_close;
 660	}
 661	if (err)
 662		return fd;
 663
 664	asked_switch = path_requires_switch(*backing_file_out, backing_file,
 665					    file);
 666
 667	/* Allow switching only if no mismatch. */
 668	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
 669						   mtime)) {
 670		printk(KERN_ERR "Switching backing file to '%s'\n",
 671		       *backing_file_out);
 672		err = write_cow_header(file, fd, *backing_file_out,
 673				       sectorsize, align, &size);
 674		if (err) {
 675			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
 676			goto out_close;
 677		}
 678	} else {
 679		*backing_file_out = backing_file;
 680		err = backing_file_mismatch(*backing_file_out, size, mtime);
 681		if (err)
 682			goto out_close;
 683	}
 684
 685	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
 686		  bitmap_len_out, data_offset_out);
 687
 688	return fd;
 689 out_close:
 690	os_close_file(fd);
 691	return err;
 692}
 693
 694static int create_cow_file(char *cow_file, char *backing_file,
 695		    struct openflags flags,
 696		    int sectorsize, int alignment, int *bitmap_offset_out,
 697		    unsigned long *bitmap_len_out, int *data_offset_out)
 698{
 699	int err, fd;
 700
 701	flags.c = 1;
 702	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 703	if (fd < 0) {
 704		err = fd;
 705		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 706		       cow_file, -err);
 707		goto out;
 708	}
 709
 710	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 711			    bitmap_offset_out, bitmap_len_out,
 712			    data_offset_out);
 713	if (!err)
 714		return fd;
 715	os_close_file(fd);
 716 out:
 717	return err;
 718}
 719
 720static void ubd_close_dev(struct ubd *ubd_dev)
 721{
 722	os_close_file(ubd_dev->fd);
 723	if(ubd_dev->cow.file == NULL)
 724		return;
 725
 726	os_close_file(ubd_dev->cow.fd);
 727	vfree(ubd_dev->cow.bitmap);
 728	ubd_dev->cow.bitmap = NULL;
 729}
 730
 731static int ubd_open_dev(struct ubd *ubd_dev)
 732{
 733	struct openflags flags;
 734	char **back_ptr;
 735	int err, create_cow, *create_ptr;
 736	int fd;
 737
 738	ubd_dev->openflags = ubd_dev->boot_openflags;
 739	create_cow = 0;
 740	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 741	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 742
 743	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 744				back_ptr, &ubd_dev->cow.bitmap_offset,
 745				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 746				create_ptr);
 747
 748	if((fd == -ENOENT) && create_cow){
 749		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 750					  ubd_dev->openflags, 1 << 9, PAGE_SIZE,
 751					  &ubd_dev->cow.bitmap_offset,
 752					  &ubd_dev->cow.bitmap_len,
 753					  &ubd_dev->cow.data_offset);
 754		if(fd >= 0){
 755			printk(KERN_INFO "Creating \"%s\" as COW file for "
 756			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 757		}
 758	}
 759
 760	if(fd < 0){
 761		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 762		       -fd);
 763		return fd;
 764	}
 765	ubd_dev->fd = fd;
 766
 767	if(ubd_dev->cow.file != NULL){
 768		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 769
 770		err = -ENOMEM;
 771		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 772		if(ubd_dev->cow.bitmap == NULL){
 773			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 774			goto error;
 775		}
 776		flush_tlb_kernel_vm();
 777
 778		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 779				      ubd_dev->cow.bitmap_offset,
 780				      ubd_dev->cow.bitmap_len);
 781		if(err < 0)
 782			goto error;
 783
 784		flags = ubd_dev->openflags;
 785		flags.w = 0;
 786		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 787				    NULL, NULL, NULL, NULL);
 788		if(err < 0) goto error;
 789		ubd_dev->cow.fd = err;
 790	}
 
 
 
 
 
 
 
 
 791	return 0;
 792 error:
 793	os_close_file(ubd_dev->fd);
 794	return err;
 795}
 796
 797static void ubd_device_release(struct device *dev)
 798{
 799	struct ubd *ubd_dev = dev_get_drvdata(dev);
 800
 801	blk_cleanup_queue(ubd_dev->queue);
 
 802	*ubd_dev = ((struct ubd) DEFAULT_UBD);
 803}
 804
 805static int ubd_disk_register(int major, u64 size, int unit,
 806			     struct gendisk **disk_out)
 807{
 
 808	struct gendisk *disk;
 809
 810	disk = alloc_disk(1 << UBD_SHIFT);
 811	if(disk == NULL)
 812		return -ENOMEM;
 813
 814	disk->major = major;
 815	disk->first_minor = unit << UBD_SHIFT;
 816	disk->fops = &ubd_blops;
 817	set_capacity(disk, size / 512);
 818	if (major == UBD_MAJOR)
 819		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 820	else
 821		sprintf(disk->disk_name, "ubd_fake%d", unit);
 822
 823	/* sysfs register (not for ide fake devices) */
 824	if (major == UBD_MAJOR) {
 825		ubd_devs[unit].pdev.id   = unit;
 826		ubd_devs[unit].pdev.name = DRIVER_NAME;
 827		ubd_devs[unit].pdev.dev.release = ubd_device_release;
 828		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 829		platform_device_register(&ubd_devs[unit].pdev);
 830		disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
 831	}
 832
 833	disk->private_data = &ubd_devs[unit];
 834	disk->queue = ubd_devs[unit].queue;
 835	add_disk(disk);
 836
 837	*disk_out = disk;
 838	return 0;
 839}
 840
 841#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
 
 
 
 
 842
 843static int ubd_add(int n, char **error_out)
 844{
 845	struct ubd *ubd_dev = &ubd_devs[n];
 846	int err = 0;
 847
 848	if(ubd_dev->file == NULL)
 849		goto out;
 850
 851	err = ubd_file_size(ubd_dev, &ubd_dev->size);
 852	if(err < 0){
 853		*error_out = "Couldn't determine size of device's file";
 854		goto out;
 855	}
 856
 857	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 858
 859	INIT_LIST_HEAD(&ubd_dev->restart);
 860	sg_init_table(ubd_dev->sg, MAX_SG);
 
 
 
 
 861
 862	err = -ENOMEM;
 863	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
 864	if (ubd_dev->queue == NULL) {
 865		*error_out = "Failed to initialize device queue";
 866		goto out;
 
 
 
 
 
 867	}
 
 868	ubd_dev->queue->queuedata = ubd_dev;
 
 869
 870	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 871	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 872	if(err){
 873		*error_out = "Failed to register device";
 874		goto out_cleanup;
 875	}
 876
 877	if (fake_major != UBD_MAJOR)
 878		ubd_disk_register(fake_major, ubd_dev->size, n,
 879				  &fake_gendisk[n]);
 880
 881	/*
 882	 * Perhaps this should also be under the "if (fake_major)" above
 883	 * using the fake_disk->disk_name
 884	 */
 885	if (fake_ide)
 886		make_ide_entries(ubd_gendisk[n]->disk_name);
 887
 888	err = 0;
 889out:
 890	return err;
 891
 892out_cleanup:
 893	blk_cleanup_queue(ubd_dev->queue);
 
 
 894	goto out;
 895}
 896
 897static int ubd_config(char *str, char **error_out)
 898{
 899	int n, ret;
 900
 901	/* This string is possibly broken up and stored, so it's only
 902	 * freed if ubd_setup_common fails, or if only general options
 903	 * were set.
 904	 */
 905	str = kstrdup(str, GFP_KERNEL);
 906	if (str == NULL) {
 907		*error_out = "Failed to allocate memory";
 908		return -ENOMEM;
 909	}
 910
 911	ret = ubd_setup_common(str, &n, error_out);
 912	if (ret)
 913		goto err_free;
 914
 915	if (n == -1) {
 916		ret = 0;
 917		goto err_free;
 918	}
 919
 920	mutex_lock(&ubd_lock);
 921	ret = ubd_add(n, error_out);
 922	if (ret)
 923		ubd_devs[n].file = NULL;
 924	mutex_unlock(&ubd_lock);
 925
 926out:
 927	return ret;
 928
 929err_free:
 930	kfree(str);
 931	goto out;
 932}
 933
 934static int ubd_get_config(char *name, char *str, int size, char **error_out)
 935{
 936	struct ubd *ubd_dev;
 937	int n, len = 0;
 938
 939	n = parse_unit(&name);
 940	if((n >= MAX_DEV) || (n < 0)){
 941		*error_out = "ubd_get_config : device number out of range";
 942		return -1;
 943	}
 944
 945	ubd_dev = &ubd_devs[n];
 946	mutex_lock(&ubd_lock);
 947
 948	if(ubd_dev->file == NULL){
 949		CONFIG_CHUNK(str, size, len, "", 1);
 950		goto out;
 951	}
 952
 953	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
 954
 955	if(ubd_dev->cow.file != NULL){
 956		CONFIG_CHUNK(str, size, len, ",", 0);
 957		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
 958	}
 959	else CONFIG_CHUNK(str, size, len, "", 1);
 960
 961 out:
 962	mutex_unlock(&ubd_lock);
 963	return len;
 964}
 965
 966static int ubd_id(char **str, int *start_out, int *end_out)
 967{
 968	int n;
 969
 970	n = parse_unit(str);
 971	*start_out = 0;
 972	*end_out = MAX_DEV - 1;
 973	return n;
 974}
 975
 976static int ubd_remove(int n, char **error_out)
 977{
 978	struct gendisk *disk = ubd_gendisk[n];
 979	struct ubd *ubd_dev;
 980	int err = -ENODEV;
 981
 982	mutex_lock(&ubd_lock);
 983
 984	ubd_dev = &ubd_devs[n];
 985
 986	if(ubd_dev->file == NULL)
 987		goto out;
 988
 989	/* you cannot remove a open disk */
 990	err = -EBUSY;
 991	if(ubd_dev->count > 0)
 992		goto out;
 993
 994	ubd_gendisk[n] = NULL;
 995	if(disk != NULL){
 996		del_gendisk(disk);
 997		put_disk(disk);
 998	}
 999
1000	if(fake_gendisk[n] != NULL){
1001		del_gendisk(fake_gendisk[n]);
1002		put_disk(fake_gendisk[n]);
1003		fake_gendisk[n] = NULL;
1004	}
1005
1006	err = 0;
1007	platform_device_unregister(&ubd_dev->pdev);
1008out:
1009	mutex_unlock(&ubd_lock);
1010	return err;
1011}
1012
1013/* All these are called by mconsole in process context and without
1014 * ubd-specific locks.  The structure itself is const except for .list.
1015 */
1016static struct mc_device ubd_mc = {
1017	.list		= LIST_HEAD_INIT(ubd_mc.list),
1018	.name		= "ubd",
1019	.config		= ubd_config,
1020	.get_config	= ubd_get_config,
1021	.id		= ubd_id,
1022	.remove		= ubd_remove,
1023};
1024
1025static int __init ubd_mc_init(void)
1026{
1027	mconsole_register_dev(&ubd_mc);
1028	return 0;
1029}
1030
1031__initcall(ubd_mc_init);
1032
1033static int __init ubd0_init(void)
1034{
1035	struct ubd *ubd_dev = &ubd_devs[0];
1036
1037	mutex_lock(&ubd_lock);
1038	if(ubd_dev->file == NULL)
1039		ubd_dev->file = "root_fs";
1040	mutex_unlock(&ubd_lock);
1041
1042	return 0;
1043}
1044
1045__initcall(ubd0_init);
1046
1047/* Used in ubd_init, which is an initcall */
1048static struct platform_driver ubd_driver = {
1049	.driver = {
1050		.name  = DRIVER_NAME,
1051	},
1052};
1053
1054static int __init ubd_init(void)
1055{
1056	char *error;
1057	int i, err;
1058
1059	if (register_blkdev(UBD_MAJOR, "ubd"))
1060		return -1;
1061
1062	if (fake_major != UBD_MAJOR) {
1063		char name[sizeof("ubd_nnn\0")];
1064
1065		snprintf(name, sizeof(name), "ubd_%d", fake_major);
1066		if (register_blkdev(fake_major, "ubd"))
1067			return -1;
1068	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1069	platform_driver_register(&ubd_driver);
1070	mutex_lock(&ubd_lock);
1071	for (i = 0; i < MAX_DEV; i++){
1072		err = ubd_add(i, &error);
1073		if(err)
1074			printk(KERN_ERR "Failed to initialize ubd device %d :"
1075			       "%s\n", i, error);
1076	}
1077	mutex_unlock(&ubd_lock);
1078	return 0;
1079}
1080
1081late_initcall(ubd_init);
1082
1083static int __init ubd_driver_init(void){
1084	unsigned long stack;
1085	int err;
1086
1087	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1088	if(global_openflags.s){
1089		printk(KERN_INFO "ubd: Synchronous mode\n");
1090		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1091		 * enough. So use anyway the io thread. */
1092	}
1093	stack = alloc_stack(0, 0);
1094	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1095				 &thread_fd);
1096	if(io_pid < 0){
1097		printk(KERN_ERR
1098		       "ubd : Failed to start I/O thread (errno = %d) - "
1099		       "falling back to synchronous I/O\n", -io_pid);
1100		io_pid = -1;
1101		return 0;
1102	}
1103	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1104			     0, "ubd", ubd_devs);
1105	if(err != 0)
1106		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1107	return 0;
1108}
1109
1110device_initcall(ubd_driver_init);
1111
1112static int ubd_open(struct block_device *bdev, fmode_t mode)
1113{
1114	struct gendisk *disk = bdev->bd_disk;
1115	struct ubd *ubd_dev = disk->private_data;
1116	int err = 0;
1117
1118	mutex_lock(&ubd_mutex);
1119	if(ubd_dev->count == 0){
1120		err = ubd_open_dev(ubd_dev);
1121		if(err){
1122			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1123			       disk->disk_name, ubd_dev->file, -err);
1124			goto out;
1125		}
1126	}
1127	ubd_dev->count++;
1128	set_disk_ro(disk, !ubd_dev->openflags.w);
1129
1130	/* This should no more be needed. And it didn't work anyway to exclude
1131	 * read-write remounting of filesystems.*/
1132	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1133	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1134	        err = -EROFS;
1135	}*/
1136out:
1137	mutex_unlock(&ubd_mutex);
1138	return err;
1139}
1140
1141static int ubd_release(struct gendisk *disk, fmode_t mode)
1142{
1143	struct ubd *ubd_dev = disk->private_data;
1144
1145	mutex_lock(&ubd_mutex);
1146	if(--ubd_dev->count == 0)
1147		ubd_close_dev(ubd_dev);
1148	mutex_unlock(&ubd_mutex);
1149	return 0;
1150}
1151
1152static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1153			  __u64 *cow_offset, unsigned long *bitmap,
1154			  __u64 bitmap_offset, unsigned long *bitmap_words,
1155			  __u64 bitmap_len)
1156{
1157	__u64 sector = io_offset >> 9;
1158	int i, update_bitmap = 0;
1159
1160	for(i = 0; i < length >> 9; i++){
1161		if(cow_mask != NULL)
1162			ubd_set_bit(i, (unsigned char *) cow_mask);
1163		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1164			continue;
1165
1166		update_bitmap = 1;
1167		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1168	}
1169
1170	if(!update_bitmap)
1171		return;
1172
1173	*cow_offset = sector / (sizeof(unsigned long) * 8);
1174
1175	/* This takes care of the case where we're exactly at the end of the
1176	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1177	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1178	 * for the original diagnosis.
1179	 */
1180	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1181					 sizeof(unsigned long)) - 1))
1182		(*cow_offset)--;
1183
1184	bitmap_words[0] = bitmap[*cow_offset];
1185	bitmap_words[1] = bitmap[*cow_offset + 1];
1186
1187	*cow_offset *= sizeof(unsigned long);
1188	*cow_offset += bitmap_offset;
1189}
1190
1191static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1192		       __u64 bitmap_offset, __u64 bitmap_len)
1193{
1194	__u64 sector = req->offset >> 9;
1195	int i;
1196
1197	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1198		panic("Operation too long");
1199
1200	if(req->op == UBD_READ) {
1201		for(i = 0; i < req->length >> 9; i++){
1202			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1203				ubd_set_bit(i, (unsigned char *)
1204					    &req->sector_mask);
1205		}
1206	}
1207	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1208			   &req->cow_offset, bitmap, bitmap_offset,
1209			   req->bitmap_words, bitmap_len);
1210}
1211
1212/* Called with dev->lock held */
1213static void prepare_request(struct request *req, struct io_thread_req *io_req,
1214			    unsigned long long offset, int page_offset,
1215			    int len, struct page *page)
1216{
1217	struct gendisk *disk = req->rq_disk;
1218	struct ubd *ubd_dev = disk->private_data;
 
 
 
 
 
1219
1220	io_req->req = req;
1221	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1222		ubd_dev->fd;
1223	io_req->fds[1] = ubd_dev->fd;
1224	io_req->cow_offset = -1;
1225	io_req->offset = offset;
1226	io_req->length = len;
1227	io_req->error = 0;
1228	io_req->sector_mask = 0;
1229
1230	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1231	io_req->offsets[0] = 0;
1232	io_req->offsets[1] = ubd_dev->cow.data_offset;
1233	io_req->buffer = page_address(page) + page_offset;
1234	io_req->sectorsize = 1 << 9;
 
 
1235
1236	if(ubd_dev->cow.file != NULL)
1237		cowify_req(io_req, ubd_dev->cow.bitmap,
1238			   ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
 
 
 
 
1239
 
 
 
 
 
 
 
 
 
 
 
1240}
1241
1242/* Called with dev->lock held */
1243static void do_ubd_request(struct request_queue *q)
1244{
1245	struct io_thread_req *io_req;
1246	struct request *req;
1247	int n;
1248
1249	while(1){
1250		struct ubd *dev = q->queuedata;
1251		if(dev->end_sg == 0){
1252			struct request *req = blk_fetch_request(q);
1253			if(req == NULL)
1254				return;
1255
1256			dev->request = req;
1257			dev->rq_pos = blk_rq_pos(req);
1258			dev->start_sg = 0;
1259			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1260		}
1261
1262		req = dev->request;
1263		while(dev->start_sg < dev->end_sg){
1264			struct scatterlist *sg = &dev->sg[dev->start_sg];
1265
1266			io_req = kmalloc(sizeof(struct io_thread_req),
1267					 GFP_ATOMIC);
1268			if(io_req == NULL){
1269				if(list_empty(&dev->restart))
1270					list_add(&dev->restart, &restart);
1271				return;
1272			}
1273			prepare_request(req, io_req,
1274					(unsigned long long)dev->rq_pos << 9,
1275					sg->offset, sg->length, sg_page(sg));
1276
1277			n = os_write_file(thread_fd, &io_req,
1278					  sizeof(struct io_thread_req *));
1279			if(n != sizeof(struct io_thread_req *)){
1280				if(n != -EAGAIN)
1281					printk("write to io thread failed, "
1282					       "errno = %d\n", -n);
1283				else if(list_empty(&dev->restart))
1284					list_add(&dev->restart, &restart);
1285				kfree(io_req);
1286				return;
1287			}
1288
1289			dev->rq_pos += sg->length >> 9;
1290			dev->start_sg++;
1291		}
1292		dev->end_sg = 0;
1293		dev->request = NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1294	}
 
 
1295}
1296
1297static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1298{
1299	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1300
1301	geo->heads = 128;
1302	geo->sectors = 32;
1303	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1304	return 0;
1305}
1306
1307static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1308		     unsigned int cmd, unsigned long arg)
1309{
1310	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1311	u16 ubd_id[ATA_ID_WORDS];
1312
1313	switch (cmd) {
1314		struct cdrom_volctrl volume;
1315	case HDIO_GET_IDENTITY:
1316		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1317		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1318		ubd_id[ATA_ID_HEADS]	= 128;
1319		ubd_id[ATA_ID_SECTORS]	= 32;
1320		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1321				 sizeof(ubd_id)))
1322			return -EFAULT;
1323		return 0;
1324
1325	case CDROMVOLREAD:
1326		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1327			return -EFAULT;
1328		volume.channel0 = 255;
1329		volume.channel1 = 255;
1330		volume.channel2 = 255;
1331		volume.channel3 = 255;
1332		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1333			return -EFAULT;
1334		return 0;
1335	}
1336	return -EINVAL;
1337}
1338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1339static int update_bitmap(struct io_thread_req *req)
1340{
1341	int n;
1342
1343	if(req->cow_offset == -1)
1344		return 0;
1345
1346	n = os_seek_file(req->fds[1], req->cow_offset);
1347	if(n < 0){
1348		printk("do_io - bitmap lseek failed : err = %d\n", -n);
1349		return 1;
1350	}
1351
1352	n = os_write_file(req->fds[1], &req->bitmap_words,
1353			  sizeof(req->bitmap_words));
1354	if(n != sizeof(req->bitmap_words)){
1355		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1356		       req->fds[1]);
1357		return 1;
1358	}
1359
1360	return 0;
1361}
1362
1363static void do_io(struct io_thread_req *req)
1364{
1365	char *buf;
1366	unsigned long len;
1367	int n, nsectors, start, end, bit;
1368	int err;
1369	__u64 off;
1370
 
 
 
 
 
 
 
 
1371	nsectors = req->length / req->sectorsize;
1372	start = 0;
1373	do {
1374		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1375		end = start;
1376		while((end < nsectors) &&
1377		      (ubd_test_bit(end, (unsigned char *)
1378				    &req->sector_mask) == bit))
1379			end++;
1380
1381		off = req->offset + req->offsets[bit] +
1382			start * req->sectorsize;
1383		len = (end - start) * req->sectorsize;
1384		buf = &req->buffer[start * req->sectorsize];
 
1385
1386		err = os_seek_file(req->fds[bit], off);
1387		if(err < 0){
1388			printk("do_io - lseek failed : err = %d\n", -err);
1389			req->error = 1;
1390			return;
1391		}
1392		if(req->op == UBD_READ){
1393			n = 0;
1394			do {
1395				buf = &buf[n];
1396				len -= n;
1397				n = os_read_file(req->fds[bit], buf, len);
1398				if (n < 0) {
1399					printk("do_io - read failed, err = %d "
1400					       "fd = %d\n", -n, req->fds[bit]);
1401					req->error = 1;
1402					return;
1403				}
1404			} while((n < len) && (n != 0));
1405			if (n < len) memset(&buf[n], 0, len - n);
1406		} else {
1407			n = os_write_file(req->fds[bit], buf, len);
 
1408			if(n != len){
1409				printk("do_io - write failed err = %d "
1410				       "fd = %d\n", -n, req->fds[bit]);
1411				req->error = 1;
 
 
 
 
 
 
1412				return;
1413			}
 
 
 
 
 
1414		}
1415
1416		start = end;
1417	} while(start < nsectors);
1418
1419	req->error = update_bitmap(req);
1420}
1421
1422/* Changed in start_io_thread, which is serialized by being called only
1423 * from ubd_init, which is an initcall.
1424 */
1425int kernel_fd = -1;
1426
1427/* Only changed by the io thread. XXX: currently unused. */
1428static int io_count = 0;
1429
1430int io_thread(void *arg)
1431{
1432	struct io_thread_req *req;
1433	int n;
 
1434
1435	ignore_sigwinch_sig();
1436	while(1){
1437		n = os_read_file(kernel_fd, &req,
1438				 sizeof(struct io_thread_req *));
1439		if(n != sizeof(struct io_thread_req *)){
1440			if(n < 0)
1441				printk("io_thread - read failed, fd = %d, "
1442				       "err = %d\n", kernel_fd, -n);
1443			else {
1444				printk("io_thread - short read, fd = %d, "
1445				       "length = %d\n", kernel_fd, n);
1446			}
 
1447			continue;
1448		}
1449		io_count++;
1450		do_io(req);
1451		n = os_write_file(kernel_fd, &req,
1452				  sizeof(struct io_thread_req *));
1453		if(n != sizeof(struct io_thread_req *))
1454			printk("io_thread - write failed, fd = %d, err = %d\n",
1455			       kernel_fd, -n);
 
 
 
 
 
 
 
 
 
 
 
 
1456	}
1457
1458	return 0;
1459}
v5.9
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018 Cambridge Greys Ltd
   4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
   5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 
   6 */
   7
   8/* 2001-09-28...2002-04-17
   9 * Partition stuff by James_McMechan@hotmail.com
  10 * old style ubd by setting UBD_SHIFT to 0
  11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  12 * partitions have changed in 2.5
  13 * 2003-01-29 more tinkering for 2.5.59-1
  14 * This should now address the sysfs problems and has
  15 * the symlink for devfs to allow for booting with
  16 * the common /dev/ubd/discX/... names rather than
  17 * only /dev/ubdN/discN this version also has lots of
  18 * clean ups preparing for ubd-many.
  19 * James McMechan
  20 */
  21
  22#define UBD_SHIFT 4
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/blkdev.h>
  27#include <linux/blk-mq.h>
  28#include <linux/ata.h>
  29#include <linux/hdreg.h>
  30#include <linux/cdrom.h>
  31#include <linux/proc_fs.h>
  32#include <linux/seq_file.h>
  33#include <linux/ctype.h>
  34#include <linux/slab.h>
  35#include <linux/vmalloc.h>
  36#include <linux/platform_device.h>
  37#include <linux/scatterlist.h>
  38#include <asm/tlbflush.h>
  39#include <kern_util.h>
  40#include "mconsole_kern.h"
  41#include <init.h>
  42#include <irq_kern.h>
  43#include "ubd.h"
  44#include <os.h>
  45#include "cow.h"
  46
  47/* Max request size is determined by sector mask - 32K */
  48#define UBD_MAX_REQUEST (8 * sizeof(long))
  49
  50struct io_thread_req {
  51	struct request *req;
 
  52	int fds[2];
  53	unsigned long offsets[2];
  54	unsigned long long offset;
  55	unsigned long length;
  56	char *buffer;
  57	int sectorsize;
  58	unsigned long sector_mask;
  59	unsigned long long cow_offset;
  60	unsigned long bitmap_words[2];
  61	int error;
  62};
  63
  64
  65static struct io_thread_req * (*irq_req_buffer)[];
  66static struct io_thread_req *irq_remainder;
  67static int irq_remainder_size;
  68
  69static struct io_thread_req * (*io_req_buffer)[];
  70static struct io_thread_req *io_remainder;
  71static int io_remainder_size;
  72
  73
  74
  75static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  76{
  77	__u64 n;
  78	int bits, off;
  79
  80	bits = sizeof(data[0]) * 8;
  81	n = bit / bits;
  82	off = bit % bits;
  83	return (data[n] & (1 << off)) != 0;
  84}
  85
  86static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  87{
  88	__u64 n;
  89	int bits, off;
  90
  91	bits = sizeof(data[0]) * 8;
  92	n = bit / bits;
  93	off = bit % bits;
  94	data[n] |= (1 << off);
  95}
  96/*End stuff from ubd_user.h*/
  97
  98#define DRIVER_NAME "uml-blkdev"
  99
 100static DEFINE_MUTEX(ubd_lock);
 101static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
 102
 103static int ubd_open(struct block_device *bdev, fmode_t mode);
 104static void ubd_release(struct gendisk *disk, fmode_t mode);
 105static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 106		     unsigned int cmd, unsigned long arg);
 107static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 108
 109#define MAX_DEV (16)
 110
 111static const struct block_device_operations ubd_blops = {
 112        .owner		= THIS_MODULE,
 113        .open		= ubd_open,
 114        .release	= ubd_release,
 115        .ioctl		= ubd_ioctl,
 116        .compat_ioctl	= blkdev_compat_ptr_ioctl,
 117	.getgeo		= ubd_getgeo,
 118};
 119
 120/* Protected by ubd_lock */
 121static int fake_major = UBD_MAJOR;
 122static struct gendisk *ubd_gendisk[MAX_DEV];
 123static struct gendisk *fake_gendisk[MAX_DEV];
 124
 125#ifdef CONFIG_BLK_DEV_UBD_SYNC
 126#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 127					 .cl = 1 })
 128#else
 129#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
 130					 .cl = 1 })
 131#endif
 132static struct openflags global_openflags = OPEN_FLAGS;
 133
 134struct cow {
 135	/* backing file name */
 136	char *file;
 137	/* backing file fd */
 138	int fd;
 139	unsigned long *bitmap;
 140	unsigned long bitmap_len;
 141	int bitmap_offset;
 142	int data_offset;
 143};
 144
 145#define MAX_SG 64
 146
 147struct ubd {
 
 148	/* name (and fd, below) of the file opened for writing, either the
 149	 * backing or the cow file. */
 150	char *file;
 151	int count;
 152	int fd;
 153	__u64 size;
 154	struct openflags boot_openflags;
 155	struct openflags openflags;
 156	unsigned shared:1;
 157	unsigned no_cow:1;
 158	unsigned no_trim:1;
 159	struct cow cow;
 160	struct platform_device pdev;
 161	struct request_queue *queue;
 162	struct blk_mq_tag_set tag_set;
 163	spinlock_t lock;
 
 
 
 
 164};
 165
 166#define DEFAULT_COW { \
 167	.file =			NULL, \
 168	.fd =			-1,	\
 169	.bitmap =		NULL, \
 170	.bitmap_offset =	0, \
 171	.data_offset =		0, \
 172}
 173
 174#define DEFAULT_UBD { \
 175	.file = 		NULL, \
 176	.count =		0, \
 177	.fd =			-1, \
 178	.size =			-1, \
 179	.boot_openflags =	OPEN_FLAGS, \
 180	.openflags =		OPEN_FLAGS, \
 181	.no_cow =               0, \
 182	.no_trim =		0, \
 183	.shared =		0, \
 184	.cow =			DEFAULT_COW, \
 185	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
 
 
 
 
 186}
 187
 188/* Protected by ubd_lock */
 189static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 190
 191/* Only changed by fake_ide_setup which is a setup */
 192static int fake_ide = 0;
 193static struct proc_dir_entry *proc_ide_root = NULL;
 194static struct proc_dir_entry *proc_ide = NULL;
 195
 196static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 197				 const struct blk_mq_queue_data *bd);
 198
 199static void make_proc_ide(void)
 200{
 201	proc_ide_root = proc_mkdir("ide", NULL);
 202	proc_ide = proc_mkdir("ide0", proc_ide_root);
 203}
 204
 205static int fake_ide_media_proc_show(struct seq_file *m, void *v)
 206{
 207	seq_puts(m, "disk\n");
 208	return 0;
 209}
 210
 
 
 
 
 
 
 
 
 
 
 
 
 
 211static void make_ide_entries(const char *dev_name)
 212{
 213	struct proc_dir_entry *dir, *ent;
 214	char name[64];
 215
 216	if(proc_ide_root == NULL) make_proc_ide();
 217
 218	dir = proc_mkdir(dev_name, proc_ide);
 219	if(!dir) return;
 220
 221	ent = proc_create_single("media", S_IRUGO, dir,
 222			fake_ide_media_proc_show);
 223	if(!ent) return;
 224	snprintf(name, sizeof(name), "ide0/%s", dev_name);
 225	proc_symlink(dev_name, proc_ide_root, name);
 226}
 227
 228static int fake_ide_setup(char *str)
 229{
 230	fake_ide = 1;
 231	return 1;
 232}
 233
 234__setup("fake_ide", fake_ide_setup);
 235
 236__uml_help(fake_ide_setup,
 237"fake_ide\n"
 238"    Create ide0 entries that map onto ubd devices.\n\n"
 239);
 240
 241static int parse_unit(char **ptr)
 242{
 243	char *str = *ptr, *end;
 244	int n = -1;
 245
 246	if(isdigit(*str)) {
 247		n = simple_strtoul(str, &end, 0);
 248		if(end == str)
 249			return -1;
 250		*ptr = end;
 251	}
 252	else if (('a' <= *str) && (*str <= 'z')) {
 253		n = *str - 'a';
 254		str++;
 255		*ptr = str;
 256	}
 257	return n;
 258}
 259
 260/* If *index_out == -1 at exit, the passed option was a general one;
 261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 262 * should not be freed on exit.
 263 */
 264static int ubd_setup_common(char *str, int *index_out, char **error_out)
 265{
 266	struct ubd *ubd_dev;
 267	struct openflags flags = global_openflags;
 268	char *backing_file;
 269	int n, err = 0, i;
 270
 271	if(index_out) *index_out = -1;
 272	n = *str;
 273	if(n == '='){
 274		char *end;
 275		int major;
 276
 277		str++;
 278		if(!strcmp(str, "sync")){
 279			global_openflags = of_sync(global_openflags);
 280			return err;
 281		}
 282
 283		err = -EINVAL;
 284		major = simple_strtoul(str, &end, 0);
 285		if((*end != '\0') || (end == str)){
 286			*error_out = "Didn't parse major number";
 287			return err;
 288		}
 289
 290		mutex_lock(&ubd_lock);
 291		if (fake_major != UBD_MAJOR) {
 292			*error_out = "Can't assign a fake major twice";
 293			goto out1;
 294		}
 295
 296		fake_major = major;
 297
 298		printk(KERN_INFO "Setting extra ubd major number to %d\n",
 299		       major);
 300		err = 0;
 301	out1:
 302		mutex_unlock(&ubd_lock);
 303		return err;
 304	}
 305
 306	n = parse_unit(&str);
 307	if(n < 0){
 308		*error_out = "Couldn't parse device number";
 309		return -EINVAL;
 310	}
 311	if(n >= MAX_DEV){
 312		*error_out = "Device number out of range";
 313		return 1;
 314	}
 315
 316	err = -EBUSY;
 317	mutex_lock(&ubd_lock);
 318
 319	ubd_dev = &ubd_devs[n];
 320	if(ubd_dev->file != NULL){
 321		*error_out = "Device is already configured";
 322		goto out;
 323	}
 324
 325	if (index_out)
 326		*index_out = n;
 327
 328	err = -EINVAL;
 329	for (i = 0; i < sizeof("rscdt="); i++) {
 330		switch (*str) {
 331		case 'r':
 332			flags.w = 0;
 333			break;
 334		case 's':
 335			flags.s = 1;
 336			break;
 337		case 'd':
 338			ubd_dev->no_cow = 1;
 339			break;
 340		case 'c':
 341			ubd_dev->shared = 1;
 342			break;
 343		case 't':
 344			ubd_dev->no_trim = 1;
 345			break;
 346		case '=':
 347			str++;
 348			goto break_loop;
 349		default:
 350			*error_out = "Expected '=' or flag letter "
 351				"(r, s, c, t or d)";
 352			goto out;
 353		}
 354		str++;
 355	}
 356
 357	if (*str == '=')
 358		*error_out = "Too many flags specified";
 359	else
 360		*error_out = "Missing '='";
 361	goto out;
 362
 363break_loop:
 364	backing_file = strchr(str, ',');
 365
 366	if (backing_file == NULL)
 367		backing_file = strchr(str, ':');
 368
 369	if(backing_file != NULL){
 370		if(ubd_dev->no_cow){
 371			*error_out = "Can't specify both 'd' and a cow file";
 372			goto out;
 373		}
 374		else {
 375			*backing_file = '\0';
 376			backing_file++;
 377		}
 378	}
 379	err = 0;
 380	ubd_dev->file = str;
 381	ubd_dev->cow.file = backing_file;
 382	ubd_dev->boot_openflags = flags;
 383out:
 384	mutex_unlock(&ubd_lock);
 385	return err;
 386}
 387
 388static int ubd_setup(char *str)
 389{
 390	char *error;
 391	int err;
 392
 393	err = ubd_setup_common(str, NULL, &error);
 394	if(err)
 395		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 396		       "%s\n", str, error);
 397	return 1;
 398}
 399
 400__setup("ubd", ubd_setup);
 401__uml_help(ubd_setup,
 402"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
 403"    This is used to associate a device with a file in the underlying\n"
 404"    filesystem. When specifying two filenames, the first one is the\n"
 405"    COW name and the second is the backing file name. As separator you can\n"
 406"    use either a ':' or a ',': the first one allows writing things like;\n"
 407"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 408"    while with a ',' the shell would not expand the 2nd '~'.\n"
 409"    When using only one filename, UML will detect whether to treat it like\n"
 410"    a COW file or a backing file. To override this detection, add the 'd'\n"
 411"    flag:\n"
 412"	ubd0d=BackingFile\n"
 413"    Usually, there is a filesystem in the file, but \n"
 414"    that's not required. Swap devices containing swap files can be\n"
 415"    specified like this. Also, a file which doesn't contain a\n"
 416"    filesystem can have its contents read in the virtual \n"
 417"    machine by running 'dd' on the device. <n> must be in the range\n"
 418"    0 to 7. Appending an 'r' to the number will cause that device\n"
 419"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 420"    an 's' will cause data to be written to disk on the host immediately.\n"
 421"    'c' will cause the device to be treated as being shared between multiple\n"
 422"    UMLs and file locking will be turned off - this is appropriate for a\n"
 423"    cluster filesystem and inappropriate at almost all other times.\n\n"
 424"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 425);
 426
 427static int udb_setup(char *str)
 428{
 429	printk("udb%s specified on command line is almost certainly a ubd -> "
 430	       "udb TYPO\n", str);
 431	return 1;
 432}
 433
 434__setup("udb", udb_setup);
 435__uml_help(udb_setup,
 436"udb\n"
 437"    This option is here solely to catch ubd -> udb typos, which can be\n"
 438"    to impossible to catch visually unless you specifically look for\n"
 439"    them.  The only result of any option starting with 'udb' is an error\n"
 440"    in the boot output.\n\n"
 441);
 442
 
 
 443/* Only changed by ubd_init, which is an initcall. */
 444static int thread_fd = -1;
 
 445
 446/* Function to read several request pointers at a time
 447* handling fractional reads if (and as) needed
 448*/
 449
 450static int bulk_req_safe_read(
 451	int fd,
 452	struct io_thread_req * (*request_buffer)[],
 453	struct io_thread_req **remainder,
 454	int *remainder_size,
 455	int max_recs
 456	)
 457{
 458	int n = 0;
 459	int res = 0;
 460
 461	if (*remainder_size > 0) {
 462		memmove(
 463			(char *) request_buffer,
 464			(char *) remainder, *remainder_size
 465		);
 466		n = *remainder_size;
 467	}
 468
 469	res = os_read_file(
 470			fd,
 471			((char *) request_buffer) + *remainder_size,
 472			sizeof(struct io_thread_req *)*max_recs
 473				- *remainder_size
 474		);
 475	if (res > 0) {
 476		n += res;
 477		if ((n % sizeof(struct io_thread_req *)) > 0) {
 478			/*
 479			* Read somehow returned not a multiple of dword
 480			* theoretically possible, but never observed in the
 481			* wild, so read routine must be able to handle it
 482			*/
 483			*remainder_size = n % sizeof(struct io_thread_req *);
 484			WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
 485			memmove(
 486				remainder,
 487				((char *) request_buffer) +
 488					(n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
 489				*remainder_size
 490			);
 491			n = n - *remainder_size;
 492		}
 493	} else {
 494		n = res;
 495	}
 496	return n;
 497}
 498
 499/* Called without dev->lock held, and only in interrupt context. */
 500static void ubd_handler(void)
 501{
 
 
 
 
 502	int n;
 503	int count;
 504
 505	while(1){
 506		n = bulk_req_safe_read(
 507			thread_fd,
 508			irq_req_buffer,
 509			&irq_remainder,
 510			&irq_remainder_size,
 511			UBD_REQ_BUFFER_SIZE
 512		);
 513		if (n < 0) {
 514			if(n == -EAGAIN)
 515				break;
 516			printk(KERN_ERR "spurious interrupt in ubd_handler, "
 517			       "err = %d\n", -n);
 518			return;
 519		}
 520		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 521			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 522
 523			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
 524				blk_queue_max_discard_sectors(io_req->req->q, 0);
 525				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
 526				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
 527			}
 528			if ((io_req->error) || (io_req->buffer == NULL))
 529				blk_mq_end_request(io_req->req, io_req->error);
 530			else {
 531				if (!blk_update_request(io_req->req, io_req->error, io_req->length))
 532					__blk_mq_end_request(io_req->req, io_req->error);
 533			}
 534			kfree(io_req);
 535		}
 536	}
 537}
 538
 539static irqreturn_t ubd_intr(int irq, void *dev)
 540{
 541	ubd_handler();
 542	return IRQ_HANDLED;
 543}
 544
 545/* Only changed by ubd_init, which is an initcall. */
 546static int io_pid = -1;
 547
 548static void kill_io_thread(void)
 549{
 550	if(io_pid != -1)
 551		os_kill_process(io_pid, 1);
 552}
 553
 554__uml_exitcall(kill_io_thread);
 555
 556static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 557{
 558	char *file;
 559	int fd;
 560	int err;
 561
 562	__u32 version;
 563	__u32 align;
 564	char *backing_file;
 565	time64_t mtime;
 566	unsigned long long size;
 567	int sector_size;
 568	int bitmap_offset;
 569
 570	if (ubd_dev->file && ubd_dev->cow.file) {
 571		file = ubd_dev->cow.file;
 572
 573		goto out;
 574	}
 575
 576	fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
 577	if (fd < 0)
 578		return fd;
 579
 580	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 581		&mtime, &size, &sector_size, &align, &bitmap_offset);
 582	os_close_file(fd);
 583
 584	if(err == -EINVAL)
 585		file = ubd_dev->file;
 586	else
 587		file = backing_file;
 588
 589out:
 590	return os_file_size(file, size_out);
 591}
 592
 593static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 594{
 595	int err;
 596
 597	err = os_pread_file(fd, buf, len, offset);
 
 
 
 
 598	if (err < 0)
 599		return err;
 600
 601	return 0;
 602}
 603
 604static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
 605{
 606	time64_t modtime;
 607	unsigned long long actual;
 608	int err;
 609
 610	err = os_file_modtime(file, &modtime);
 611	if (err < 0) {
 612		printk(KERN_ERR "Failed to get modification time of backing "
 613		       "file \"%s\", err = %d\n", file, -err);
 614		return err;
 615	}
 616
 617	err = os_file_size(file, &actual);
 618	if (err < 0) {
 619		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 620		       "err = %d\n", file, -err);
 621		return err;
 622	}
 623
 624	if (actual != size) {
 625		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
 626		 * the typecast.*/
 627		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 628		       "vs backing file\n", (unsigned long long) size, actual);
 629		return -EINVAL;
 630	}
 631	if (modtime != mtime) {
 632		printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
 633		       "backing file\n", mtime, modtime);
 634		return -EINVAL;
 635	}
 636	return 0;
 637}
 638
 639static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 640{
 641	struct uml_stat buf1, buf2;
 642	int err;
 643
 644	if (from_cmdline == NULL)
 645		return 0;
 646	if (!strcmp(from_cmdline, from_cow))
 647		return 0;
 648
 649	err = os_stat_file(from_cmdline, &buf1);
 650	if (err < 0) {
 651		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 652		       -err);
 653		return 0;
 654	}
 655	err = os_stat_file(from_cow, &buf2);
 656	if (err < 0) {
 657		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 658		       -err);
 659		return 1;
 660	}
 661	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 662		return 0;
 663
 664	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 665	       "\"%s\" specified in COW header of \"%s\"\n",
 666	       from_cmdline, from_cow, cow);
 667	return 1;
 668}
 669
 670static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 671		  char **backing_file_out, int *bitmap_offset_out,
 672		  unsigned long *bitmap_len_out, int *data_offset_out,
 673		  int *create_cow_out)
 674{
 675	time64_t mtime;
 676	unsigned long long size;
 677	__u32 version, align;
 678	char *backing_file;
 679	int fd, err, sectorsize, asked_switch, mode = 0644;
 680
 681	fd = os_open_file(file, *openflags, mode);
 682	if (fd < 0) {
 683		if ((fd == -ENOENT) && (create_cow_out != NULL))
 684			*create_cow_out = 1;
 685		if (!openflags->w ||
 686		    ((fd != -EROFS) && (fd != -EACCES)))
 687			return fd;
 688		openflags->w = 0;
 689		fd = os_open_file(file, *openflags, mode);
 690		if (fd < 0)
 691			return fd;
 692	}
 693
 694	if (shared)
 695		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
 696	else {
 697		err = os_lock_file(fd, openflags->w);
 698		if (err < 0) {
 699			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
 700			       file, -err);
 701			goto out_close;
 702		}
 703	}
 704
 705	/* Successful return case! */
 706	if (backing_file_out == NULL)
 707		return fd;
 708
 709	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
 710			      &size, &sectorsize, &align, bitmap_offset_out);
 711	if (err && (*backing_file_out != NULL)) {
 712		printk(KERN_ERR "Failed to read COW header from COW file "
 713		       "\"%s\", errno = %d\n", file, -err);
 714		goto out_close;
 715	}
 716	if (err)
 717		return fd;
 718
 719	asked_switch = path_requires_switch(*backing_file_out, backing_file,
 720					    file);
 721
 722	/* Allow switching only if no mismatch. */
 723	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
 724						   mtime)) {
 725		printk(KERN_ERR "Switching backing file to '%s'\n",
 726		       *backing_file_out);
 727		err = write_cow_header(file, fd, *backing_file_out,
 728				       sectorsize, align, &size);
 729		if (err) {
 730			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
 731			goto out_close;
 732		}
 733	} else {
 734		*backing_file_out = backing_file;
 735		err = backing_file_mismatch(*backing_file_out, size, mtime);
 736		if (err)
 737			goto out_close;
 738	}
 739
 740	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
 741		  bitmap_len_out, data_offset_out);
 742
 743	return fd;
 744 out_close:
 745	os_close_file(fd);
 746	return err;
 747}
 748
 749static int create_cow_file(char *cow_file, char *backing_file,
 750		    struct openflags flags,
 751		    int sectorsize, int alignment, int *bitmap_offset_out,
 752		    unsigned long *bitmap_len_out, int *data_offset_out)
 753{
 754	int err, fd;
 755
 756	flags.c = 1;
 757	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 758	if (fd < 0) {
 759		err = fd;
 760		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 761		       cow_file, -err);
 762		goto out;
 763	}
 764
 765	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 766			    bitmap_offset_out, bitmap_len_out,
 767			    data_offset_out);
 768	if (!err)
 769		return fd;
 770	os_close_file(fd);
 771 out:
 772	return err;
 773}
 774
 775static void ubd_close_dev(struct ubd *ubd_dev)
 776{
 777	os_close_file(ubd_dev->fd);
 778	if(ubd_dev->cow.file == NULL)
 779		return;
 780
 781	os_close_file(ubd_dev->cow.fd);
 782	vfree(ubd_dev->cow.bitmap);
 783	ubd_dev->cow.bitmap = NULL;
 784}
 785
 786static int ubd_open_dev(struct ubd *ubd_dev)
 787{
 788	struct openflags flags;
 789	char **back_ptr;
 790	int err, create_cow, *create_ptr;
 791	int fd;
 792
 793	ubd_dev->openflags = ubd_dev->boot_openflags;
 794	create_cow = 0;
 795	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 796	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 797
 798	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 799				back_ptr, &ubd_dev->cow.bitmap_offset,
 800				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 801				create_ptr);
 802
 803	if((fd == -ENOENT) && create_cow){
 804		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 805					  ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 806					  &ubd_dev->cow.bitmap_offset,
 807					  &ubd_dev->cow.bitmap_len,
 808					  &ubd_dev->cow.data_offset);
 809		if(fd >= 0){
 810			printk(KERN_INFO "Creating \"%s\" as COW file for "
 811			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 812		}
 813	}
 814
 815	if(fd < 0){
 816		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 817		       -fd);
 818		return fd;
 819	}
 820	ubd_dev->fd = fd;
 821
 822	if(ubd_dev->cow.file != NULL){
 823		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 824
 825		err = -ENOMEM;
 826		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 827		if(ubd_dev->cow.bitmap == NULL){
 828			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 829			goto error;
 830		}
 831		flush_tlb_kernel_vm();
 832
 833		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 834				      ubd_dev->cow.bitmap_offset,
 835				      ubd_dev->cow.bitmap_len);
 836		if(err < 0)
 837			goto error;
 838
 839		flags = ubd_dev->openflags;
 840		flags.w = 0;
 841		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 842				    NULL, NULL, NULL, NULL);
 843		if(err < 0) goto error;
 844		ubd_dev->cow.fd = err;
 845	}
 846	if (ubd_dev->no_trim == 0) {
 847		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
 848		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
 849		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 850		blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 851		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
 852	}
 853	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 854	return 0;
 855 error:
 856	os_close_file(ubd_dev->fd);
 857	return err;
 858}
 859
 860static void ubd_device_release(struct device *dev)
 861{
 862	struct ubd *ubd_dev = dev_get_drvdata(dev);
 863
 864	blk_cleanup_queue(ubd_dev->queue);
 865	blk_mq_free_tag_set(&ubd_dev->tag_set);
 866	*ubd_dev = ((struct ubd) DEFAULT_UBD);
 867}
 868
 869static int ubd_disk_register(int major, u64 size, int unit,
 870			     struct gendisk **disk_out)
 871{
 872	struct device *parent = NULL;
 873	struct gendisk *disk;
 874
 875	disk = alloc_disk(1 << UBD_SHIFT);
 876	if(disk == NULL)
 877		return -ENOMEM;
 878
 879	disk->major = major;
 880	disk->first_minor = unit << UBD_SHIFT;
 881	disk->fops = &ubd_blops;
 882	set_capacity(disk, size / 512);
 883	if (major == UBD_MAJOR)
 884		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 885	else
 886		sprintf(disk->disk_name, "ubd_fake%d", unit);
 887
 888	/* sysfs register (not for ide fake devices) */
 889	if (major == UBD_MAJOR) {
 890		ubd_devs[unit].pdev.id   = unit;
 891		ubd_devs[unit].pdev.name = DRIVER_NAME;
 892		ubd_devs[unit].pdev.dev.release = ubd_device_release;
 893		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 894		platform_device_register(&ubd_devs[unit].pdev);
 895		parent = &ubd_devs[unit].pdev.dev;
 896	}
 897
 898	disk->private_data = &ubd_devs[unit];
 899	disk->queue = ubd_devs[unit].queue;
 900	device_add_disk(parent, disk, NULL);
 901
 902	*disk_out = disk;
 903	return 0;
 904}
 905
 906#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
 907
 908static const struct blk_mq_ops ubd_mq_ops = {
 909	.queue_rq = ubd_queue_rq,
 910};
 911
 912static int ubd_add(int n, char **error_out)
 913{
 914	struct ubd *ubd_dev = &ubd_devs[n];
 915	int err = 0;
 916
 917	if(ubd_dev->file == NULL)
 918		goto out;
 919
 920	err = ubd_file_size(ubd_dev, &ubd_dev->size);
 921	if(err < 0){
 922		*error_out = "Couldn't determine size of device's file";
 923		goto out;
 924	}
 925
 926	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 927
 928	ubd_dev->tag_set.ops = &ubd_mq_ops;
 929	ubd_dev->tag_set.queue_depth = 64;
 930	ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
 931	ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 932	ubd_dev->tag_set.driver_data = ubd_dev;
 933	ubd_dev->tag_set.nr_hw_queues = 1;
 934
 935	err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
 936	if (err)
 
 
 937		goto out;
 938
 939	ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
 940	if (IS_ERR(ubd_dev->queue)) {
 941		err = PTR_ERR(ubd_dev->queue);
 942		goto out_cleanup_tags;
 943	}
 944
 945	ubd_dev->queue->queuedata = ubd_dev;
 946	blk_queue_write_cache(ubd_dev->queue, true, false);
 947
 948	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 949	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 950	if(err){
 951		*error_out = "Failed to register device";
 952		goto out_cleanup_tags;
 953	}
 954
 955	if (fake_major != UBD_MAJOR)
 956		ubd_disk_register(fake_major, ubd_dev->size, n,
 957				  &fake_gendisk[n]);
 958
 959	/*
 960	 * Perhaps this should also be under the "if (fake_major)" above
 961	 * using the fake_disk->disk_name
 962	 */
 963	if (fake_ide)
 964		make_ide_entries(ubd_gendisk[n]->disk_name);
 965
 966	err = 0;
 967out:
 968	return err;
 969
 970out_cleanup_tags:
 971	blk_mq_free_tag_set(&ubd_dev->tag_set);
 972	if (!(IS_ERR(ubd_dev->queue)))
 973		blk_cleanup_queue(ubd_dev->queue);
 974	goto out;
 975}
 976
 977static int ubd_config(char *str, char **error_out)
 978{
 979	int n, ret;
 980
 981	/* This string is possibly broken up and stored, so it's only
 982	 * freed if ubd_setup_common fails, or if only general options
 983	 * were set.
 984	 */
 985	str = kstrdup(str, GFP_KERNEL);
 986	if (str == NULL) {
 987		*error_out = "Failed to allocate memory";
 988		return -ENOMEM;
 989	}
 990
 991	ret = ubd_setup_common(str, &n, error_out);
 992	if (ret)
 993		goto err_free;
 994
 995	if (n == -1) {
 996		ret = 0;
 997		goto err_free;
 998	}
 999
1000	mutex_lock(&ubd_lock);
1001	ret = ubd_add(n, error_out);
1002	if (ret)
1003		ubd_devs[n].file = NULL;
1004	mutex_unlock(&ubd_lock);
1005
1006out:
1007	return ret;
1008
1009err_free:
1010	kfree(str);
1011	goto out;
1012}
1013
1014static int ubd_get_config(char *name, char *str, int size, char **error_out)
1015{
1016	struct ubd *ubd_dev;
1017	int n, len = 0;
1018
1019	n = parse_unit(&name);
1020	if((n >= MAX_DEV) || (n < 0)){
1021		*error_out = "ubd_get_config : device number out of range";
1022		return -1;
1023	}
1024
1025	ubd_dev = &ubd_devs[n];
1026	mutex_lock(&ubd_lock);
1027
1028	if(ubd_dev->file == NULL){
1029		CONFIG_CHUNK(str, size, len, "", 1);
1030		goto out;
1031	}
1032
1033	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1034
1035	if(ubd_dev->cow.file != NULL){
1036		CONFIG_CHUNK(str, size, len, ",", 0);
1037		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1038	}
1039	else CONFIG_CHUNK(str, size, len, "", 1);
1040
1041 out:
1042	mutex_unlock(&ubd_lock);
1043	return len;
1044}
1045
1046static int ubd_id(char **str, int *start_out, int *end_out)
1047{
1048	int n;
1049
1050	n = parse_unit(str);
1051	*start_out = 0;
1052	*end_out = MAX_DEV - 1;
1053	return n;
1054}
1055
1056static int ubd_remove(int n, char **error_out)
1057{
1058	struct gendisk *disk = ubd_gendisk[n];
1059	struct ubd *ubd_dev;
1060	int err = -ENODEV;
1061
1062	mutex_lock(&ubd_lock);
1063
1064	ubd_dev = &ubd_devs[n];
1065
1066	if(ubd_dev->file == NULL)
1067		goto out;
1068
1069	/* you cannot remove a open disk */
1070	err = -EBUSY;
1071	if(ubd_dev->count > 0)
1072		goto out;
1073
1074	ubd_gendisk[n] = NULL;
1075	if(disk != NULL){
1076		del_gendisk(disk);
1077		put_disk(disk);
1078	}
1079
1080	if(fake_gendisk[n] != NULL){
1081		del_gendisk(fake_gendisk[n]);
1082		put_disk(fake_gendisk[n]);
1083		fake_gendisk[n] = NULL;
1084	}
1085
1086	err = 0;
1087	platform_device_unregister(&ubd_dev->pdev);
1088out:
1089	mutex_unlock(&ubd_lock);
1090	return err;
1091}
1092
1093/* All these are called by mconsole in process context and without
1094 * ubd-specific locks.  The structure itself is const except for .list.
1095 */
1096static struct mc_device ubd_mc = {
1097	.list		= LIST_HEAD_INIT(ubd_mc.list),
1098	.name		= "ubd",
1099	.config		= ubd_config,
1100	.get_config	= ubd_get_config,
1101	.id		= ubd_id,
1102	.remove		= ubd_remove,
1103};
1104
1105static int __init ubd_mc_init(void)
1106{
1107	mconsole_register_dev(&ubd_mc);
1108	return 0;
1109}
1110
1111__initcall(ubd_mc_init);
1112
1113static int __init ubd0_init(void)
1114{
1115	struct ubd *ubd_dev = &ubd_devs[0];
1116
1117	mutex_lock(&ubd_lock);
1118	if(ubd_dev->file == NULL)
1119		ubd_dev->file = "root_fs";
1120	mutex_unlock(&ubd_lock);
1121
1122	return 0;
1123}
1124
1125__initcall(ubd0_init);
1126
1127/* Used in ubd_init, which is an initcall */
1128static struct platform_driver ubd_driver = {
1129	.driver = {
1130		.name  = DRIVER_NAME,
1131	},
1132};
1133
1134static int __init ubd_init(void)
1135{
1136	char *error;
1137	int i, err;
1138
1139	if (register_blkdev(UBD_MAJOR, "ubd"))
1140		return -1;
1141
1142	if (fake_major != UBD_MAJOR) {
1143		char name[sizeof("ubd_nnn\0")];
1144
1145		snprintf(name, sizeof(name), "ubd_%d", fake_major);
1146		if (register_blkdev(fake_major, "ubd"))
1147			return -1;
1148	}
1149
1150	irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1151				       sizeof(struct io_thread_req *),
1152				       GFP_KERNEL
1153		);
1154	irq_remainder = 0;
1155
1156	if (irq_req_buffer == NULL) {
1157		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1158		return -1;
1159	}
1160	io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1161				      sizeof(struct io_thread_req *),
1162				      GFP_KERNEL
1163		);
1164
1165	io_remainder = 0;
1166
1167	if (io_req_buffer == NULL) {
1168		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1169		return -1;
1170	}
1171	platform_driver_register(&ubd_driver);
1172	mutex_lock(&ubd_lock);
1173	for (i = 0; i < MAX_DEV; i++){
1174		err = ubd_add(i, &error);
1175		if(err)
1176			printk(KERN_ERR "Failed to initialize ubd device %d :"
1177			       "%s\n", i, error);
1178	}
1179	mutex_unlock(&ubd_lock);
1180	return 0;
1181}
1182
1183late_initcall(ubd_init);
1184
1185static int __init ubd_driver_init(void){
1186	unsigned long stack;
1187	int err;
1188
1189	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1190	if(global_openflags.s){
1191		printk(KERN_INFO "ubd: Synchronous mode\n");
1192		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1193		 * enough. So use anyway the io thread. */
1194	}
1195	stack = alloc_stack(0, 0);
1196	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1197				 &thread_fd);
1198	if(io_pid < 0){
1199		printk(KERN_ERR
1200		       "ubd : Failed to start I/O thread (errno = %d) - "
1201		       "falling back to synchronous I/O\n", -io_pid);
1202		io_pid = -1;
1203		return 0;
1204	}
1205	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1206			     0, "ubd", ubd_devs);
1207	if(err != 0)
1208		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1209	return 0;
1210}
1211
1212device_initcall(ubd_driver_init);
1213
1214static int ubd_open(struct block_device *bdev, fmode_t mode)
1215{
1216	struct gendisk *disk = bdev->bd_disk;
1217	struct ubd *ubd_dev = disk->private_data;
1218	int err = 0;
1219
1220	mutex_lock(&ubd_mutex);
1221	if(ubd_dev->count == 0){
1222		err = ubd_open_dev(ubd_dev);
1223		if(err){
1224			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1225			       disk->disk_name, ubd_dev->file, -err);
1226			goto out;
1227		}
1228	}
1229	ubd_dev->count++;
1230	set_disk_ro(disk, !ubd_dev->openflags.w);
1231
1232	/* This should no more be needed. And it didn't work anyway to exclude
1233	 * read-write remounting of filesystems.*/
1234	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1235	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1236	        err = -EROFS;
1237	}*/
1238out:
1239	mutex_unlock(&ubd_mutex);
1240	return err;
1241}
1242
1243static void ubd_release(struct gendisk *disk, fmode_t mode)
1244{
1245	struct ubd *ubd_dev = disk->private_data;
1246
1247	mutex_lock(&ubd_mutex);
1248	if(--ubd_dev->count == 0)
1249		ubd_close_dev(ubd_dev);
1250	mutex_unlock(&ubd_mutex);
 
1251}
1252
1253static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1254			  __u64 *cow_offset, unsigned long *bitmap,
1255			  __u64 bitmap_offset, unsigned long *bitmap_words,
1256			  __u64 bitmap_len)
1257{
1258	__u64 sector = io_offset >> SECTOR_SHIFT;
1259	int i, update_bitmap = 0;
1260
1261	for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1262		if(cow_mask != NULL)
1263			ubd_set_bit(i, (unsigned char *) cow_mask);
1264		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1265			continue;
1266
1267		update_bitmap = 1;
1268		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1269	}
1270
1271	if(!update_bitmap)
1272		return;
1273
1274	*cow_offset = sector / (sizeof(unsigned long) * 8);
1275
1276	/* This takes care of the case where we're exactly at the end of the
1277	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1278	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1279	 * for the original diagnosis.
1280	 */
1281	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1282					 sizeof(unsigned long)) - 1))
1283		(*cow_offset)--;
1284
1285	bitmap_words[0] = bitmap[*cow_offset];
1286	bitmap_words[1] = bitmap[*cow_offset + 1];
1287
1288	*cow_offset *= sizeof(unsigned long);
1289	*cow_offset += bitmap_offset;
1290}
1291
1292static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1293		       __u64 bitmap_offset, __u64 bitmap_len)
1294{
1295	__u64 sector = req->offset >> SECTOR_SHIFT;
1296	int i;
1297
1298	if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT)
1299		panic("Operation too long");
1300
1301	if (req_op(req->req) == REQ_OP_READ) {
1302		for (i = 0; i < req->length >> SECTOR_SHIFT; i++) {
1303			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1304				ubd_set_bit(i, (unsigned char *)
1305					    &req->sector_mask);
1306		}
1307	}
1308	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1309			   &req->cow_offset, bitmap, bitmap_offset,
1310			   req->bitmap_words, bitmap_len);
1311}
1312
1313static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
1314		u64 off, struct bio_vec *bvec)
 
 
1315{
1316	struct ubd *dev = hctx->queue->queuedata;
1317	struct io_thread_req *io_req;
1318	int ret;
1319
1320	io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC);
1321	if (!io_req)
1322		return -ENOMEM;
1323
1324	io_req->req = req;
1325	if (dev->cow.file)
1326		io_req->fds[0] = dev->cow.fd;
1327	else
1328		io_req->fds[0] = dev->fd;
 
 
1329	io_req->error = 0;
 
1330
1331	if (bvec != NULL) {
1332		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
1333		io_req->length = bvec->bv_len;
1334	} else {
1335		io_req->buffer = NULL;
1336		io_req->length = blk_rq_bytes(req);
1337	}
1338
1339	io_req->sectorsize = SECTOR_SIZE;
1340	io_req->fds[1] = dev->fd;
1341	io_req->cow_offset = -1;
1342	io_req->offset = off;
1343	io_req->sector_mask = 0;
1344	io_req->offsets[0] = 0;
1345	io_req->offsets[1] = dev->cow.data_offset;
1346
1347	if (dev->cow.file)
1348		cowify_req(io_req, dev->cow.bitmap,
1349			   dev->cow.bitmap_offset, dev->cow.bitmap_len);
1350
1351	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1352	if (ret != sizeof(io_req)) {
1353		if (ret != -EAGAIN)
1354			pr_err("write to io thread failed: %d\n", -ret);
1355		kfree(io_req);
1356	}
1357	return ret;
1358}
1359
1360static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req)
 
1361{
1362	struct req_iterator iter;
1363	struct bio_vec bvec;
1364	int ret;
1365	u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT;
1366
1367	rq_for_each_segment(bvec, req, iter) {
1368		ret = ubd_queue_one_vec(hctx, req, off, &bvec);
1369		if (ret < 0)
1370			return ret;
1371		off += bvec.bv_len;
1372	}
1373	return 0;
1374}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1375
1376static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1377				 const struct blk_mq_queue_data *bd)
1378{
1379	struct ubd *ubd_dev = hctx->queue->queuedata;
1380	struct request *req = bd->rq;
1381	int ret = 0, res = BLK_STS_OK;
1382
1383	blk_mq_start_request(req);
1384
1385	spin_lock_irq(&ubd_dev->lock);
1386
1387	switch (req_op(req)) {
1388	/* operations with no lentgth/offset arguments */
1389	case REQ_OP_FLUSH:
1390		ret = ubd_queue_one_vec(hctx, req, 0, NULL);
1391		break;
1392	case REQ_OP_READ:
1393	case REQ_OP_WRITE:
1394		ret = queue_rw_req(hctx, req);
1395		break;
1396	case REQ_OP_DISCARD:
1397	case REQ_OP_WRITE_ZEROES:
1398		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
1399		break;
1400	default:
1401		WARN_ON_ONCE(1);
1402		res = BLK_STS_NOTSUPP;
1403	}
1404
1405	spin_unlock_irq(&ubd_dev->lock);
1406
1407	if (ret < 0) {
1408		if (ret == -ENOMEM)
1409			res = BLK_STS_RESOURCE;
1410		else
1411			res = BLK_STS_DEV_RESOURCE;
1412	}
1413
1414	return res;
1415}
1416
1417static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1418{
1419	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1420
1421	geo->heads = 128;
1422	geo->sectors = 32;
1423	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1424	return 0;
1425}
1426
1427static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1428		     unsigned int cmd, unsigned long arg)
1429{
1430	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1431	u16 ubd_id[ATA_ID_WORDS];
1432
1433	switch (cmd) {
1434		struct cdrom_volctrl volume;
1435	case HDIO_GET_IDENTITY:
1436		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1437		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1438		ubd_id[ATA_ID_HEADS]	= 128;
1439		ubd_id[ATA_ID_SECTORS]	= 32;
1440		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1441				 sizeof(ubd_id)))
1442			return -EFAULT;
1443		return 0;
1444
1445	case CDROMVOLREAD:
1446		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1447			return -EFAULT;
1448		volume.channel0 = 255;
1449		volume.channel1 = 255;
1450		volume.channel2 = 255;
1451		volume.channel3 = 255;
1452		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1453			return -EFAULT;
1454		return 0;
1455	}
1456	return -EINVAL;
1457}
1458
1459static int map_error(int error_code)
1460{
1461	switch (error_code) {
1462	case 0:
1463		return BLK_STS_OK;
1464	case ENOSYS:
1465	case EOPNOTSUPP:
1466		return BLK_STS_NOTSUPP;
1467	case ENOSPC:
1468		return BLK_STS_NOSPC;
1469	}
1470	return BLK_STS_IOERR;
1471}
1472
1473/*
1474 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1475 *
1476 * The following functions are part of UML hypervisor code.
1477 * All functions from here onwards are executed as a helper
1478 * thread and are not allowed to execute any kernel functions.
1479 *
1480 * Any communication must occur strictly via shared memory and IPC.
1481 *
1482 * Do not add printks, locks, kernel memory operations, etc - it
1483 * will result in unpredictable behaviour and/or crashes.
1484 */
1485
1486static int update_bitmap(struct io_thread_req *req)
1487{
1488	int n;
1489
1490	if(req->cow_offset == -1)
1491		return map_error(0);
 
 
 
 
 
 
1492
1493	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1494			  sizeof(req->bitmap_words), req->cow_offset);
1495	if (n != sizeof(req->bitmap_words))
1496		return map_error(-n);
 
 
 
1497
1498	return map_error(0);
1499}
1500
1501static void do_io(struct io_thread_req *req)
1502{
1503	char *buf = NULL;
1504	unsigned long len;
1505	int n, nsectors, start, end, bit;
 
1506	__u64 off;
1507
1508	/* FLUSH is really a special case, we cannot "case" it with others */
1509
1510	if (req_op(req->req) == REQ_OP_FLUSH) {
1511		/* fds[0] is always either the rw image or our cow file */
1512		req->error = map_error(-os_sync_file(req->fds[0]));
1513		return;
1514	}
1515
1516	nsectors = req->length / req->sectorsize;
1517	start = 0;
1518	do {
1519		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1520		end = start;
1521		while((end < nsectors) &&
1522		      (ubd_test_bit(end, (unsigned char *)
1523				    &req->sector_mask) == bit))
1524			end++;
1525
1526		off = req->offset + req->offsets[bit] +
1527			start * req->sectorsize;
1528		len = (end - start) * req->sectorsize;
1529		if (req->buffer != NULL)
1530			buf = &req->buffer[start * req->sectorsize];
1531
1532		switch (req_op(req->req)) {
1533		case REQ_OP_READ:
 
 
 
 
 
1534			n = 0;
1535			do {
1536				buf = &buf[n];
1537				len -= n;
1538				n = os_pread_file(req->fds[bit], buf, len, off);
1539				if (n < 0) {
1540					req->error = map_error(-n);
 
 
1541					return;
1542				}
1543			} while((n < len) && (n != 0));
1544			if (n < len) memset(&buf[n], 0, len - n);
1545			break;
1546		case REQ_OP_WRITE:
1547			n = os_pwrite_file(req->fds[bit], buf, len, off);
1548			if(n != len){
1549				req->error = map_error(-n);
1550				return;
1551			}
1552			break;
1553		case REQ_OP_DISCARD:
1554		case REQ_OP_WRITE_ZEROES:
1555			n = os_falloc_punch(req->fds[bit], off, len);
1556			if (n) {
1557				req->error = map_error(-n);
1558				return;
1559			}
1560			break;
1561		default:
1562			WARN_ON_ONCE(1);
1563			req->error = BLK_STS_NOTSUPP;
1564			return;
1565		}
1566
1567		start = end;
1568	} while(start < nsectors);
1569
1570	req->error = update_bitmap(req);
1571}
1572
/*
 * Descriptor io_thread() reads requests from and writes completions to.
 * Changed in start_io_thread(), which in this file is called only from
 * ubd_driver_init(), a device initcall.
 */
int kernel_fd = -1;

/*
 * Incremented by io_thread() once per processed request; only changed by
 * the io thread.  XXX: currently unused.
 */
static int io_count;
1580
1581int io_thread(void *arg)
1582{
1583	int n, count, written, res;
1584
1585	os_fix_helper_signals();
1586
 
1587	while(1){
1588		n = bulk_req_safe_read(
1589			kernel_fd,
1590			io_req_buffer,
1591			&io_remainder,
1592			&io_remainder_size,
1593			UBD_REQ_BUFFER_SIZE
1594		);
1595		if (n <= 0) {
1596			if (n == -EAGAIN)
1597				ubd_read_poll(-1);
1598
1599			continue;
1600		}
1601
1602		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1603			io_count++;
1604			do_io((*io_req_buffer)[count]);
1605		}
1606
1607		written = 0;
1608
1609		do {
1610			res = os_write_file(kernel_fd,
1611					    ((char *) io_req_buffer) + written,
1612					    n - written);
1613			if (res >= 0) {
1614				written += res;
1615			}
1616			if (written < n) {
1617				ubd_write_poll(-1);
1618			}
1619		} while (written < n);
1620	}
1621
1622	return 0;
1623}