Linux Audio

Check our new training course

Loading...
v3.5.6
 
   1/*
 
 
   2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
   3 * Licensed under the GPL
   4 */
   5
   6/* 2001-09-28...2002-04-17
   7 * Partition stuff by James_McMechan@hotmail.com
   8 * old style ubd by setting UBD_SHIFT to 0
   9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  10 * partitions have changed in 2.5
  11 * 2003-01-29 more tinkering for 2.5.59-1
  12 * This should now address the sysfs problems and has
  13 * the symlink for devfs to allow for booting with
  14 * the common /dev/ubd/discX/... names rather than
  15 * only /dev/ubdN/discN this version also has lots of
  16 * clean ups preparing for ubd-many.
  17 * James McMechan
  18 */
  19
  20#define UBD_SHIFT 4
  21
  22#include <linux/module.h>
  23#include <linux/init.h>
  24#include <linux/blkdev.h>
 
  25#include <linux/ata.h>
  26#include <linux/hdreg.h>
  27#include <linux/cdrom.h>
  28#include <linux/proc_fs.h>
  29#include <linux/seq_file.h>
  30#include <linux/ctype.h>
  31#include <linux/slab.h>
  32#include <linux/vmalloc.h>
  33#include <linux/platform_device.h>
  34#include <linux/scatterlist.h>
  35#include <asm/tlbflush.h>
  36#include "kern_util.h"
  37#include "mconsole_kern.h"
  38#include "init.h"
  39#include "irq_kern.h"
  40#include "ubd.h"
  41#include "os.h"
  42#include "cow.h"
  43
  44enum ubd_req { UBD_READ, UBD_WRITE };
 
  45
  46struct io_thread_req {
  47	struct request *req;
  48	enum ubd_req op;
  49	int fds[2];
  50	unsigned long offsets[2];
  51	unsigned long long offset;
  52	unsigned long length;
  53	char *buffer;
  54	int sectorsize;
  55	unsigned long sector_mask;
  56	unsigned long long cow_offset;
  57	unsigned long bitmap_words[2];
  58	int error;
  59};
  60
 
 
 
 
 
 
 
 
 
 
 
  61static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  62{
  63	__u64 n;
  64	int bits, off;
  65
  66	bits = sizeof(data[0]) * 8;
  67	n = bit / bits;
  68	off = bit % bits;
  69	return (data[n] & (1 << off)) != 0;
  70}
  71
  72static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  73{
  74	__u64 n;
  75	int bits, off;
  76
  77	bits = sizeof(data[0]) * 8;
  78	n = bit / bits;
  79	off = bit % bits;
  80	data[n] |= (1 << off);
  81}
  82/*End stuff from ubd_user.h*/
  83
  84#define DRIVER_NAME "uml-blkdev"
  85
  86static DEFINE_MUTEX(ubd_lock);
  87static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
  88
  89static int ubd_open(struct block_device *bdev, fmode_t mode);
  90static int ubd_release(struct gendisk *disk, fmode_t mode);
  91static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
  92		     unsigned int cmd, unsigned long arg);
  93static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
  94
  95#define MAX_DEV (16)
  96
  97static const struct block_device_operations ubd_blops = {
  98        .owner		= THIS_MODULE,
  99        .open		= ubd_open,
 100        .release	= ubd_release,
 101        .ioctl		= ubd_ioctl,
 102	.getgeo		= ubd_getgeo,
 103};
 104
 105/* Protected by ubd_lock */
 106static int fake_major = UBD_MAJOR;
 107static struct gendisk *ubd_gendisk[MAX_DEV];
 108static struct gendisk *fake_gendisk[MAX_DEV];
 109
 110#ifdef CONFIG_BLK_DEV_UBD_SYNC
 111#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 112					 .cl = 1 })
 113#else
 114#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
 115					 .cl = 1 })
 116#endif
 117static struct openflags global_openflags = OPEN_FLAGS;
 118
 119struct cow {
 120	/* backing file name */
 121	char *file;
 122	/* backing file fd */
 123	int fd;
 124	unsigned long *bitmap;
 125	unsigned long bitmap_len;
 126	int bitmap_offset;
 127	int data_offset;
 128};
 129
 130#define MAX_SG 64
 131
 132struct ubd {
 133	struct list_head restart;
 134	/* name (and fd, below) of the file opened for writing, either the
 135	 * backing or the cow file. */
 136	char *file;
 137	int count;
 138	int fd;
 139	__u64 size;
 140	struct openflags boot_openflags;
 141	struct openflags openflags;
 142	unsigned shared:1;
 143	unsigned no_cow:1;
 
 144	struct cow cow;
 145	struct platform_device pdev;
 146	struct request_queue *queue;
 
 147	spinlock_t lock;
 148	struct scatterlist sg[MAX_SG];
 149	struct request *request;
 150	int start_sg, end_sg;
 151	sector_t rq_pos;
 152};
 153
 154#define DEFAULT_COW { \
 155	.file =			NULL, \
 156	.fd =			-1,	\
 157	.bitmap =		NULL, \
 158	.bitmap_offset =	0, \
 159	.data_offset =		0, \
 160}
 161
 162#define DEFAULT_UBD { \
 163	.file = 		NULL, \
 164	.count =		0, \
 165	.fd =			-1, \
 166	.size =			-1, \
 167	.boot_openflags =	OPEN_FLAGS, \
 168	.openflags =		OPEN_FLAGS, \
 169	.no_cow =               0, \
 
 170	.shared =		0, \
 171	.cow =			DEFAULT_COW, \
 172	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
 173	.request =		NULL, \
 174	.start_sg =		0, \
 175	.end_sg =		0, \
 176	.rq_pos =		0, \
 177}
 178
 179/* Protected by ubd_lock */
 180static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 181
 182/* Only changed by fake_ide_setup which is a setup */
 183static int fake_ide = 0;
 184static struct proc_dir_entry *proc_ide_root = NULL;
 185static struct proc_dir_entry *proc_ide = NULL;
 186
 
 
 
 187static void make_proc_ide(void)
 188{
 189	proc_ide_root = proc_mkdir("ide", NULL);
 190	proc_ide = proc_mkdir("ide0", proc_ide_root);
 191}
 192
 193static int fake_ide_media_proc_show(struct seq_file *m, void *v)
 194{
 195	seq_puts(m, "disk\n");
 196	return 0;
 197}
 198
 199static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
 200{
 201	return single_open(file, fake_ide_media_proc_show, NULL);
 202}
 203
 204static const struct file_operations fake_ide_media_proc_fops = {
 205	.owner		= THIS_MODULE,
 206	.open		= fake_ide_media_proc_open,
 207	.read		= seq_read,
 208	.llseek		= seq_lseek,
 209	.release	= single_release,
 210};
 211
 212static void make_ide_entries(const char *dev_name)
 213{
 214	struct proc_dir_entry *dir, *ent;
 215	char name[64];
 216
 217	if(proc_ide_root == NULL) make_proc_ide();
 218
 219	dir = proc_mkdir(dev_name, proc_ide);
 220	if(!dir) return;
 221
 222	ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
 
 223	if(!ent) return;
 224	snprintf(name, sizeof(name), "ide0/%s", dev_name);
 225	proc_symlink(dev_name, proc_ide_root, name);
 226}
 227
 228static int fake_ide_setup(char *str)
 229{
 230	fake_ide = 1;
 231	return 1;
 232}
 233
 234__setup("fake_ide", fake_ide_setup);
 235
 236__uml_help(fake_ide_setup,
 237"fake_ide\n"
 238"    Create ide0 entries that map onto ubd devices.\n\n"
 239);
 240
 241static int parse_unit(char **ptr)
 242{
 243	char *str = *ptr, *end;
 244	int n = -1;
 245
 246	if(isdigit(*str)) {
 247		n = simple_strtoul(str, &end, 0);
 248		if(end == str)
 249			return -1;
 250		*ptr = end;
 251	}
 252	else if (('a' <= *str) && (*str <= 'z')) {
 253		n = *str - 'a';
 254		str++;
 255		*ptr = str;
 256	}
 257	return n;
 258}
 259
 260/* If *index_out == -1 at exit, the passed option was a general one;
 261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 262 * should not be freed on exit.
 263 */
 264static int ubd_setup_common(char *str, int *index_out, char **error_out)
 265{
 266	struct ubd *ubd_dev;
 267	struct openflags flags = global_openflags;
 268	char *backing_file;
 269	int n, err = 0, i;
 270
 271	if(index_out) *index_out = -1;
 272	n = *str;
 273	if(n == '='){
 274		char *end;
 275		int major;
 276
 277		str++;
 278		if(!strcmp(str, "sync")){
 279			global_openflags = of_sync(global_openflags);
 280			goto out1;
 281		}
 282
 283		err = -EINVAL;
 284		major = simple_strtoul(str, &end, 0);
 285		if((*end != '\0') || (end == str)){
 286			*error_out = "Didn't parse major number";
 287			goto out1;
 288		}
 289
 290		mutex_lock(&ubd_lock);
 291		if (fake_major != UBD_MAJOR) {
 292			*error_out = "Can't assign a fake major twice";
 293			goto out1;
 294		}
 295
 296		fake_major = major;
 297
 298		printk(KERN_INFO "Setting extra ubd major number to %d\n",
 299		       major);
 300		err = 0;
 301	out1:
 302		mutex_unlock(&ubd_lock);
 303		return err;
 304	}
 305
 306	n = parse_unit(&str);
 307	if(n < 0){
 308		*error_out = "Couldn't parse device number";
 309		return -EINVAL;
 310	}
 311	if(n >= MAX_DEV){
 312		*error_out = "Device number out of range";
 313		return 1;
 314	}
 315
 316	err = -EBUSY;
 317	mutex_lock(&ubd_lock);
 318
 319	ubd_dev = &ubd_devs[n];
 320	if(ubd_dev->file != NULL){
 321		*error_out = "Device is already configured";
 322		goto out;
 323	}
 324
 325	if (index_out)
 326		*index_out = n;
 327
 328	err = -EINVAL;
 329	for (i = 0; i < sizeof("rscd="); i++) {
 330		switch (*str) {
 331		case 'r':
 332			flags.w = 0;
 333			break;
 334		case 's':
 335			flags.s = 1;
 336			break;
 337		case 'd':
 338			ubd_dev->no_cow = 1;
 339			break;
 340		case 'c':
 341			ubd_dev->shared = 1;
 342			break;
 
 
 
 343		case '=':
 344			str++;
 345			goto break_loop;
 346		default:
 347			*error_out = "Expected '=' or flag letter "
 348				"(r, s, c, or d)";
 349			goto out;
 350		}
 351		str++;
 352	}
 353
 354	if (*str == '=')
 355		*error_out = "Too many flags specified";
 356	else
 357		*error_out = "Missing '='";
 358	goto out;
 359
 360break_loop:
 361	backing_file = strchr(str, ',');
 362
 363	if (backing_file == NULL)
 364		backing_file = strchr(str, ':');
 365
 366	if(backing_file != NULL){
 367		if(ubd_dev->no_cow){
 368			*error_out = "Can't specify both 'd' and a cow file";
 369			goto out;
 370		}
 371		else {
 372			*backing_file = '\0';
 373			backing_file++;
 374		}
 375	}
 376	err = 0;
 377	ubd_dev->file = str;
 378	ubd_dev->cow.file = backing_file;
 379	ubd_dev->boot_openflags = flags;
 380out:
 381	mutex_unlock(&ubd_lock);
 382	return err;
 383}
 384
 385static int ubd_setup(char *str)
 386{
 387	char *error;
 388	int err;
 389
 390	err = ubd_setup_common(str, NULL, &error);
 391	if(err)
 392		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 393		       "%s\n", str, error);
 394	return 1;
 395}
 396
 397__setup("ubd", ubd_setup);
 398__uml_help(ubd_setup,
 399"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
 400"    This is used to associate a device with a file in the underlying\n"
 401"    filesystem. When specifying two filenames, the first one is the\n"
 402"    COW name and the second is the backing file name. As separator you can\n"
 403"    use either a ':' or a ',': the first one allows writing things like;\n"
 404"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 405"    while with a ',' the shell would not expand the 2nd '~'.\n"
 406"    When using only one filename, UML will detect whether to treat it like\n"
 407"    a COW file or a backing file. To override this detection, add the 'd'\n"
 408"    flag:\n"
 409"	ubd0d=BackingFile\n"
 410"    Usually, there is a filesystem in the file, but \n"
 411"    that's not required. Swap devices containing swap files can be\n"
 412"    specified like this. Also, a file which doesn't contain a\n"
 413"    filesystem can have its contents read in the virtual \n"
 414"    machine by running 'dd' on the device. <n> must be in the range\n"
 415"    0 to 7. Appending an 'r' to the number will cause that device\n"
 416"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 417"    an 's' will cause data to be written to disk on the host immediately.\n"
 418"    'c' will cause the device to be treated as being shared between multiple\n"
 419"    UMLs and file locking will be turned off - this is appropriate for a\n"
 420"    cluster filesystem and inappropriate at almost all other times.\n\n"
 
 421);
 422
 423static int udb_setup(char *str)
 424{
 425	printk("udb%s specified on command line is almost certainly a ubd -> "
 426	       "udb TYPO\n", str);
 427	return 1;
 428}
 429
 430__setup("udb", udb_setup);
 431__uml_help(udb_setup,
 432"udb\n"
 433"    This option is here solely to catch ubd -> udb typos, which can be\n"
 434"    to impossible to catch visually unless you specifically look for\n"
 435"    them.  The only result of any option starting with 'udb' is an error\n"
 436"    in the boot output.\n\n"
 437);
 438
 439static void do_ubd_request(struct request_queue * q);
 440
 441/* Only changed by ubd_init, which is an initcall. */
 442static int thread_fd = -1;
 443static LIST_HEAD(restart);
 444
 445/* XXX - move this inside ubd_intr. */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 446/* Called without dev->lock held, and only in interrupt context. */
 447static void ubd_handler(void)
 448{
 449	struct io_thread_req *req;
 450	struct ubd *ubd;
 451	struct list_head *list, *next_ele;
 452	unsigned long flags;
 453	int n;
 
 454
 455	while(1){
 456		n = os_read_file(thread_fd, &req,
 457				 sizeof(struct io_thread_req *));
 458		if(n != sizeof(req)){
 
 
 
 
 
 459			if(n == -EAGAIN)
 460				break;
 461			printk(KERN_ERR "spurious interrupt in ubd_handler, "
 462			       "err = %d\n", -n);
 463			return;
 464		}
 
 
 465
 466		blk_end_request(req->req, 0, req->length);
 467		kfree(req);
 468	}
 469	reactivate_fd(thread_fd, UBD_IRQ);
 470
 471	list_for_each_safe(list, next_ele, &restart){
 472		ubd = container_of(list, struct ubd, restart);
 473		list_del_init(&ubd->restart);
 474		spin_lock_irqsave(&ubd->lock, flags);
 475		do_ubd_request(ubd->queue);
 476		spin_unlock_irqrestore(&ubd->lock, flags);
 
 
 477	}
 478}
 479
 480static irqreturn_t ubd_intr(int irq, void *dev)
 481{
 482	ubd_handler();
 483	return IRQ_HANDLED;
 484}
 485
 486/* Only changed by ubd_init, which is an initcall. */
 487static int io_pid = -1;
 488
 489static void kill_io_thread(void)
 490{
 491	if(io_pid != -1)
 492		os_kill_process(io_pid, 1);
 493}
 494
 495__uml_exitcall(kill_io_thread);
 496
 497static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 498{
 499	char *file;
 500	int fd;
 501	int err;
 502
 503	__u32 version;
 504	__u32 align;
 505	char *backing_file;
 506	time_t mtime;
 507	unsigned long long size;
 508	int sector_size;
 509	int bitmap_offset;
 510
 511	if (ubd_dev->file && ubd_dev->cow.file) {
 512		file = ubd_dev->cow.file;
 513
 514		goto out;
 515	}
 516
 517	fd = os_open_file(ubd_dev->file, global_openflags, 0);
 518	if (fd < 0)
 519		return fd;
 520
 521	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 522		&mtime, &size, &sector_size, &align, &bitmap_offset);
 523	os_close_file(fd);
 524
 525	if(err == -EINVAL)
 526		file = ubd_dev->file;
 527	else
 528		file = backing_file;
 529
 530out:
 531	return os_file_size(file, size_out);
 532}
 533
 534static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 535{
 536	int err;
 537
 538	err = os_seek_file(fd, offset);
 539	if (err < 0)
 540		return err;
 541
 542	err = os_read_file(fd, buf, len);
 543	if (err < 0)
 544		return err;
 545
 546	return 0;
 547}
 548
 549static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
 550{
 551	unsigned long modtime;
 552	unsigned long long actual;
 553	int err;
 554
 555	err = os_file_modtime(file, &modtime);
 556	if (err < 0) {
 557		printk(KERN_ERR "Failed to get modification time of backing "
 558		       "file \"%s\", err = %d\n", file, -err);
 559		return err;
 560	}
 561
 562	err = os_file_size(file, &actual);
 563	if (err < 0) {
 564		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 565		       "err = %d\n", file, -err);
 566		return err;
 567	}
 568
 569	if (actual != size) {
 570		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
 571		 * the typecast.*/
 572		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 573		       "vs backing file\n", (unsigned long long) size, actual);
 574		return -EINVAL;
 575	}
 576	if (modtime != mtime) {
 577		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
 578		       "backing file\n", mtime, modtime);
 579		return -EINVAL;
 580	}
 581	return 0;
 582}
 583
 584static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 585{
 586	struct uml_stat buf1, buf2;
 587	int err;
 588
 589	if (from_cmdline == NULL)
 590		return 0;
 591	if (!strcmp(from_cmdline, from_cow))
 592		return 0;
 593
 594	err = os_stat_file(from_cmdline, &buf1);
 595	if (err < 0) {
 596		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 597		       -err);
 598		return 0;
 599	}
 600	err = os_stat_file(from_cow, &buf2);
 601	if (err < 0) {
 602		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 603		       -err);
 604		return 1;
 605	}
 606	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 607		return 0;
 608
 609	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 610	       "\"%s\" specified in COW header of \"%s\"\n",
 611	       from_cmdline, from_cow, cow);
 612	return 1;
 613}
 614
 /*
  * Open @file on the host and, if requested, interpret it as a COW file.
  *
  * @openflags:         flags to open with; .w is cleared here if the file
  *                     could only be opened read-only
  * @shared:            non-zero skips host file locking
  * @backing_file_out:  NULL to open @file as a plain file.  Otherwise, on
  *                     entry *backing_file_out is the backing file requested
  *                     by the caller (or NULL), and on return it may have
  *                     been replaced by the name found in the COW header
  * @bitmap_offset_out, @bitmap_len_out, @data_offset_out:
  *                     filled from the COW header when one is found
  * @create_cow_out:    if non-NULL, set to 1 when @file does not exist
  *
  * Returns the open file descriptor, or a negative error.
  */
 615static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 616		  char **backing_file_out, int *bitmap_offset_out,
 617		  unsigned long *bitmap_len_out, int *data_offset_out,
 618		  int *create_cow_out)
 619{
 620	time_t mtime;
 621	unsigned long long size;
 622	__u32 version, align;
 623	char *backing_file;
 624	int fd, err, sectorsize, asked_switch, mode = 0644;
 625
 626	fd = os_open_file(file, *openflags, mode);
 627	if (fd < 0) {
 628		if ((fd == -ENOENT) && (create_cow_out != NULL))
 629			*create_cow_out = 1;
	/* Retry read-only, but only if write access was requested and the
	 * failure was -EROFS or -EACCES. */
 630		if (!openflags->w ||
 631		    ((fd != -EROFS) && (fd != -EACCES)))
 632			return fd;
 633		openflags->w = 0;
 634		fd = os_open_file(file, *openflags, mode);
 635		if (fd < 0)
 636			return fd;
 637	}
 638
 639	if (shared)
 640		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
 641	else {
 642		err = os_lock_file(fd, openflags->w);
 643		if (err < 0) {
 644			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
 645			       file, -err);
 646			goto out_close;
 647		}
 648	}
 649
 650	/* Successful return case! */
 651	if (backing_file_out == NULL)
 652		return fd;
 653
 654	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
 655			      &size, &sectorsize, &align, bitmap_offset_out);
	/* A header that cannot be read is fatal only when the caller named a
	 * backing file; otherwise @file is used as a plain file. */
 656	if (err && (*backing_file_out != NULL)) {
 657		printk(KERN_ERR "Failed to read COW header from COW file "
 658		       "\"%s\", errno = %d\n", file, -err);
 659		goto out_close;
 660	}
 661	if (err)
 662		return fd;
 663
 664	asked_switch = path_requires_switch(*backing_file_out, backing_file,
 665					    file);
 666
 667	/* Allow switching only if no mismatch. */
 668	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
 669						   mtime)) {
 670		printk(KERN_ERR "Switching backing file to '%s'\n",
 671		       *backing_file_out);
 672		err = write_cow_header(file, fd, *backing_file_out,
 673				       sectorsize, align, &size);
 674		if (err) {
 675			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
 676			goto out_close;
 677		}
 678	} else {
	/* No switch (or the requested file does not match the header):
	 * fall back to the header's backing file and verify it. */
 679		*backing_file_out = backing_file;
 680		err = backing_file_mismatch(*backing_file_out, size, mtime);
 681		if (err)
 682			goto out_close;
 683	}
 684
 685	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
 686		  bitmap_len_out, data_offset_out);
 687
 688	return fd;
 689 out_close:
 690	os_close_file(fd);
 691	return err;
 692}
 693
 694static int create_cow_file(char *cow_file, char *backing_file,
 695		    struct openflags flags,
 696		    int sectorsize, int alignment, int *bitmap_offset_out,
 697		    unsigned long *bitmap_len_out, int *data_offset_out)
 698{
 699	int err, fd;
 700
 701	flags.c = 1;
 702	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 703	if (fd < 0) {
 704		err = fd;
 705		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 706		       cow_file, -err);
 707		goto out;
 708	}
 709
 710	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 711			    bitmap_offset_out, bitmap_len_out,
 712			    data_offset_out);
 713	if (!err)
 714		return fd;
 715	os_close_file(fd);
 716 out:
 717	return err;
 718}
 719
 720static void ubd_close_dev(struct ubd *ubd_dev)
 721{
 722	os_close_file(ubd_dev->fd);
 723	if(ubd_dev->cow.file == NULL)
 724		return;
 725
 726	os_close_file(ubd_dev->cow.fd);
 727	vfree(ubd_dev->cow.bitmap);
 728	ubd_dev->cow.bitmap = NULL;
 729}
 730
 731static int ubd_open_dev(struct ubd *ubd_dev)
 732{
 733	struct openflags flags;
 734	char **back_ptr;
 735	int err, create_cow, *create_ptr;
 736	int fd;
 737
 738	ubd_dev->openflags = ubd_dev->boot_openflags;
 739	create_cow = 0;
 740	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 741	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 742
 743	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 744				back_ptr, &ubd_dev->cow.bitmap_offset,
 745				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 746				create_ptr);
 747
 748	if((fd == -ENOENT) && create_cow){
 749		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 750					  ubd_dev->openflags, 1 << 9, PAGE_SIZE,
 751					  &ubd_dev->cow.bitmap_offset,
 752					  &ubd_dev->cow.bitmap_len,
 753					  &ubd_dev->cow.data_offset);
 754		if(fd >= 0){
 755			printk(KERN_INFO "Creating \"%s\" as COW file for "
 756			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 757		}
 758	}
 759
 760	if(fd < 0){
 761		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 762		       -fd);
 763		return fd;
 764	}
 765	ubd_dev->fd = fd;
 766
 767	if(ubd_dev->cow.file != NULL){
 768		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 769
 770		err = -ENOMEM;
 771		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 772		if(ubd_dev->cow.bitmap == NULL){
 773			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 774			goto error;
 775		}
 776		flush_tlb_kernel_vm();
 777
 778		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 779				      ubd_dev->cow.bitmap_offset,
 780				      ubd_dev->cow.bitmap_len);
 781		if(err < 0)
 782			goto error;
 783
 784		flags = ubd_dev->openflags;
 785		flags.w = 0;
 786		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 787				    NULL, NULL, NULL, NULL);
 788		if(err < 0) goto error;
 789		ubd_dev->cow.fd = err;
 790	}
 
 
 
 
 
 
 
 
 791	return 0;
 792 error:
 793	os_close_file(ubd_dev->fd);
 794	return err;
 795}
 796
 797static void ubd_device_release(struct device *dev)
 798{
 799	struct ubd *ubd_dev = dev_get_drvdata(dev);
 800
 801	blk_cleanup_queue(ubd_dev->queue);
 
 802	*ubd_dev = ((struct ubd) DEFAULT_UBD);
 803}
 804
 805static int ubd_disk_register(int major, u64 size, int unit,
 806			     struct gendisk **disk_out)
 807{
 
 808	struct gendisk *disk;
 809
 810	disk = alloc_disk(1 << UBD_SHIFT);
 811	if(disk == NULL)
 812		return -ENOMEM;
 813
 814	disk->major = major;
 815	disk->first_minor = unit << UBD_SHIFT;
 816	disk->fops = &ubd_blops;
 817	set_capacity(disk, size / 512);
 818	if (major == UBD_MAJOR)
 819		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 820	else
 821		sprintf(disk->disk_name, "ubd_fake%d", unit);
 822
 823	/* sysfs register (not for ide fake devices) */
 824	if (major == UBD_MAJOR) {
 825		ubd_devs[unit].pdev.id   = unit;
 826		ubd_devs[unit].pdev.name = DRIVER_NAME;
 827		ubd_devs[unit].pdev.dev.release = ubd_device_release;
 828		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 829		platform_device_register(&ubd_devs[unit].pdev);
 830		disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
 831	}
 832
 833	disk->private_data = &ubd_devs[unit];
 834	disk->queue = ubd_devs[unit].queue;
 835	add_disk(disk);
 836
 837	*disk_out = disk;
 838	return 0;
 839}
 840
 841#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
 
 
 
 
 842
 843static int ubd_add(int n, char **error_out)
 844{
 845	struct ubd *ubd_dev = &ubd_devs[n];
 846	int err = 0;
 847
 848	if(ubd_dev->file == NULL)
 849		goto out;
 850
 851	err = ubd_file_size(ubd_dev, &ubd_dev->size);
 852	if(err < 0){
 853		*error_out = "Couldn't determine size of device's file";
 854		goto out;
 855	}
 856
 857	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 858
 859	INIT_LIST_HEAD(&ubd_dev->restart);
 860	sg_init_table(ubd_dev->sg, MAX_SG);
 
 
 
 
 861
 862	err = -ENOMEM;
 863	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
 864	if (ubd_dev->queue == NULL) {
 865		*error_out = "Failed to initialize device queue";
 866		goto out;
 
 
 
 
 
 867	}
 
 868	ubd_dev->queue->queuedata = ubd_dev;
 
 869
 870	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 871	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 872	if(err){
 873		*error_out = "Failed to register device";
 874		goto out_cleanup;
 875	}
 876
 877	if (fake_major != UBD_MAJOR)
 878		ubd_disk_register(fake_major, ubd_dev->size, n,
 879				  &fake_gendisk[n]);
 880
 881	/*
 882	 * Perhaps this should also be under the "if (fake_major)" above
 883	 * using the fake_disk->disk_name
 884	 */
 885	if (fake_ide)
 886		make_ide_entries(ubd_gendisk[n]->disk_name);
 887
 888	err = 0;
 889out:
 890	return err;
 891
 892out_cleanup:
 893	blk_cleanup_queue(ubd_dev->queue);
 
 
 894	goto out;
 895}
 896
 897static int ubd_config(char *str, char **error_out)
 898{
 899	int n, ret;
 900
 901	/* This string is possibly broken up and stored, so it's only
 902	 * freed if ubd_setup_common fails, or if only general options
 903	 * were set.
 904	 */
 905	str = kstrdup(str, GFP_KERNEL);
 906	if (str == NULL) {
 907		*error_out = "Failed to allocate memory";
 908		return -ENOMEM;
 909	}
 910
 911	ret = ubd_setup_common(str, &n, error_out);
 912	if (ret)
 913		goto err_free;
 914
 915	if (n == -1) {
 916		ret = 0;
 917		goto err_free;
 918	}
 919
 920	mutex_lock(&ubd_lock);
 921	ret = ubd_add(n, error_out);
 922	if (ret)
 923		ubd_devs[n].file = NULL;
 924	mutex_unlock(&ubd_lock);
 925
 926out:
 927	return ret;
 928
 929err_free:
 930	kfree(str);
 931	goto out;
 932}
 933
 /*
  * mconsole "get_config" callback: describe the unit named by @name as
  * "<file>" or "<file>,<backing file>" (an empty string if unconfigured).
  *
  * The text is emitted through CONFIG_CHUNK(), which also maintains len.
  * NOTE(review): CONFIG_CHUNK is defined outside this file; presumably it
  * appends to @str within @size bytes and its last argument marks the
  * final chunk - confirm.
  *
  * Returns len, or -1 with *error_out set if the unit is invalid.
  */
 934static int ubd_get_config(char *name, char *str, int size, char **error_out)
 935{
 936	struct ubd *ubd_dev;
 937	int n, len = 0;
 938
 939	n = parse_unit(&name);
 940	if((n >= MAX_DEV) || (n < 0)){
 941		*error_out = "ubd_get_config : device number out of range";
 942		return -1;
 943	}
 944
 945	ubd_dev = &ubd_devs[n];
 946	mutex_lock(&ubd_lock);
 947
 948	if(ubd_dev->file == NULL){
 949		CONFIG_CHUNK(str, size, len, "", 1);
 950		goto out;
 951	}
 952
 953	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
 954
 955	if(ubd_dev->cow.file != NULL){
 956		CONFIG_CHUNK(str, size, len, ",", 0);
 957		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
 958	}
 959	else CONFIG_CHUNK(str, size, len, "", 1);
 960
 961 out:
 962	mutex_unlock(&ubd_lock);
 963	return len;
 964}
 965
 966static int ubd_id(char **str, int *start_out, int *end_out)
 967{
 968	int n;
 969
 970	n = parse_unit(str);
 971	*start_out = 0;
 972	*end_out = MAX_DEV - 1;
 973	return n;
 974}
 975
 976static int ubd_remove(int n, char **error_out)
 977{
 978	struct gendisk *disk = ubd_gendisk[n];
 979	struct ubd *ubd_dev;
 980	int err = -ENODEV;
 981
 982	mutex_lock(&ubd_lock);
 983
 984	ubd_dev = &ubd_devs[n];
 985
 986	if(ubd_dev->file == NULL)
 987		goto out;
 988
 989	/* you cannot remove a open disk */
 990	err = -EBUSY;
 991	if(ubd_dev->count > 0)
 992		goto out;
 993
 994	ubd_gendisk[n] = NULL;
 995	if(disk != NULL){
 996		del_gendisk(disk);
 997		put_disk(disk);
 998	}
 999
1000	if(fake_gendisk[n] != NULL){
1001		del_gendisk(fake_gendisk[n]);
1002		put_disk(fake_gendisk[n]);
1003		fake_gendisk[n] = NULL;
1004	}
1005
1006	err = 0;
1007	platform_device_unregister(&ubd_dev->pdev);
1008out:
1009	mutex_unlock(&ubd_lock);
1010	return err;
1011}
1012
1013/* All these are called by mconsole in process context and without
1014 * ubd-specific locks.  The structure itself is const except for .list.
1015 */
1016static struct mc_device ubd_mc = {
1017	.list		= LIST_HEAD_INIT(ubd_mc.list),
1018	.name		= "ubd",
1019	.config		= ubd_config,
1020	.get_config	= ubd_get_config,
1021	.id		= ubd_id,
1022	.remove		= ubd_remove,
1023};
1024
1025static int __init ubd_mc_init(void)
1026{
1027	mconsole_register_dev(&ubd_mc);
1028	return 0;
1029}
1030
1031__initcall(ubd_mc_init);
1032
1033static int __init ubd0_init(void)
1034{
1035	struct ubd *ubd_dev = &ubd_devs[0];
1036
1037	mutex_lock(&ubd_lock);
1038	if(ubd_dev->file == NULL)
1039		ubd_dev->file = "root_fs";
1040	mutex_unlock(&ubd_lock);
1041
1042	return 0;
1043}
1044
1045__initcall(ubd0_init);
1046
1047/* Used in ubd_init, which is an initcall */
1048static struct platform_driver ubd_driver = {
1049	.driver = {
1050		.name  = DRIVER_NAME,
1051	},
1052};
1053
1054static int __init ubd_init(void)
1055{
1056	char *error;
1057	int i, err;
1058
1059	if (register_blkdev(UBD_MAJOR, "ubd"))
1060		return -1;
1061
1062	if (fake_major != UBD_MAJOR) {
1063		char name[sizeof("ubd_nnn\0")];
1064
1065		snprintf(name, sizeof(name), "ubd_%d", fake_major);
1066		if (register_blkdev(fake_major, "ubd"))
1067			return -1;
1068	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1069	platform_driver_register(&ubd_driver);
1070	mutex_lock(&ubd_lock);
1071	for (i = 0; i < MAX_DEV; i++){
1072		err = ubd_add(i, &error);
1073		if(err)
1074			printk(KERN_ERR "Failed to initialize ubd device %d :"
1075			       "%s\n", i, error);
1076	}
1077	mutex_unlock(&ubd_lock);
1078	return 0;
1079}
1080
1081late_initcall(ubd_init);
1082
1083static int __init ubd_driver_init(void){
1084	unsigned long stack;
1085	int err;
1086
1087	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1088	if(global_openflags.s){
1089		printk(KERN_INFO "ubd: Synchronous mode\n");
1090		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1091		 * enough. So use anyway the io thread. */
1092	}
1093	stack = alloc_stack(0, 0);
1094	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1095				 &thread_fd);
1096	if(io_pid < 0){
1097		printk(KERN_ERR
1098		       "ubd : Failed to start I/O thread (errno = %d) - "
1099		       "falling back to synchronous I/O\n", -io_pid);
1100		io_pid = -1;
1101		return 0;
1102	}
1103	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1104			     0, "ubd", ubd_devs);
1105	if(err != 0)
1106		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1107	return 0;
1108}
1109
1110device_initcall(ubd_driver_init);
1111
1112static int ubd_open(struct block_device *bdev, fmode_t mode)
1113{
1114	struct gendisk *disk = bdev->bd_disk;
1115	struct ubd *ubd_dev = disk->private_data;
1116	int err = 0;
1117
1118	mutex_lock(&ubd_mutex);
1119	if(ubd_dev->count == 0){
1120		err = ubd_open_dev(ubd_dev);
1121		if(err){
1122			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1123			       disk->disk_name, ubd_dev->file, -err);
1124			goto out;
1125		}
1126	}
1127	ubd_dev->count++;
1128	set_disk_ro(disk, !ubd_dev->openflags.w);
1129
1130	/* This should no more be needed. And it didn't work anyway to exclude
1131	 * read-write remounting of filesystems.*/
1132	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1133	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1134	        err = -EROFS;
1135	}*/
1136out:
1137	mutex_unlock(&ubd_mutex);
1138	return err;
1139}
1140
1141static int ubd_release(struct gendisk *disk, fmode_t mode)
1142{
1143	struct ubd *ubd_dev = disk->private_data;
1144
1145	mutex_lock(&ubd_mutex);
1146	if(--ubd_dev->count == 0)
1147		ubd_close_dev(ubd_dev);
1148	mutex_unlock(&ubd_mutex);
1149	return 0;
1150}
1151
/*
 * Mark the 512-byte sectors covered by [@io_offset, @io_offset + @length)
 * in the in-memory COW @bitmap.
 *
 * @cow_mask:      if non-NULL, bit i is set for every sector i of the
 *                 request
 * @cow_offset:    written only when at least one bitmap bit was newly
 *                 set: @bitmap_offset plus the byte offset of the first
 *                 of the two words captured in @bitmap_words
 * @bitmap_words:  receives two consecutive words of @bitmap, under the
 *                 same condition
 * @bitmap_len:    used to detect the last word of the bitmap
 */
1152static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1153			  __u64 *cow_offset, unsigned long *bitmap,
1154			  __u64 bitmap_offset, unsigned long *bitmap_words,
1155			  __u64 bitmap_len)
1156{
1157	__u64 sector = io_offset >> 9;
1158	int i, update_bitmap = 0;
1159
1160	for(i = 0; i < length >> 9; i++){
1161		if(cow_mask != NULL)
1162			ubd_set_bit(i, (unsigned char *) cow_mask);
1163		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1164			continue;
1165
1166		update_bitmap = 1;
1167		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1168	}
1169
	/* Nothing newly set: the outputs are left untouched. */
1170	if(!update_bitmap)
1171		return;
1172
	/* Index of the bitmap word that holds the first sector's bit. */
1173	*cow_offset = sector / (sizeof(unsigned long) * 8);
1174
1175	/* This takes care of the case where we're exactly at the end of the
1176	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1177	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1178	 * for the original diagnosis.
1179	 */
1180	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1181					 sizeof(unsigned long)) - 1))
1182		(*cow_offset)--;
1183
1184	bitmap_words[0] = bitmap[*cow_offset];
1185	bitmap_words[1] = bitmap[*cow_offset + 1];
1186
	/* Convert the word index into a byte position. */
1187	*cow_offset *= sizeof(unsigned long);
1188	*cow_offset += bitmap_offset;
1189}
1190
1191static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1192		       __u64 bitmap_offset, __u64 bitmap_len)
1193{
1194	__u64 sector = req->offset >> 9;
1195	int i;
1196
1197	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1198		panic("Operation too long");
1199
1200	if(req->op == UBD_READ) {
1201		for(i = 0; i < req->length >> 9; i++){
1202			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1203				ubd_set_bit(i, (unsigned char *)
1204					    &req->sector_mask);
1205		}
1206	}
1207	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1208			   &req->cow_offset, bitmap, bitmap_offset,
1209			   req->bitmap_words, bitmap_len);
1210}
1211
1212/* Called with dev->lock held */
1213static void prepare_request(struct request *req, struct io_thread_req *io_req,
1214			    unsigned long long offset, int page_offset,
1215			    int len, struct page *page)
1216{
1217	struct gendisk *disk = req->rq_disk;
1218	struct ubd *ubd_dev = disk->private_data;
 
 
 
 
 
1219
1220	io_req->req = req;
1221	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1222		ubd_dev->fd;
1223	io_req->fds[1] = ubd_dev->fd;
1224	io_req->cow_offset = -1;
1225	io_req->offset = offset;
1226	io_req->length = len;
1227	io_req->error = 0;
1228	io_req->sector_mask = 0;
1229
1230	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1231	io_req->offsets[0] = 0;
1232	io_req->offsets[1] = ubd_dev->cow.data_offset;
1233	io_req->buffer = page_address(page) + page_offset;
1234	io_req->sectorsize = 1 << 9;
 
 
1235
1236	if(ubd_dev->cow.file != NULL)
1237		cowify_req(io_req, ubd_dev->cow.bitmap,
1238			   ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
 
 
 
 
1239
 
 
 
 
 
 
 
 
 
 
 
1240}
1241
1242/* Called with dev->lock held */
1243static void do_ubd_request(struct request_queue *q)
1244{
1245	struct io_thread_req *io_req;
1246	struct request *req;
1247	int n;
1248
1249	while(1){
1250		struct ubd *dev = q->queuedata;
1251		if(dev->end_sg == 0){
1252			struct request *req = blk_fetch_request(q);
1253			if(req == NULL)
1254				return;
1255
1256			dev->request = req;
1257			dev->rq_pos = blk_rq_pos(req);
1258			dev->start_sg = 0;
1259			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1260		}
1261
1262		req = dev->request;
1263		while(dev->start_sg < dev->end_sg){
1264			struct scatterlist *sg = &dev->sg[dev->start_sg];
1265
1266			io_req = kmalloc(sizeof(struct io_thread_req),
1267					 GFP_ATOMIC);
1268			if(io_req == NULL){
1269				if(list_empty(&dev->restart))
1270					list_add(&dev->restart, &restart);
1271				return;
1272			}
1273			prepare_request(req, io_req,
1274					(unsigned long long)dev->rq_pos << 9,
1275					sg->offset, sg->length, sg_page(sg));
1276
1277			n = os_write_file(thread_fd, &io_req,
1278					  sizeof(struct io_thread_req *));
1279			if(n != sizeof(struct io_thread_req *)){
1280				if(n != -EAGAIN)
1281					printk("write to io thread failed, "
1282					       "errno = %d\n", -n);
1283				else if(list_empty(&dev->restart))
1284					list_add(&dev->restart, &restart);
1285				kfree(io_req);
1286				return;
1287			}
1288
1289			dev->rq_pos += sg->length >> 9;
1290			dev->start_sg++;
1291		}
1292		dev->end_sg = 0;
1293		dev->request = NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1294	}
 
 
1295}
1296
1297static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1298{
1299	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1300
1301	geo->heads = 128;
1302	geo->sectors = 32;
1303	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1304	return 0;
1305}
1306
1307static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1308		     unsigned int cmd, unsigned long arg)
1309{
1310	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1311	u16 ubd_id[ATA_ID_WORDS];
1312
1313	switch (cmd) {
1314		struct cdrom_volctrl volume;
1315	case HDIO_GET_IDENTITY:
1316		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1317		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1318		ubd_id[ATA_ID_HEADS]	= 128;
1319		ubd_id[ATA_ID_SECTORS]	= 32;
1320		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1321				 sizeof(ubd_id)))
1322			return -EFAULT;
1323		return 0;
1324
1325	case CDROMVOLREAD:
1326		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1327			return -EFAULT;
1328		volume.channel0 = 255;
1329		volume.channel1 = 255;
1330		volume.channel2 = 255;
1331		volume.channel3 = 255;
1332		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1333			return -EFAULT;
1334		return 0;
1335	}
1336	return -EINVAL;
1337}
1338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1339static int update_bitmap(struct io_thread_req *req)
1340{
1341	int n;
1342
1343	if(req->cow_offset == -1)
1344		return 0;
1345
1346	n = os_seek_file(req->fds[1], req->cow_offset);
1347	if(n < 0){
1348		printk("do_io - bitmap lseek failed : err = %d\n", -n);
1349		return 1;
1350	}
1351
1352	n = os_write_file(req->fds[1], &req->bitmap_words,
1353			  sizeof(req->bitmap_words));
1354	if(n != sizeof(req->bitmap_words)){
1355		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1356		       req->fds[1]);
1357		return 1;
1358	}
1359
1360	return 0;
1361}
1362
1363static void do_io(struct io_thread_req *req)
1364{
1365	char *buf;
1366	unsigned long len;
1367	int n, nsectors, start, end, bit;
1368	int err;
1369	__u64 off;
1370
 
 
 
 
 
 
 
 
1371	nsectors = req->length / req->sectorsize;
1372	start = 0;
1373	do {
1374		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1375		end = start;
1376		while((end < nsectors) &&
1377		      (ubd_test_bit(end, (unsigned char *)
1378				    &req->sector_mask) == bit))
1379			end++;
1380
1381		off = req->offset + req->offsets[bit] +
1382			start * req->sectorsize;
1383		len = (end - start) * req->sectorsize;
1384		buf = &req->buffer[start * req->sectorsize];
 
1385
1386		err = os_seek_file(req->fds[bit], off);
1387		if(err < 0){
1388			printk("do_io - lseek failed : err = %d\n", -err);
1389			req->error = 1;
1390			return;
1391		}
1392		if(req->op == UBD_READ){
1393			n = 0;
1394			do {
1395				buf = &buf[n];
1396				len -= n;
1397				n = os_read_file(req->fds[bit], buf, len);
1398				if (n < 0) {
1399					printk("do_io - read failed, err = %d "
1400					       "fd = %d\n", -n, req->fds[bit]);
1401					req->error = 1;
1402					return;
1403				}
1404			} while((n < len) && (n != 0));
1405			if (n < len) memset(&buf[n], 0, len - n);
1406		} else {
1407			n = os_write_file(req->fds[bit], buf, len);
 
1408			if(n != len){
1409				printk("do_io - write failed err = %d "
1410				       "fd = %d\n", -n, req->fds[bit]);
1411				req->error = 1;
1412				return;
1413			}
 
 
 
 
 
 
 
 
 
 
 
 
 
1414		}
1415
1416		start = end;
1417	} while(start < nsectors);
1418
1419	req->error = update_bitmap(req);
1420}
1421
1422/* Changed in start_io_thread, which is serialized by being called only
1423 * from ubd_init, which is an initcall.
1424 */
1425int kernel_fd = -1;
1426
1427/* Only changed by the io thread. XXX: currently unused. */
1428static int io_count = 0;
1429
1430int io_thread(void *arg)
1431{
1432	struct io_thread_req *req;
1433	int n;
 
1434
1435	ignore_sigwinch_sig();
1436	while(1){
1437		n = os_read_file(kernel_fd, &req,
1438				 sizeof(struct io_thread_req *));
1439		if(n != sizeof(struct io_thread_req *)){
1440			if(n < 0)
1441				printk("io_thread - read failed, fd = %d, "
1442				       "err = %d\n", kernel_fd, -n);
1443			else {
1444				printk("io_thread - short read, fd = %d, "
1445				       "length = %d\n", kernel_fd, n);
 
 
1446			}
1447			continue;
1448		}
1449		io_count++;
1450		do_io(req);
1451		n = os_write_file(kernel_fd, &req,
1452				  sizeof(struct io_thread_req *));
1453		if(n != sizeof(struct io_thread_req *))
1454			printk("io_thread - write failed, fd = %d, err = %d\n",
1455			       kernel_fd, -n);
 
 
 
 
 
 
 
 
 
 
1456	}
1457
1458	return 0;
1459}
v5.4
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018 Cambridge Greys Ltd
   4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
   5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 
   6 */
   7
   8/* 2001-09-28...2002-04-17
   9 * Partition stuff by James_McMechan@hotmail.com
  10 * old style ubd by setting UBD_SHIFT to 0
  11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  12 * partitions have changed in 2.5
  13 * 2003-01-29 more tinkering for 2.5.59-1
  14 * This should now address the sysfs problems and has
  15 * the symlink for devfs to allow for booting with
  16 * the common /dev/ubd/discX/... names rather than
  17 * only /dev/ubdN/discN this version also has lots of
  18 * clean ups preparing for ubd-many.
  19 * James McMechan
  20 */
  21
  22#define UBD_SHIFT 4
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/blkdev.h>
  27#include <linux/blk-mq.h>
  28#include <linux/ata.h>
  29#include <linux/hdreg.h>
  30#include <linux/cdrom.h>
  31#include <linux/proc_fs.h>
  32#include <linux/seq_file.h>
  33#include <linux/ctype.h>
  34#include <linux/slab.h>
  35#include <linux/vmalloc.h>
  36#include <linux/platform_device.h>
  37#include <linux/scatterlist.h>
  38#include <asm/tlbflush.h>
  39#include <kern_util.h>
  40#include "mconsole_kern.h"
  41#include <init.h>
  42#include <irq_kern.h>
  43#include "ubd.h"
  44#include <os.h>
  45#include "cow.h"
  46
  47/* Max request size is determined by sector mask - 32K */
  48#define UBD_MAX_REQUEST (8 * sizeof(long))
  49
  50struct io_thread_req {
  51	struct request *req;
 
  52	int fds[2];
  53	unsigned long offsets[2];
  54	unsigned long long offset;
  55	unsigned long length;
  56	char *buffer;
  57	int sectorsize;
  58	unsigned long sector_mask;
  59	unsigned long long cow_offset;
  60	unsigned long bitmap_words[2];
  61	int error;
  62};
  63
  64
  65static struct io_thread_req * (*irq_req_buffer)[];
  66static struct io_thread_req *irq_remainder;
  67static int irq_remainder_size;
  68
  69static struct io_thread_req * (*io_req_buffer)[];
  70static struct io_thread_req *io_remainder;
  71static int io_remainder_size;
  72
  73
  74
  75static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  76{
  77	__u64 n;
  78	int bits, off;
  79
  80	bits = sizeof(data[0]) * 8;
  81	n = bit / bits;
  82	off = bit % bits;
  83	return (data[n] & (1 << off)) != 0;
  84}
  85
  86static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  87{
  88	__u64 n;
  89	int bits, off;
  90
  91	bits = sizeof(data[0]) * 8;
  92	n = bit / bits;
  93	off = bit % bits;
  94	data[n] |= (1 << off);
  95}
  96/*End stuff from ubd_user.h*/
  97
  98#define DRIVER_NAME "uml-blkdev"
  99
 100static DEFINE_MUTEX(ubd_lock);
 101static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
 102
 103static int ubd_open(struct block_device *bdev, fmode_t mode);
 104static void ubd_release(struct gendisk *disk, fmode_t mode);
 105static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 106		     unsigned int cmd, unsigned long arg);
 107static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 108
 109#define MAX_DEV (16)
 110
 111static const struct block_device_operations ubd_blops = {
 112        .owner		= THIS_MODULE,
 113        .open		= ubd_open,
 114        .release	= ubd_release,
 115        .ioctl		= ubd_ioctl,
 116	.getgeo		= ubd_getgeo,
 117};
 118
 119/* Protected by ubd_lock */
 120static int fake_major = UBD_MAJOR;
 121static struct gendisk *ubd_gendisk[MAX_DEV];
 122static struct gendisk *fake_gendisk[MAX_DEV];
 123
 124#ifdef CONFIG_BLK_DEV_UBD_SYNC
 125#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 126					 .cl = 1 })
 127#else
 128#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
 129					 .cl = 1 })
 130#endif
 131static struct openflags global_openflags = OPEN_FLAGS;
 132
 133struct cow {
 134	/* backing file name */
 135	char *file;
 136	/* backing file fd */
 137	int fd;
 138	unsigned long *bitmap;
 139	unsigned long bitmap_len;
 140	int bitmap_offset;
 141	int data_offset;
 142};
 143
 144#define MAX_SG 64
 145
 146struct ubd {
 
 147	/* name (and fd, below) of the file opened for writing, either the
 148	 * backing or the cow file. */
 149	char *file;
 150	int count;
 151	int fd;
 152	__u64 size;
 153	struct openflags boot_openflags;
 154	struct openflags openflags;
 155	unsigned shared:1;
 156	unsigned no_cow:1;
 157	unsigned no_trim:1;
 158	struct cow cow;
 159	struct platform_device pdev;
 160	struct request_queue *queue;
 161	struct blk_mq_tag_set tag_set;
 162	spinlock_t lock;
 
 
 
 
 163};
 164
 165#define DEFAULT_COW { \
 166	.file =			NULL, \
 167	.fd =			-1,	\
 168	.bitmap =		NULL, \
 169	.bitmap_offset =	0, \
 170	.data_offset =		0, \
 171}
 172
 173#define DEFAULT_UBD { \
 174	.file = 		NULL, \
 175	.count =		0, \
 176	.fd =			-1, \
 177	.size =			-1, \
 178	.boot_openflags =	OPEN_FLAGS, \
 179	.openflags =		OPEN_FLAGS, \
 180	.no_cow =               0, \
 181	.no_trim =		0, \
 182	.shared =		0, \
 183	.cow =			DEFAULT_COW, \
 184	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
 
 
 
 
 185}
 186
 187/* Protected by ubd_lock */
 188static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 189
 190/* Only changed by fake_ide_setup which is a setup */
 191static int fake_ide = 0;
 192static struct proc_dir_entry *proc_ide_root = NULL;
 193static struct proc_dir_entry *proc_ide = NULL;
 194
 195static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 196				 const struct blk_mq_queue_data *bd);
 197
 198static void make_proc_ide(void)
 199{
 200	proc_ide_root = proc_mkdir("ide", NULL);
 201	proc_ide = proc_mkdir("ide0", proc_ide_root);
 202}
 203
 204static int fake_ide_media_proc_show(struct seq_file *m, void *v)
 205{
 206	seq_puts(m, "disk\n");
 207	return 0;
 208}
 209
 
 
 
 
 
 
 
 
 
 
 
 
 
 210static void make_ide_entries(const char *dev_name)
 211{
 212	struct proc_dir_entry *dir, *ent;
 213	char name[64];
 214
 215	if(proc_ide_root == NULL) make_proc_ide();
 216
 217	dir = proc_mkdir(dev_name, proc_ide);
 218	if(!dir) return;
 219
 220	ent = proc_create_single("media", S_IRUGO, dir,
 221			fake_ide_media_proc_show);
 222	if(!ent) return;
 223	snprintf(name, sizeof(name), "ide0/%s", dev_name);
 224	proc_symlink(dev_name, proc_ide_root, name);
 225}
 226
 227static int fake_ide_setup(char *str)
 228{
 229	fake_ide = 1;
 230	return 1;
 231}
 232
 233__setup("fake_ide", fake_ide_setup);
 234
 235__uml_help(fake_ide_setup,
 236"fake_ide\n"
 237"    Create ide0 entries that map onto ubd devices.\n\n"
 238);
 239
 240static int parse_unit(char **ptr)
 241{
 242	char *str = *ptr, *end;
 243	int n = -1;
 244
 245	if(isdigit(*str)) {
 246		n = simple_strtoul(str, &end, 0);
 247		if(end == str)
 248			return -1;
 249		*ptr = end;
 250	}
 251	else if (('a' <= *str) && (*str <= 'z')) {
 252		n = *str - 'a';
 253		str++;
 254		*ptr = str;
 255	}
 256	return n;
 257}
 258
 259/* If *index_out == -1 at exit, the passed option was a general one;
 260 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 261 * should not be freed on exit.
 262 */
 263static int ubd_setup_common(char *str, int *index_out, char **error_out)
 264{
 265	struct ubd *ubd_dev;
 266	struct openflags flags = global_openflags;
 267	char *backing_file;
 268	int n, err = 0, i;
 269
 270	if(index_out) *index_out = -1;
 271	n = *str;
 272	if(n == '='){
 273		char *end;
 274		int major;
 275
 276		str++;
 277		if(!strcmp(str, "sync")){
 278			global_openflags = of_sync(global_openflags);
 279			return err;
 280		}
 281
 282		err = -EINVAL;
 283		major = simple_strtoul(str, &end, 0);
 284		if((*end != '\0') || (end == str)){
 285			*error_out = "Didn't parse major number";
 286			return err;
 287		}
 288
 289		mutex_lock(&ubd_lock);
 290		if (fake_major != UBD_MAJOR) {
 291			*error_out = "Can't assign a fake major twice";
 292			goto out1;
 293		}
 294
 295		fake_major = major;
 296
 297		printk(KERN_INFO "Setting extra ubd major number to %d\n",
 298		       major);
 299		err = 0;
 300	out1:
 301		mutex_unlock(&ubd_lock);
 302		return err;
 303	}
 304
 305	n = parse_unit(&str);
 306	if(n < 0){
 307		*error_out = "Couldn't parse device number";
 308		return -EINVAL;
 309	}
 310	if(n >= MAX_DEV){
 311		*error_out = "Device number out of range";
 312		return 1;
 313	}
 314
 315	err = -EBUSY;
 316	mutex_lock(&ubd_lock);
 317
 318	ubd_dev = &ubd_devs[n];
 319	if(ubd_dev->file != NULL){
 320		*error_out = "Device is already configured";
 321		goto out;
 322	}
 323
 324	if (index_out)
 325		*index_out = n;
 326
 327	err = -EINVAL;
 328	for (i = 0; i < sizeof("rscdt="); i++) {
 329		switch (*str) {
 330		case 'r':
 331			flags.w = 0;
 332			break;
 333		case 's':
 334			flags.s = 1;
 335			break;
 336		case 'd':
 337			ubd_dev->no_cow = 1;
 338			break;
 339		case 'c':
 340			ubd_dev->shared = 1;
 341			break;
 342		case 't':
 343			ubd_dev->no_trim = 1;
 344			break;
 345		case '=':
 346			str++;
 347			goto break_loop;
 348		default:
 349			*error_out = "Expected '=' or flag letter "
 350				"(r, s, c, t or d)";
 351			goto out;
 352		}
 353		str++;
 354	}
 355
 356	if (*str == '=')
 357		*error_out = "Too many flags specified";
 358	else
 359		*error_out = "Missing '='";
 360	goto out;
 361
 362break_loop:
 363	backing_file = strchr(str, ',');
 364
 365	if (backing_file == NULL)
 366		backing_file = strchr(str, ':');
 367
 368	if(backing_file != NULL){
 369		if(ubd_dev->no_cow){
 370			*error_out = "Can't specify both 'd' and a cow file";
 371			goto out;
 372		}
 373		else {
 374			*backing_file = '\0';
 375			backing_file++;
 376		}
 377	}
 378	err = 0;
 379	ubd_dev->file = str;
 380	ubd_dev->cow.file = backing_file;
 381	ubd_dev->boot_openflags = flags;
 382out:
 383	mutex_unlock(&ubd_lock);
 384	return err;
 385}
 386
 387static int ubd_setup(char *str)
 388{
 389	char *error;
 390	int err;
 391
 392	err = ubd_setup_common(str, NULL, &error);
 393	if(err)
 394		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 395		       "%s\n", str, error);
 396	return 1;
 397}
 398
 399__setup("ubd", ubd_setup);
 400__uml_help(ubd_setup,
 401"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
 402"    This is used to associate a device with a file in the underlying\n"
 403"    filesystem. When specifying two filenames, the first one is the\n"
 404"    COW name and the second is the backing file name. As separator you can\n"
 405"    use either a ':' or a ',': the first one allows writing things like;\n"
 406"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 407"    while with a ',' the shell would not expand the 2nd '~'.\n"
 408"    When using only one filename, UML will detect whether to treat it like\n"
 409"    a COW file or a backing file. To override this detection, add the 'd'\n"
 410"    flag:\n"
 411"	ubd0d=BackingFile\n"
 412"    Usually, there is a filesystem in the file, but \n"
 413"    that's not required. Swap devices containing swap files can be\n"
 414"    specified like this. Also, a file which doesn't contain a\n"
 415"    filesystem can have its contents read in the virtual \n"
 416"    machine by running 'dd' on the device. <n> must be in the range\n"
 417"    0 to 7. Appending an 'r' to the number will cause that device\n"
 418"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 419"    an 's' will cause data to be written to disk on the host immediately.\n"
 420"    'c' will cause the device to be treated as being shared between multiple\n"
 421"    UMLs and file locking will be turned off - this is appropriate for a\n"
 422"    cluster filesystem and inappropriate at almost all other times.\n\n"
 423"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 424);
 425
 426static int udb_setup(char *str)
 427{
 428	printk("udb%s specified on command line is almost certainly a ubd -> "
 429	       "udb TYPO\n", str);
 430	return 1;
 431}
 432
 433__setup("udb", udb_setup);
 434__uml_help(udb_setup,
 435"udb\n"
 436"    This option is here solely to catch ubd -> udb typos, which can be\n"
 437"    to impossible to catch visually unless you specifically look for\n"
 438"    them.  The only result of any option starting with 'udb' is an error\n"
 439"    in the boot output.\n\n"
 440);
 441
 
 
 442/* Only changed by ubd_init, which is an initcall. */
 443static int thread_fd = -1;
 
 444
 445/* Function to read several request pointers at a time
 446* handling fractional reads if (and as) needed
 447*/
 448
 449static int bulk_req_safe_read(
 450	int fd,
 451	struct io_thread_req * (*request_buffer)[],
 452	struct io_thread_req **remainder,
 453	int *remainder_size,
 454	int max_recs
 455	)
 456{
 457	int n = 0;
 458	int res = 0;
 459
 460	if (*remainder_size > 0) {
 461		memmove(
 462			(char *) request_buffer,
 463			(char *) remainder, *remainder_size
 464		);
 465		n = *remainder_size;
 466	}
 467
 468	res = os_read_file(
 469			fd,
 470			((char *) request_buffer) + *remainder_size,
 471			sizeof(struct io_thread_req *)*max_recs
 472				- *remainder_size
 473		);
 474	if (res > 0) {
 475		n += res;
 476		if ((n % sizeof(struct io_thread_req *)) > 0) {
 477			/*
 478			* Read somehow returned not a multiple of dword
 479			* theoretically possible, but never observed in the
 480			* wild, so read routine must be able to handle it
 481			*/
 482			*remainder_size = n % sizeof(struct io_thread_req *);
 483			WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
 484			memmove(
 485				remainder,
 486				((char *) request_buffer) +
 487					(n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
 488				*remainder_size
 489			);
 490			n = n - *remainder_size;
 491		}
 492	} else {
 493		n = res;
 494	}
 495	return n;
 496}
 497
 498/* Called without dev->lock held, and only in interrupt context. */
 499static void ubd_handler(void)
 500{
 
 
 
 
 501	int n;
 502	int count;
 503
 504	while(1){
 505		n = bulk_req_safe_read(
 506			thread_fd,
 507			irq_req_buffer,
 508			&irq_remainder,
 509			&irq_remainder_size,
 510			UBD_REQ_BUFFER_SIZE
 511		);
 512		if (n < 0) {
 513			if(n == -EAGAIN)
 514				break;
 515			printk(KERN_ERR "spurious interrupt in ubd_handler, "
 516			       "err = %d\n", -n);
 517			return;
 518		}
 519		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 520			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 521
 522			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
 523				blk_queue_max_discard_sectors(io_req->req->q, 0);
 524				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
 525				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
 526			}
 527			if ((io_req->error) || (io_req->buffer == NULL))
 528				blk_mq_end_request(io_req->req, io_req->error);
 529			else {
 530				if (!blk_update_request(io_req->req, io_req->error, io_req->length))
 531					__blk_mq_end_request(io_req->req, io_req->error);
 532			}
 533			kfree(io_req);
 534		}
 535	}
 536}
 537
 538static irqreturn_t ubd_intr(int irq, void *dev)
 539{
 540	ubd_handler();
 541	return IRQ_HANDLED;
 542}
 543
 544/* Only changed by ubd_init, which is an initcall. */
 545static int io_pid = -1;
 546
 547static void kill_io_thread(void)
 548{
 549	if(io_pid != -1)
 550		os_kill_process(io_pid, 1);
 551}
 552
 553__uml_exitcall(kill_io_thread);
 554
 555static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 556{
 557	char *file;
 558	int fd;
 559	int err;
 560
 561	__u32 version;
 562	__u32 align;
 563	char *backing_file;
 564	time_t mtime;
 565	unsigned long long size;
 566	int sector_size;
 567	int bitmap_offset;
 568
 569	if (ubd_dev->file && ubd_dev->cow.file) {
 570		file = ubd_dev->cow.file;
 571
 572		goto out;
 573	}
 574
 575	fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
 576	if (fd < 0)
 577		return fd;
 578
 579	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 580		&mtime, &size, &sector_size, &align, &bitmap_offset);
 581	os_close_file(fd);
 582
 583	if(err == -EINVAL)
 584		file = ubd_dev->file;
 585	else
 586		file = backing_file;
 587
 588out:
 589	return os_file_size(file, size_out);
 590}
 591
 592static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 593{
 594	int err;
 595
 596	err = os_pread_file(fd, buf, len, offset);
 
 
 
 
 597	if (err < 0)
 598		return err;
 599
 600	return 0;
 601}
 602
 603static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
 604{
 605	unsigned long modtime;
 606	unsigned long long actual;
 607	int err;
 608
 609	err = os_file_modtime(file, &modtime);
 610	if (err < 0) {
 611		printk(KERN_ERR "Failed to get modification time of backing "
 612		       "file \"%s\", err = %d\n", file, -err);
 613		return err;
 614	}
 615
 616	err = os_file_size(file, &actual);
 617	if (err < 0) {
 618		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 619		       "err = %d\n", file, -err);
 620		return err;
 621	}
 622
 623	if (actual != size) {
 624		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
 625		 * the typecast.*/
 626		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 627		       "vs backing file\n", (unsigned long long) size, actual);
 628		return -EINVAL;
 629	}
 630	if (modtime != mtime) {
 631		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
 632		       "backing file\n", mtime, modtime);
 633		return -EINVAL;
 634	}
 635	return 0;
 636}
 637
 638static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 639{
 640	struct uml_stat buf1, buf2;
 641	int err;
 642
 643	if (from_cmdline == NULL)
 644		return 0;
 645	if (!strcmp(from_cmdline, from_cow))
 646		return 0;
 647
 648	err = os_stat_file(from_cmdline, &buf1);
 649	if (err < 0) {
 650		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 651		       -err);
 652		return 0;
 653	}
 654	err = os_stat_file(from_cow, &buf2);
 655	if (err < 0) {
 656		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 657		       -err);
 658		return 1;
 659	}
 660	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 661		return 0;
 662
 663	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 664	       "\"%s\" specified in COW header of \"%s\"\n",
 665	       from_cmdline, from_cow, cow);
 666	return 1;
 667}
 668
 669static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 670		  char **backing_file_out, int *bitmap_offset_out,
 671		  unsigned long *bitmap_len_out, int *data_offset_out,
 672		  int *create_cow_out)
 673{
 674	time_t mtime;
 675	unsigned long long size;
 676	__u32 version, align;
 677	char *backing_file;
 678	int fd, err, sectorsize, asked_switch, mode = 0644;
 679
 680	fd = os_open_file(file, *openflags, mode);
 681	if (fd < 0) {
 682		if ((fd == -ENOENT) && (create_cow_out != NULL))
 683			*create_cow_out = 1;
 684		if (!openflags->w ||
 685		    ((fd != -EROFS) && (fd != -EACCES)))
 686			return fd;
 687		openflags->w = 0;
 688		fd = os_open_file(file, *openflags, mode);
 689		if (fd < 0)
 690			return fd;
 691	}
 692
 693	if (shared)
 694		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
 695	else {
 696		err = os_lock_file(fd, openflags->w);
 697		if (err < 0) {
 698			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
 699			       file, -err);
 700			goto out_close;
 701		}
 702	}
 703
 704	/* Successful return case! */
 705	if (backing_file_out == NULL)
 706		return fd;
 707
 708	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
 709			      &size, &sectorsize, &align, bitmap_offset_out);
 710	if (err && (*backing_file_out != NULL)) {
 711		printk(KERN_ERR "Failed to read COW header from COW file "
 712		       "\"%s\", errno = %d\n", file, -err);
 713		goto out_close;
 714	}
 715	if (err)
 716		return fd;
 717
 718	asked_switch = path_requires_switch(*backing_file_out, backing_file,
 719					    file);
 720
 721	/* Allow switching only if no mismatch. */
 722	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
 723						   mtime)) {
 724		printk(KERN_ERR "Switching backing file to '%s'\n",
 725		       *backing_file_out);
 726		err = write_cow_header(file, fd, *backing_file_out,
 727				       sectorsize, align, &size);
 728		if (err) {
 729			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
 730			goto out_close;
 731		}
 732	} else {
 733		*backing_file_out = backing_file;
 734		err = backing_file_mismatch(*backing_file_out, size, mtime);
 735		if (err)
 736			goto out_close;
 737	}
 738
 739	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
 740		  bitmap_len_out, data_offset_out);
 741
 742	return fd;
 743 out_close:
 744	os_close_file(fd);
 745	return err;
 746}
 747
 748static int create_cow_file(char *cow_file, char *backing_file,
 749		    struct openflags flags,
 750		    int sectorsize, int alignment, int *bitmap_offset_out,
 751		    unsigned long *bitmap_len_out, int *data_offset_out)
 752{
 753	int err, fd;
 754
 755	flags.c = 1;
 756	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 757	if (fd < 0) {
 758		err = fd;
 759		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 760		       cow_file, -err);
 761		goto out;
 762	}
 763
 764	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 765			    bitmap_offset_out, bitmap_len_out,
 766			    data_offset_out);
 767	if (!err)
 768		return fd;
 769	os_close_file(fd);
 770 out:
 771	return err;
 772}
 773
 774static void ubd_close_dev(struct ubd *ubd_dev)
 775{
 776	os_close_file(ubd_dev->fd);
 777	if(ubd_dev->cow.file == NULL)
 778		return;
 779
 780	os_close_file(ubd_dev->cow.fd);
 781	vfree(ubd_dev->cow.bitmap);
 782	ubd_dev->cow.bitmap = NULL;
 783}
 784
 785static int ubd_open_dev(struct ubd *ubd_dev)
 786{
 787	struct openflags flags;
 788	char **back_ptr;
 789	int err, create_cow, *create_ptr;
 790	int fd;
 791
 792	ubd_dev->openflags = ubd_dev->boot_openflags;
 793	create_cow = 0;
 794	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 795	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 796
 797	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 798				back_ptr, &ubd_dev->cow.bitmap_offset,
 799				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 800				create_ptr);
 801
 802	if((fd == -ENOENT) && create_cow){
 803		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 804					  ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 805					  &ubd_dev->cow.bitmap_offset,
 806					  &ubd_dev->cow.bitmap_len,
 807					  &ubd_dev->cow.data_offset);
 808		if(fd >= 0){
 809			printk(KERN_INFO "Creating \"%s\" as COW file for "
 810			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 811		}
 812	}
 813
 814	if(fd < 0){
 815		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 816		       -fd);
 817		return fd;
 818	}
 819	ubd_dev->fd = fd;
 820
 821	if(ubd_dev->cow.file != NULL){
 822		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 823
 824		err = -ENOMEM;
 825		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 826		if(ubd_dev->cow.bitmap == NULL){
 827			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 828			goto error;
 829		}
 830		flush_tlb_kernel_vm();
 831
 832		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 833				      ubd_dev->cow.bitmap_offset,
 834				      ubd_dev->cow.bitmap_len);
 835		if(err < 0)
 836			goto error;
 837
 838		flags = ubd_dev->openflags;
 839		flags.w = 0;
 840		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 841				    NULL, NULL, NULL, NULL);
 842		if(err < 0) goto error;
 843		ubd_dev->cow.fd = err;
 844	}
 845	if (ubd_dev->no_trim == 0) {
 846		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
 847		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
 848		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 849		blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 850		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
 851	}
 852	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 853	return 0;
 854 error:
 855	os_close_file(ubd_dev->fd);
 856	return err;
 857}
 858
 859static void ubd_device_release(struct device *dev)
 860{
 861	struct ubd *ubd_dev = dev_get_drvdata(dev);
 862
 863	blk_cleanup_queue(ubd_dev->queue);
 864	blk_mq_free_tag_set(&ubd_dev->tag_set);
 865	*ubd_dev = ((struct ubd) DEFAULT_UBD);
 866}
 867
 868static int ubd_disk_register(int major, u64 size, int unit,
 869			     struct gendisk **disk_out)
 870{
 871	struct device *parent = NULL;
 872	struct gendisk *disk;
 873
 874	disk = alloc_disk(1 << UBD_SHIFT);
 875	if(disk == NULL)
 876		return -ENOMEM;
 877
 878	disk->major = major;
 879	disk->first_minor = unit << UBD_SHIFT;
 880	disk->fops = &ubd_blops;
 881	set_capacity(disk, size / 512);
 882	if (major == UBD_MAJOR)
 883		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 884	else
 885		sprintf(disk->disk_name, "ubd_fake%d", unit);
 886
 887	/* sysfs register (not for ide fake devices) */
 888	if (major == UBD_MAJOR) {
 889		ubd_devs[unit].pdev.id   = unit;
 890		ubd_devs[unit].pdev.name = DRIVER_NAME;
 891		ubd_devs[unit].pdev.dev.release = ubd_device_release;
 892		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 893		platform_device_register(&ubd_devs[unit].pdev);
 894		parent = &ubd_devs[unit].pdev.dev;
 895	}
 896
 897	disk->private_data = &ubd_devs[unit];
 898	disk->queue = ubd_devs[unit].queue;
 899	device_add_disk(parent, disk, NULL);
 900
 901	*disk_out = disk;
 902	return 0;
 903}
 904
 905#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
 906
 907static const struct blk_mq_ops ubd_mq_ops = {
 908	.queue_rq = ubd_queue_rq,
 909};
 910
 911static int ubd_add(int n, char **error_out)
 912{
 913	struct ubd *ubd_dev = &ubd_devs[n];
 914	int err = 0;
 915
 916	if(ubd_dev->file == NULL)
 917		goto out;
 918
 919	err = ubd_file_size(ubd_dev, &ubd_dev->size);
 920	if(err < 0){
 921		*error_out = "Couldn't determine size of device's file";
 922		goto out;
 923	}
 924
 925	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 926
 927	ubd_dev->tag_set.ops = &ubd_mq_ops;
 928	ubd_dev->tag_set.queue_depth = 64;
 929	ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
 930	ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 931	ubd_dev->tag_set.driver_data = ubd_dev;
 932	ubd_dev->tag_set.nr_hw_queues = 1;
 933
 934	err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
 935	if (err)
 
 
 936		goto out;
 937
 938	ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
 939	if (IS_ERR(ubd_dev->queue)) {
 940		err = PTR_ERR(ubd_dev->queue);
 941		goto out_cleanup_tags;
 942	}
 943
 944	ubd_dev->queue->queuedata = ubd_dev;
 945	blk_queue_write_cache(ubd_dev->queue, true, false);
 946
 947	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 948	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 949	if(err){
 950		*error_out = "Failed to register device";
 951		goto out_cleanup_tags;
 952	}
 953
 954	if (fake_major != UBD_MAJOR)
 955		ubd_disk_register(fake_major, ubd_dev->size, n,
 956				  &fake_gendisk[n]);
 957
 958	/*
 959	 * Perhaps this should also be under the "if (fake_major)" above
 960	 * using the fake_disk->disk_name
 961	 */
 962	if (fake_ide)
 963		make_ide_entries(ubd_gendisk[n]->disk_name);
 964
 965	err = 0;
 966out:
 967	return err;
 968
 969out_cleanup_tags:
 970	blk_mq_free_tag_set(&ubd_dev->tag_set);
 971	if (!(IS_ERR(ubd_dev->queue)))
 972		blk_cleanup_queue(ubd_dev->queue);
 973	goto out;
 974}
 975
 976static int ubd_config(char *str, char **error_out)
 977{
 978	int n, ret;
 979
 980	/* This string is possibly broken up and stored, so it's only
 981	 * freed if ubd_setup_common fails, or if only general options
 982	 * were set.
 983	 */
 984	str = kstrdup(str, GFP_KERNEL);
 985	if (str == NULL) {
 986		*error_out = "Failed to allocate memory";
 987		return -ENOMEM;
 988	}
 989
 990	ret = ubd_setup_common(str, &n, error_out);
 991	if (ret)
 992		goto err_free;
 993
 994	if (n == -1) {
 995		ret = 0;
 996		goto err_free;
 997	}
 998
 999	mutex_lock(&ubd_lock);
1000	ret = ubd_add(n, error_out);
1001	if (ret)
1002		ubd_devs[n].file = NULL;
1003	mutex_unlock(&ubd_lock);
1004
1005out:
1006	return ret;
1007
1008err_free:
1009	kfree(str);
1010	goto out;
1011}
1012
1013static int ubd_get_config(char *name, char *str, int size, char **error_out)
1014{
1015	struct ubd *ubd_dev;
1016	int n, len = 0;
1017
1018	n = parse_unit(&name);
1019	if((n >= MAX_DEV) || (n < 0)){
1020		*error_out = "ubd_get_config : device number out of range";
1021		return -1;
1022	}
1023
1024	ubd_dev = &ubd_devs[n];
1025	mutex_lock(&ubd_lock);
1026
1027	if(ubd_dev->file == NULL){
1028		CONFIG_CHUNK(str, size, len, "", 1);
1029		goto out;
1030	}
1031
1032	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1033
1034	if(ubd_dev->cow.file != NULL){
1035		CONFIG_CHUNK(str, size, len, ",", 0);
1036		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1037	}
1038	else CONFIG_CHUNK(str, size, len, "", 1);
1039
1040 out:
1041	mutex_unlock(&ubd_lock);
1042	return len;
1043}
1044
1045static int ubd_id(char **str, int *start_out, int *end_out)
1046{
1047	int n;
1048
1049	n = parse_unit(str);
1050	*start_out = 0;
1051	*end_out = MAX_DEV - 1;
1052	return n;
1053}
1054
1055static int ubd_remove(int n, char **error_out)
1056{
1057	struct gendisk *disk = ubd_gendisk[n];
1058	struct ubd *ubd_dev;
1059	int err = -ENODEV;
1060
1061	mutex_lock(&ubd_lock);
1062
1063	ubd_dev = &ubd_devs[n];
1064
1065	if(ubd_dev->file == NULL)
1066		goto out;
1067
1068	/* you cannot remove a open disk */
1069	err = -EBUSY;
1070	if(ubd_dev->count > 0)
1071		goto out;
1072
1073	ubd_gendisk[n] = NULL;
1074	if(disk != NULL){
1075		del_gendisk(disk);
1076		put_disk(disk);
1077	}
1078
1079	if(fake_gendisk[n] != NULL){
1080		del_gendisk(fake_gendisk[n]);
1081		put_disk(fake_gendisk[n]);
1082		fake_gendisk[n] = NULL;
1083	}
1084
1085	err = 0;
1086	platform_device_unregister(&ubd_dev->pdev);
1087out:
1088	mutex_unlock(&ubd_lock);
1089	return err;
1090}
1091
1092/* All these are called by mconsole in process context and without
1093 * ubd-specific locks.  The structure itself is const except for .list.
1094 */
1095static struct mc_device ubd_mc = {
1096	.list		= LIST_HEAD_INIT(ubd_mc.list),
1097	.name		= "ubd",
1098	.config		= ubd_config,
1099	.get_config	= ubd_get_config,
1100	.id		= ubd_id,
1101	.remove		= ubd_remove,
1102};
1103
1104static int __init ubd_mc_init(void)
1105{
1106	mconsole_register_dev(&ubd_mc);
1107	return 0;
1108}
1109
1110__initcall(ubd_mc_init);
1111
1112static int __init ubd0_init(void)
1113{
1114	struct ubd *ubd_dev = &ubd_devs[0];
1115
1116	mutex_lock(&ubd_lock);
1117	if(ubd_dev->file == NULL)
1118		ubd_dev->file = "root_fs";
1119	mutex_unlock(&ubd_lock);
1120
1121	return 0;
1122}
1123
1124__initcall(ubd0_init);
1125
1126/* Used in ubd_init, which is an initcall */
1127static struct platform_driver ubd_driver = {
1128	.driver = {
1129		.name  = DRIVER_NAME,
1130	},
1131};
1132
1133static int __init ubd_init(void)
1134{
1135	char *error;
1136	int i, err;
1137
1138	if (register_blkdev(UBD_MAJOR, "ubd"))
1139		return -1;
1140
1141	if (fake_major != UBD_MAJOR) {
1142		char name[sizeof("ubd_nnn\0")];
1143
1144		snprintf(name, sizeof(name), "ubd_%d", fake_major);
1145		if (register_blkdev(fake_major, "ubd"))
1146			return -1;
1147	}
1148
1149	irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1150				       sizeof(struct io_thread_req *),
1151				       GFP_KERNEL
1152		);
1153	irq_remainder = 0;
1154
1155	if (irq_req_buffer == NULL) {
1156		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1157		return -1;
1158	}
1159	io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1160				      sizeof(struct io_thread_req *),
1161				      GFP_KERNEL
1162		);
1163
1164	io_remainder = 0;
1165
1166	if (io_req_buffer == NULL) {
1167		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1168		return -1;
1169	}
1170	platform_driver_register(&ubd_driver);
1171	mutex_lock(&ubd_lock);
1172	for (i = 0; i < MAX_DEV; i++){
1173		err = ubd_add(i, &error);
1174		if(err)
1175			printk(KERN_ERR "Failed to initialize ubd device %d :"
1176			       "%s\n", i, error);
1177	}
1178	mutex_unlock(&ubd_lock);
1179	return 0;
1180}
1181
1182late_initcall(ubd_init);
1183
1184static int __init ubd_driver_init(void){
1185	unsigned long stack;
1186	int err;
1187
1188	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1189	if(global_openflags.s){
1190		printk(KERN_INFO "ubd: Synchronous mode\n");
1191		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1192		 * enough. So use anyway the io thread. */
1193	}
1194	stack = alloc_stack(0, 0);
1195	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1196				 &thread_fd);
1197	if(io_pid < 0){
1198		printk(KERN_ERR
1199		       "ubd : Failed to start I/O thread (errno = %d) - "
1200		       "falling back to synchronous I/O\n", -io_pid);
1201		io_pid = -1;
1202		return 0;
1203	}
1204	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1205			     0, "ubd", ubd_devs);
1206	if(err != 0)
1207		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1208	return 0;
1209}
1210
1211device_initcall(ubd_driver_init);
1212
1213static int ubd_open(struct block_device *bdev, fmode_t mode)
1214{
1215	struct gendisk *disk = bdev->bd_disk;
1216	struct ubd *ubd_dev = disk->private_data;
1217	int err = 0;
1218
1219	mutex_lock(&ubd_mutex);
1220	if(ubd_dev->count == 0){
1221		err = ubd_open_dev(ubd_dev);
1222		if(err){
1223			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1224			       disk->disk_name, ubd_dev->file, -err);
1225			goto out;
1226		}
1227	}
1228	ubd_dev->count++;
1229	set_disk_ro(disk, !ubd_dev->openflags.w);
1230
1231	/* This should no more be needed. And it didn't work anyway to exclude
1232	 * read-write remounting of filesystems.*/
1233	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1234	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1235	        err = -EROFS;
1236	}*/
1237out:
1238	mutex_unlock(&ubd_mutex);
1239	return err;
1240}
1241
1242static void ubd_release(struct gendisk *disk, fmode_t mode)
1243{
1244	struct ubd *ubd_dev = disk->private_data;
1245
1246	mutex_lock(&ubd_mutex);
1247	if(--ubd_dev->count == 0)
1248		ubd_close_dev(ubd_dev);
1249	mutex_unlock(&ubd_mutex);
 
1250}
1251
1252static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1253			  __u64 *cow_offset, unsigned long *bitmap,
1254			  __u64 bitmap_offset, unsigned long *bitmap_words,
1255			  __u64 bitmap_len)
1256{
1257	__u64 sector = io_offset >> SECTOR_SHIFT;
1258	int i, update_bitmap = 0;
1259
1260	for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1261		if(cow_mask != NULL)
1262			ubd_set_bit(i, (unsigned char *) cow_mask);
1263		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1264			continue;
1265
1266		update_bitmap = 1;
1267		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1268	}
1269
1270	if(!update_bitmap)
1271		return;
1272
1273	*cow_offset = sector / (sizeof(unsigned long) * 8);
1274
1275	/* This takes care of the case where we're exactly at the end of the
1276	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1277	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1278	 * for the original diagnosis.
1279	 */
1280	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1281					 sizeof(unsigned long)) - 1))
1282		(*cow_offset)--;
1283
1284	bitmap_words[0] = bitmap[*cow_offset];
1285	bitmap_words[1] = bitmap[*cow_offset + 1];
1286
1287	*cow_offset *= sizeof(unsigned long);
1288	*cow_offset += bitmap_offset;
1289}
1290
1291static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1292		       __u64 bitmap_offset, __u64 bitmap_len)
1293{
1294	__u64 sector = req->offset >> SECTOR_SHIFT;
1295	int i;
1296
1297	if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT)
1298		panic("Operation too long");
1299
1300	if (req_op(req->req) == REQ_OP_READ) {
1301		for (i = 0; i < req->length >> SECTOR_SHIFT; i++) {
1302			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1303				ubd_set_bit(i, (unsigned char *)
1304					    &req->sector_mask);
1305		}
1306	}
1307	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1308			   &req->cow_offset, bitmap, bitmap_offset,
1309			   req->bitmap_words, bitmap_len);
1310}
1311
1312static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
1313		u64 off, struct bio_vec *bvec)
 
 
1314{
1315	struct ubd *dev = hctx->queue->queuedata;
1316	struct io_thread_req *io_req;
1317	int ret;
1318
1319	io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC);
1320	if (!io_req)
1321		return -ENOMEM;
1322
1323	io_req->req = req;
1324	if (dev->cow.file)
1325		io_req->fds[0] = dev->cow.fd;
1326	else
1327		io_req->fds[0] = dev->fd;
 
 
1328	io_req->error = 0;
 
1329
1330	if (bvec != NULL) {
1331		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
1332		io_req->length = bvec->bv_len;
1333	} else {
1334		io_req->buffer = NULL;
1335		io_req->length = blk_rq_bytes(req);
1336	}
1337
1338	io_req->sectorsize = SECTOR_SIZE;
1339	io_req->fds[1] = dev->fd;
1340	io_req->cow_offset = -1;
1341	io_req->offset = off;
1342	io_req->sector_mask = 0;
1343	io_req->offsets[0] = 0;
1344	io_req->offsets[1] = dev->cow.data_offset;
1345
1346	if (dev->cow.file)
1347		cowify_req(io_req, dev->cow.bitmap,
1348			   dev->cow.bitmap_offset, dev->cow.bitmap_len);
1349
1350	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1351	if (ret != sizeof(io_req)) {
1352		if (ret != -EAGAIN)
1353			pr_err("write to io thread failed: %d\n", -ret);
1354		kfree(io_req);
1355	}
1356	return ret;
1357}
1358
1359static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req)
 
1360{
1361	struct req_iterator iter;
1362	struct bio_vec bvec;
1363	int ret;
1364	u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT;
1365
1366	rq_for_each_segment(bvec, req, iter) {
1367		ret = ubd_queue_one_vec(hctx, req, off, &bvec);
1368		if (ret < 0)
1369			return ret;
1370		off += bvec.bv_len;
1371	}
1372	return 0;
1373}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1374
1375static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1376				 const struct blk_mq_queue_data *bd)
1377{
1378	struct ubd *ubd_dev = hctx->queue->queuedata;
1379	struct request *req = bd->rq;
1380	int ret = 0, res = BLK_STS_OK;
1381
1382	blk_mq_start_request(req);
1383
1384	spin_lock_irq(&ubd_dev->lock);
1385
1386	switch (req_op(req)) {
1387	/* operations with no lentgth/offset arguments */
1388	case REQ_OP_FLUSH:
1389		ret = ubd_queue_one_vec(hctx, req, 0, NULL);
1390		break;
1391	case REQ_OP_READ:
1392	case REQ_OP_WRITE:
1393		ret = queue_rw_req(hctx, req);
1394		break;
1395	case REQ_OP_DISCARD:
1396	case REQ_OP_WRITE_ZEROES:
1397		ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
1398		break;
1399	default:
1400		WARN_ON_ONCE(1);
1401		res = BLK_STS_NOTSUPP;
1402	}
1403
1404	spin_unlock_irq(&ubd_dev->lock);
1405
1406	if (ret < 0) {
1407		if (ret == -ENOMEM)
1408			res = BLK_STS_RESOURCE;
1409		else
1410			res = BLK_STS_DEV_RESOURCE;
1411	}
1412
1413	return res;
1414}
1415
1416static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1417{
1418	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1419
1420	geo->heads = 128;
1421	geo->sectors = 32;
1422	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1423	return 0;
1424}
1425
1426static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1427		     unsigned int cmd, unsigned long arg)
1428{
1429	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1430	u16 ubd_id[ATA_ID_WORDS];
1431
1432	switch (cmd) {
1433		struct cdrom_volctrl volume;
1434	case HDIO_GET_IDENTITY:
1435		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1436		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1437		ubd_id[ATA_ID_HEADS]	= 128;
1438		ubd_id[ATA_ID_SECTORS]	= 32;
1439		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1440				 sizeof(ubd_id)))
1441			return -EFAULT;
1442		return 0;
1443
1444	case CDROMVOLREAD:
1445		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1446			return -EFAULT;
1447		volume.channel0 = 255;
1448		volume.channel1 = 255;
1449		volume.channel2 = 255;
1450		volume.channel3 = 255;
1451		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1452			return -EFAULT;
1453		return 0;
1454	}
1455	return -EINVAL;
1456}
1457
1458static int map_error(int error_code)
1459{
1460	switch (error_code) {
1461	case 0:
1462		return BLK_STS_OK;
1463	case ENOSYS:
1464	case EOPNOTSUPP:
1465		return BLK_STS_NOTSUPP;
1466	case ENOSPC:
1467		return BLK_STS_NOSPC;
1468	}
1469	return BLK_STS_IOERR;
1470}
1471
1472/*
1473 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1474 *
1475 * The following functions are part of UML hypervisor code.
1476 * All functions from here onwards are executed as a helper
1477 * thread and are not allowed to execute any kernel functions.
1478 *
1479 * Any communication must occur strictly via shared memory and IPC.
1480 *
1481 * Do not add printks, locks, kernel memory operations, etc - it
1482 * will result in unpredictable behaviour and/or crashes.
1483 */
1484
1485static int update_bitmap(struct io_thread_req *req)
1486{
1487	int n;
1488
1489	if(req->cow_offset == -1)
1490		return map_error(0);
1491
1492	n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1493			  sizeof(req->bitmap_words), req->cow_offset);
1494	if (n != sizeof(req->bitmap_words))
1495		return map_error(-n);
 
 
 
 
 
 
 
 
 
1496
1497	return map_error(0);
1498}
1499
1500static void do_io(struct io_thread_req *req)
1501{
1502	char *buf = NULL;
1503	unsigned long len;
1504	int n, nsectors, start, end, bit;
 
1505	__u64 off;
1506
1507	/* FLUSH is really a special case, we cannot "case" it with others */
1508
1509	if (req_op(req->req) == REQ_OP_FLUSH) {
1510		/* fds[0] is always either the rw image or our cow file */
1511		req->error = map_error(-os_sync_file(req->fds[0]));
1512		return;
1513	}
1514
1515	nsectors = req->length / req->sectorsize;
1516	start = 0;
1517	do {
1518		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1519		end = start;
1520		while((end < nsectors) &&
1521		      (ubd_test_bit(end, (unsigned char *)
1522				    &req->sector_mask) == bit))
1523			end++;
1524
1525		off = req->offset + req->offsets[bit] +
1526			start * req->sectorsize;
1527		len = (end - start) * req->sectorsize;
1528		if (req->buffer != NULL)
1529			buf = &req->buffer[start * req->sectorsize];
1530
1531		switch (req_op(req->req)) {
1532		case REQ_OP_READ:
 
 
 
 
 
1533			n = 0;
1534			do {
1535				buf = &buf[n];
1536				len -= n;
1537				n = os_pread_file(req->fds[bit], buf, len, off);
1538				if (n < 0) {
1539					req->error = map_error(-n);
 
 
1540					return;
1541				}
1542			} while((n < len) && (n != 0));
1543			if (n < len) memset(&buf[n], 0, len - n);
1544			break;
1545		case REQ_OP_WRITE:
1546			n = os_pwrite_file(req->fds[bit], buf, len, off);
1547			if(n != len){
1548				req->error = map_error(-n);
 
 
1549				return;
1550			}
1551			break;
1552		case REQ_OP_DISCARD:
1553		case REQ_OP_WRITE_ZEROES:
1554			n = os_falloc_punch(req->fds[bit], off, len);
1555			if (n) {
1556				req->error = map_error(-n);
1557				return;
1558			}
1559			break;
1560		default:
1561			WARN_ON_ONCE(1);
1562			req->error = BLK_STS_NOTSUPP;
1563			return;
1564		}
1565
1566		start = end;
1567	} while(start < nsectors);
1568
1569	req->error = update_bitmap(req);
1570}
1571
/*
 * File descriptor the I/O thread uses to talk to the kernel side.
 * Changed in start_io_thread, which in this file is called only from
 * ubd_driver_init, a device initcall, and is therefore serialized.
 */
int kernel_fd = -1;

/* Only changed by the io thread. XXX: currently unused. */
static int io_count;
1579
1580int io_thread(void *arg)
1581{
1582	int n, count, written, res;
1583
1584	os_fix_helper_signals();
1585
 
1586	while(1){
1587		n = bulk_req_safe_read(
1588			kernel_fd,
1589			io_req_buffer,
1590			&io_remainder,
1591			&io_remainder_size,
1592			UBD_REQ_BUFFER_SIZE
1593		);
1594		if (n < 0) {
1595			if (n == -EAGAIN) {
1596				ubd_read_poll(-1);
1597				continue;
1598			}
 
1599		}
1600
1601		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1602			io_count++;
1603			do_io((*io_req_buffer)[count]);
1604		}
1605
1606		written = 0;
1607
1608		do {
1609			res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
1610			if (res >= 0) {
1611				written += res;
1612			}
1613			if (written < n) {
1614				ubd_write_poll(-1);
1615			}
1616		} while (written < n);
1617	}
1618
1619	return 0;
1620}