Linux Audio

Check our new training course

Loading...
v3.5.6
 
   1/*
 
 
   2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
   3 * Licensed under the GPL
   4 */
   5
   6/* 2001-09-28...2002-04-17
   7 * Partition stuff by James_McMechan@hotmail.com
   8 * old style ubd by setting UBD_SHIFT to 0
   9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  10 * partitions have changed in 2.5
  11 * 2003-01-29 more tinkering for 2.5.59-1
  12 * This should now address the sysfs problems and has
  13 * the symlink for devfs to allow for booting with
  14 * the common /dev/ubd/discX/... names rather than
  15 * only /dev/ubdN/discN this version also has lots of
  16 * clean ups preparing for ubd-many.
  17 * James McMechan
  18 */
  19
  20#define UBD_SHIFT 4
  21
  22#include <linux/module.h>
  23#include <linux/init.h>
  24#include <linux/blkdev.h>
 
  25#include <linux/ata.h>
  26#include <linux/hdreg.h>
 
  27#include <linux/cdrom.h>
  28#include <linux/proc_fs.h>
  29#include <linux/seq_file.h>
  30#include <linux/ctype.h>
  31#include <linux/slab.h>
  32#include <linux/vmalloc.h>
  33#include <linux/platform_device.h>
  34#include <linux/scatterlist.h>
  35#include <asm/tlbflush.h>
  36#include "kern_util.h"
  37#include "mconsole_kern.h"
  38#include "init.h"
  39#include "irq_kern.h"
  40#include "ubd.h"
  41#include "os.h"
  42#include "cow.h"
  43
  44enum ubd_req { UBD_READ, UBD_WRITE };
 
 
 
 
 
 
 
 
 
  45
  46struct io_thread_req {
  47	struct request *req;
  48	enum ubd_req op;
  49	int fds[2];
  50	unsigned long offsets[2];
  51	unsigned long long offset;
  52	unsigned long length;
  53	char *buffer;
  54	int sectorsize;
  55	unsigned long sector_mask;
  56	unsigned long long cow_offset;
  57	unsigned long bitmap_words[2];
  58	int error;
 
 
 
 
  59};
  60
 
 
 
 
 
 
 
 
 
 
 
  61static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  62{
  63	__u64 n;
  64	int bits, off;
  65
  66	bits = sizeof(data[0]) * 8;
  67	n = bit / bits;
  68	off = bit % bits;
  69	return (data[n] & (1 << off)) != 0;
  70}
  71
  72static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  73{
  74	__u64 n;
  75	int bits, off;
  76
  77	bits = sizeof(data[0]) * 8;
  78	n = bit / bits;
  79	off = bit % bits;
  80	data[n] |= (1 << off);
  81}
  82/*End stuff from ubd_user.h*/
  83
  84#define DRIVER_NAME "uml-blkdev"
  85
  86static DEFINE_MUTEX(ubd_lock);
  87static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
  88
  89static int ubd_open(struct block_device *bdev, fmode_t mode);
  90static int ubd_release(struct gendisk *disk, fmode_t mode);
  91static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
  92		     unsigned int cmd, unsigned long arg);
  93static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
  94
  95#define MAX_DEV (16)
  96
  97static const struct block_device_operations ubd_blops = {
  98        .owner		= THIS_MODULE,
  99        .open		= ubd_open,
 100        .release	= ubd_release,
 101        .ioctl		= ubd_ioctl,
 
 102	.getgeo		= ubd_getgeo,
 103};
 104
 105/* Protected by ubd_lock */
 106static int fake_major = UBD_MAJOR;
 107static struct gendisk *ubd_gendisk[MAX_DEV];
 108static struct gendisk *fake_gendisk[MAX_DEV];
 109
 110#ifdef CONFIG_BLK_DEV_UBD_SYNC
 111#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 112					 .cl = 1 })
 113#else
 114#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
 115					 .cl = 1 })
 116#endif
 117static struct openflags global_openflags = OPEN_FLAGS;
 118
 119struct cow {
 120	/* backing file name */
 121	char *file;
 122	/* backing file fd */
 123	int fd;
 124	unsigned long *bitmap;
 125	unsigned long bitmap_len;
 126	int bitmap_offset;
 127	int data_offset;
 128};
 129
 130#define MAX_SG 64
 131
 132struct ubd {
 133	struct list_head restart;
 134	/* name (and fd, below) of the file opened for writing, either the
 135	 * backing or the cow file. */
 136	char *file;
 
 137	int count;
 138	int fd;
 139	__u64 size;
 140	struct openflags boot_openflags;
 141	struct openflags openflags;
 142	unsigned shared:1;
 143	unsigned no_cow:1;
 
 144	struct cow cow;
 145	struct platform_device pdev;
 146	struct request_queue *queue;
 
 147	spinlock_t lock;
 148	struct scatterlist sg[MAX_SG];
 149	struct request *request;
 150	int start_sg, end_sg;
 151	sector_t rq_pos;
 152};
 153
 154#define DEFAULT_COW { \
 155	.file =			NULL, \
 156	.fd =			-1,	\
 157	.bitmap =		NULL, \
 158	.bitmap_offset =	0, \
 159	.data_offset =		0, \
 160}
 161
 162#define DEFAULT_UBD { \
 163	.file = 		NULL, \
 
 164	.count =		0, \
 165	.fd =			-1, \
 166	.size =			-1, \
 167	.boot_openflags =	OPEN_FLAGS, \
 168	.openflags =		OPEN_FLAGS, \
 169	.no_cow =               0, \
 
 170	.shared =		0, \
 171	.cow =			DEFAULT_COW, \
 172	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
 173	.request =		NULL, \
 174	.start_sg =		0, \
 175	.end_sg =		0, \
 176	.rq_pos =		0, \
 177}
 178
 179/* Protected by ubd_lock */
 180static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 181
 182/* Only changed by fake_ide_setup which is a setup */
 183static int fake_ide = 0;
 184static struct proc_dir_entry *proc_ide_root = NULL;
 185static struct proc_dir_entry *proc_ide = NULL;
 186
 187static void make_proc_ide(void)
 188{
 189	proc_ide_root = proc_mkdir("ide", NULL);
 190	proc_ide = proc_mkdir("ide0", proc_ide_root);
 191}
 192
 193static int fake_ide_media_proc_show(struct seq_file *m, void *v)
 194{
 195	seq_puts(m, "disk\n");
 196	return 0;
 197}
 198
 199static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
 200{
 201	return single_open(file, fake_ide_media_proc_show, NULL);
 202}
 203
 204static const struct file_operations fake_ide_media_proc_fops = {
 205	.owner		= THIS_MODULE,
 206	.open		= fake_ide_media_proc_open,
 207	.read		= seq_read,
 208	.llseek		= seq_lseek,
 209	.release	= single_release,
 210};
 211
 212static void make_ide_entries(const char *dev_name)
 213{
 214	struct proc_dir_entry *dir, *ent;
 215	char name[64];
 216
 217	if(proc_ide_root == NULL) make_proc_ide();
 218
 219	dir = proc_mkdir(dev_name, proc_ide);
 220	if(!dir) return;
 221
 222	ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
 223	if(!ent) return;
 224	snprintf(name, sizeof(name), "ide0/%s", dev_name);
 225	proc_symlink(dev_name, proc_ide_root, name);
 226}
 227
 228static int fake_ide_setup(char *str)
 229{
 230	fake_ide = 1;
 231	return 1;
 232}
 233
 234__setup("fake_ide", fake_ide_setup);
 235
 236__uml_help(fake_ide_setup,
 237"fake_ide\n"
 238"    Create ide0 entries that map onto ubd devices.\n\n"
 239);
 240
 241static int parse_unit(char **ptr)
 242{
 243	char *str = *ptr, *end;
 244	int n = -1;
 245
 246	if(isdigit(*str)) {
 247		n = simple_strtoul(str, &end, 0);
 248		if(end == str)
 249			return -1;
 250		*ptr = end;
 251	}
 252	else if (('a' <= *str) && (*str <= 'z')) {
 253		n = *str - 'a';
 254		str++;
 255		*ptr = str;
 256	}
 257	return n;
 258}
 259
 260/* If *index_out == -1 at exit, the passed option was a general one;
 261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 262 * should not be freed on exit.
 263 */
 264static int ubd_setup_common(char *str, int *index_out, char **error_out)
 265{
 266	struct ubd *ubd_dev;
 267	struct openflags flags = global_openflags;
 268	char *backing_file;
 269	int n, err = 0, i;
 270
 271	if(index_out) *index_out = -1;
 272	n = *str;
 273	if(n == '='){
 274		char *end;
 275		int major;
 276
 277		str++;
 278		if(!strcmp(str, "sync")){
 279			global_openflags = of_sync(global_openflags);
 280			goto out1;
 281		}
 282
 283		err = -EINVAL;
 284		major = simple_strtoul(str, &end, 0);
 285		if((*end != '\0') || (end == str)){
 286			*error_out = "Didn't parse major number";
 287			goto out1;
 288		}
 289
 290		mutex_lock(&ubd_lock);
 291		if (fake_major != UBD_MAJOR) {
 292			*error_out = "Can't assign a fake major twice";
 293			goto out1;
 294		}
 295
 296		fake_major = major;
 297
 298		printk(KERN_INFO "Setting extra ubd major number to %d\n",
 299		       major);
 300		err = 0;
 301	out1:
 302		mutex_unlock(&ubd_lock);
 303		return err;
 304	}
 305
 306	n = parse_unit(&str);
 307	if(n < 0){
 308		*error_out = "Couldn't parse device number";
 309		return -EINVAL;
 310	}
 311	if(n >= MAX_DEV){
 312		*error_out = "Device number out of range";
 313		return 1;
 314	}
 315
 316	err = -EBUSY;
 317	mutex_lock(&ubd_lock);
 318
 319	ubd_dev = &ubd_devs[n];
 320	if(ubd_dev->file != NULL){
 321		*error_out = "Device is already configured";
 322		goto out;
 323	}
 324
 325	if (index_out)
 326		*index_out = n;
 327
 328	err = -EINVAL;
 329	for (i = 0; i < sizeof("rscd="); i++) {
 330		switch (*str) {
 331		case 'r':
 332			flags.w = 0;
 333			break;
 334		case 's':
 335			flags.s = 1;
 336			break;
 337		case 'd':
 338			ubd_dev->no_cow = 1;
 339			break;
 340		case 'c':
 341			ubd_dev->shared = 1;
 342			break;
 
 
 
 343		case '=':
 344			str++;
 345			goto break_loop;
 346		default:
 347			*error_out = "Expected '=' or flag letter "
 348				"(r, s, c, or d)";
 349			goto out;
 350		}
 351		str++;
 352	}
 353
 354	if (*str == '=')
 355		*error_out = "Too many flags specified";
 356	else
 357		*error_out = "Missing '='";
 358	goto out;
 359
 360break_loop:
 361	backing_file = strchr(str, ',');
 362
 363	if (backing_file == NULL)
 364		backing_file = strchr(str, ':');
 
 
 
 
 
 
 
 365
 366	if(backing_file != NULL){
 367		if(ubd_dev->no_cow){
 368			*error_out = "Can't specify both 'd' and a cow file";
 369			goto out;
 370		}
 371		else {
 372			*backing_file = '\0';
 373			backing_file++;
 374		}
 375	}
 
 376	err = 0;
 377	ubd_dev->file = str;
 378	ubd_dev->cow.file = backing_file;
 
 379	ubd_dev->boot_openflags = flags;
 380out:
 381	mutex_unlock(&ubd_lock);
 382	return err;
 383}
 384
 385static int ubd_setup(char *str)
 386{
 387	char *error;
 388	int err;
 389
 390	err = ubd_setup_common(str, NULL, &error);
 391	if(err)
 392		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 393		       "%s\n", str, error);
 394	return 1;
 395}
 396
 397__setup("ubd", ubd_setup);
 398__uml_help(ubd_setup,
 399"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
 400"    This is used to associate a device with a file in the underlying\n"
 401"    filesystem. When specifying two filenames, the first one is the\n"
 402"    COW name and the second is the backing file name. As separator you can\n"
 403"    use either a ':' or a ',': the first one allows writing things like;\n"
 404"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 405"    while with a ',' the shell would not expand the 2nd '~'.\n"
 406"    When using only one filename, UML will detect whether to treat it like\n"
 407"    a COW file or a backing file. To override this detection, add the 'd'\n"
 408"    flag:\n"
 409"	ubd0d=BackingFile\n"
 410"    Usually, there is a filesystem in the file, but \n"
 411"    that's not required. Swap devices containing swap files can be\n"
 412"    specified like this. Also, a file which doesn't contain a\n"
 413"    filesystem can have its contents read in the virtual \n"
 414"    machine by running 'dd' on the device. <n> must be in the range\n"
 415"    0 to 7. Appending an 'r' to the number will cause that device\n"
 416"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 417"    an 's' will cause data to be written to disk on the host immediately.\n"
 418"    'c' will cause the device to be treated as being shared between multiple\n"
 419"    UMLs and file locking will be turned off - this is appropriate for a\n"
 420"    cluster filesystem and inappropriate at almost all other times.\n\n"
 
 
 
 
 
 
 
 421);
 422
 423static int udb_setup(char *str)
 424{
 425	printk("udb%s specified on command line is almost certainly a ubd -> "
 426	       "udb TYPO\n", str);
 427	return 1;
 428}
 429
 430__setup("udb", udb_setup);
 431__uml_help(udb_setup,
 432"udb\n"
 433"    This option is here solely to catch ubd -> udb typos, which can be\n"
 434"    to impossible to catch visually unless you specifically look for\n"
 435"    them.  The only result of any option starting with 'udb' is an error\n"
 436"    in the boot output.\n\n"
 437);
 438
 439static void do_ubd_request(struct request_queue * q);
 440
 441/* Only changed by ubd_init, which is an initcall. */
 442static int thread_fd = -1;
 443static LIST_HEAD(restart);
 444
 445/* XXX - move this inside ubd_intr. */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 446/* Called without dev->lock held, and only in interrupt context. */
 447static void ubd_handler(void)
 448{
 449	struct io_thread_req *req;
 450	struct ubd *ubd;
 451	struct list_head *list, *next_ele;
 452	unsigned long flags;
 453	int n;
 
 454
 455	while(1){
 456		n = os_read_file(thread_fd, &req,
 457				 sizeof(struct io_thread_req *));
 458		if(n != sizeof(req)){
 
 
 
 
 
 459			if(n == -EAGAIN)
 460				break;
 461			printk(KERN_ERR "spurious interrupt in ubd_handler, "
 462			       "err = %d\n", -n);
 463			return;
 464		}
 
 
 465
 466		blk_end_request(req->req, 0, req->length);
 467		kfree(req);
 468	}
 469	reactivate_fd(thread_fd, UBD_IRQ);
 470
 471	list_for_each_safe(list, next_ele, &restart){
 472		ubd = container_of(list, struct ubd, restart);
 473		list_del_init(&ubd->restart);
 474		spin_lock_irqsave(&ubd->lock, flags);
 475		do_ubd_request(ubd->queue);
 476		spin_unlock_irqrestore(&ubd->lock, flags);
 477	}
 478}
 479
 480static irqreturn_t ubd_intr(int irq, void *dev)
 481{
 482	ubd_handler();
 483	return IRQ_HANDLED;
 484}
 485
 486/* Only changed by ubd_init, which is an initcall. */
 487static int io_pid = -1;
 488
 489static void kill_io_thread(void)
 490{
 491	if(io_pid != -1)
 492		os_kill_process(io_pid, 1);
 493}
 494
 495__uml_exitcall(kill_io_thread);
 496
 497static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 498{
 499	char *file;
 500	int fd;
 501	int err;
 502
 503	__u32 version;
 504	__u32 align;
 505	char *backing_file;
 506	time_t mtime;
 507	unsigned long long size;
 508	int sector_size;
 509	int bitmap_offset;
 510
 511	if (ubd_dev->file && ubd_dev->cow.file) {
 512		file = ubd_dev->cow.file;
 513
 514		goto out;
 515	}
 516
 517	fd = os_open_file(ubd_dev->file, global_openflags, 0);
 518	if (fd < 0)
 519		return fd;
 520
 521	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 522		&mtime, &size, &sector_size, &align, &bitmap_offset);
 523	os_close_file(fd);
 524
 525	if(err == -EINVAL)
 526		file = ubd_dev->file;
 527	else
 528		file = backing_file;
 529
 530out:
 531	return os_file_size(file, size_out);
 532}
 533
 534static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 535{
 536	int err;
 537
 538	err = os_seek_file(fd, offset);
 539	if (err < 0)
 540		return err;
 541
 542	err = os_read_file(fd, buf, len);
 543	if (err < 0)
 544		return err;
 545
 546	return 0;
 547}
 548
 549static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
 550{
 551	unsigned long modtime;
 552	unsigned long long actual;
 553	int err;
 554
 555	err = os_file_modtime(file, &modtime);
 556	if (err < 0) {
 557		printk(KERN_ERR "Failed to get modification time of backing "
 558		       "file \"%s\", err = %d\n", file, -err);
 559		return err;
 560	}
 561
 562	err = os_file_size(file, &actual);
 563	if (err < 0) {
 564		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 565		       "err = %d\n", file, -err);
 566		return err;
 567	}
 568
 569	if (actual != size) {
 570		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
 571		 * the typecast.*/
 572		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 573		       "vs backing file\n", (unsigned long long) size, actual);
 574		return -EINVAL;
 575	}
 576	if (modtime != mtime) {
 577		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
 578		       "backing file\n", mtime, modtime);
 579		return -EINVAL;
 580	}
 581	return 0;
 582}
 583
 584static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 585{
 586	struct uml_stat buf1, buf2;
 587	int err;
 588
 589	if (from_cmdline == NULL)
 590		return 0;
 591	if (!strcmp(from_cmdline, from_cow))
 592		return 0;
 593
 594	err = os_stat_file(from_cmdline, &buf1);
 595	if (err < 0) {
 596		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 597		       -err);
 598		return 0;
 599	}
 600	err = os_stat_file(from_cow, &buf2);
 601	if (err < 0) {
 602		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 603		       -err);
 604		return 1;
 605	}
 606	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 607		return 0;
 608
 609	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 610	       "\"%s\" specified in COW header of \"%s\"\n",
 611	       from_cmdline, from_cow, cow);
 612	return 1;
 613}
 614
 615static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 616		  char **backing_file_out, int *bitmap_offset_out,
 617		  unsigned long *bitmap_len_out, int *data_offset_out,
 618		  int *create_cow_out)
 619{
 620	time_t mtime;
 621	unsigned long long size;
 622	__u32 version, align;
 623	char *backing_file;
 624	int fd, err, sectorsize, asked_switch, mode = 0644;
 625
 626	fd = os_open_file(file, *openflags, mode);
 627	if (fd < 0) {
 628		if ((fd == -ENOENT) && (create_cow_out != NULL))
 629			*create_cow_out = 1;
 630		if (!openflags->w ||
 631		    ((fd != -EROFS) && (fd != -EACCES)))
 632			return fd;
 633		openflags->w = 0;
 634		fd = os_open_file(file, *openflags, mode);
 635		if (fd < 0)
 636			return fd;
 637	}
 638
 639	if (shared)
 640		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
 641	else {
 642		err = os_lock_file(fd, openflags->w);
 643		if (err < 0) {
 644			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
 645			       file, -err);
 646			goto out_close;
 647		}
 648	}
 649
 650	/* Successful return case! */
 651	if (backing_file_out == NULL)
 652		return fd;
 653
 654	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
 655			      &size, &sectorsize, &align, bitmap_offset_out);
 656	if (err && (*backing_file_out != NULL)) {
 657		printk(KERN_ERR "Failed to read COW header from COW file "
 658		       "\"%s\", errno = %d\n", file, -err);
 659		goto out_close;
 660	}
 661	if (err)
 662		return fd;
 663
 664	asked_switch = path_requires_switch(*backing_file_out, backing_file,
 665					    file);
 666
 667	/* Allow switching only if no mismatch. */
 668	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
 669						   mtime)) {
 670		printk(KERN_ERR "Switching backing file to '%s'\n",
 671		       *backing_file_out);
 672		err = write_cow_header(file, fd, *backing_file_out,
 673				       sectorsize, align, &size);
 674		if (err) {
 675			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
 676			goto out_close;
 677		}
 678	} else {
 679		*backing_file_out = backing_file;
 680		err = backing_file_mismatch(*backing_file_out, size, mtime);
 681		if (err)
 682			goto out_close;
 683	}
 684
 685	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
 686		  bitmap_len_out, data_offset_out);
 687
 688	return fd;
 689 out_close:
 690	os_close_file(fd);
 691	return err;
 692}
 693
 694static int create_cow_file(char *cow_file, char *backing_file,
 695		    struct openflags flags,
 696		    int sectorsize, int alignment, int *bitmap_offset_out,
 697		    unsigned long *bitmap_len_out, int *data_offset_out)
 698{
 699	int err, fd;
 700
 701	flags.c = 1;
 702	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 703	if (fd < 0) {
 704		err = fd;
 705		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 706		       cow_file, -err);
 707		goto out;
 708	}
 709
 710	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 711			    bitmap_offset_out, bitmap_len_out,
 712			    data_offset_out);
 713	if (!err)
 714		return fd;
 715	os_close_file(fd);
 716 out:
 717	return err;
 718}
 719
 720static void ubd_close_dev(struct ubd *ubd_dev)
 721{
 722	os_close_file(ubd_dev->fd);
 723	if(ubd_dev->cow.file == NULL)
 724		return;
 725
 726	os_close_file(ubd_dev->cow.fd);
 727	vfree(ubd_dev->cow.bitmap);
 728	ubd_dev->cow.bitmap = NULL;
 729}
 730
 731static int ubd_open_dev(struct ubd *ubd_dev)
 732{
 733	struct openflags flags;
 734	char **back_ptr;
 735	int err, create_cow, *create_ptr;
 736	int fd;
 737
 738	ubd_dev->openflags = ubd_dev->boot_openflags;
 739	create_cow = 0;
 740	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 741	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 742
 743	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 744				back_ptr, &ubd_dev->cow.bitmap_offset,
 745				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 746				create_ptr);
 747
 748	if((fd == -ENOENT) && create_cow){
 749		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 750					  ubd_dev->openflags, 1 << 9, PAGE_SIZE,
 751					  &ubd_dev->cow.bitmap_offset,
 752					  &ubd_dev->cow.bitmap_len,
 753					  &ubd_dev->cow.data_offset);
 754		if(fd >= 0){
 755			printk(KERN_INFO "Creating \"%s\" as COW file for "
 756			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 757		}
 758	}
 759
 760	if(fd < 0){
 761		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 762		       -fd);
 763		return fd;
 764	}
 765	ubd_dev->fd = fd;
 766
 767	if(ubd_dev->cow.file != NULL){
 768		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 769
 770		err = -ENOMEM;
 771		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 772		if(ubd_dev->cow.bitmap == NULL){
 773			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 774			goto error;
 775		}
 776		flush_tlb_kernel_vm();
 777
 778		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 779				      ubd_dev->cow.bitmap_offset,
 780				      ubd_dev->cow.bitmap_len);
 781		if(err < 0)
 782			goto error;
 783
 784		flags = ubd_dev->openflags;
 785		flags.w = 0;
 786		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 787				    NULL, NULL, NULL, NULL);
 788		if(err < 0) goto error;
 789		ubd_dev->cow.fd = err;
 790	}
 
 
 
 
 
 791	return 0;
 792 error:
 793	os_close_file(ubd_dev->fd);
 794	return err;
 795}
 796
 797static void ubd_device_release(struct device *dev)
 798{
 799	struct ubd *ubd_dev = dev_get_drvdata(dev);
 800
 801	blk_cleanup_queue(ubd_dev->queue);
 802	*ubd_dev = ((struct ubd) DEFAULT_UBD);
 803}
 804
 805static int ubd_disk_register(int major, u64 size, int unit,
 806			     struct gendisk **disk_out)
 807{
 808	struct gendisk *disk;
 
 809
 810	disk = alloc_disk(1 << UBD_SHIFT);
 811	if(disk == NULL)
 812		return -ENOMEM;
 
 
 813
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 814	disk->major = major;
 815	disk->first_minor = unit << UBD_SHIFT;
 
 816	disk->fops = &ubd_blops;
 817	set_capacity(disk, size / 512);
 818	if (major == UBD_MAJOR)
 819		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 820	else
 821		sprintf(disk->disk_name, "ubd_fake%d", unit);
 822
 823	/* sysfs register (not for ide fake devices) */
 824	if (major == UBD_MAJOR) {
 825		ubd_devs[unit].pdev.id   = unit;
 826		ubd_devs[unit].pdev.name = DRIVER_NAME;
 827		ubd_devs[unit].pdev.dev.release = ubd_device_release;
 828		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 829		platform_device_register(&ubd_devs[unit].pdev);
 830		disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
 831	}
 832
 833	disk->private_data = &ubd_devs[unit];
 834	disk->queue = ubd_devs[unit].queue;
 835	add_disk(disk);
 836
 837	*disk_out = disk;
 838	return 0;
 839}
 840
 841#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
 
 
 
 
 842
 843static int ubd_add(int n, char **error_out)
 844{
 845	struct ubd *ubd_dev = &ubd_devs[n];
 
 846	int err = 0;
 847
 848	if(ubd_dev->file == NULL)
 849		goto out;
 850
 851	err = ubd_file_size(ubd_dev, &ubd_dev->size);
 852	if(err < 0){
 853		*error_out = "Couldn't determine size of device's file";
 854		goto out;
 855	}
 856
 857	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 858
 859	INIT_LIST_HEAD(&ubd_dev->restart);
 860	sg_init_table(ubd_dev->sg, MAX_SG);
 
 
 
 
 861
 862	err = -ENOMEM;
 863	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
 864	if (ubd_dev->queue == NULL) {
 865		*error_out = "Failed to initialize device queue";
 866		goto out;
 
 
 
 
 
 867	}
 868	ubd_dev->queue->queuedata = ubd_dev;
 869
 
 870	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 871	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 872	if(err){
 873		*error_out = "Failed to register device";
 874		goto out_cleanup;
 875	}
 876
 877	if (fake_major != UBD_MAJOR)
 878		ubd_disk_register(fake_major, ubd_dev->size, n,
 879				  &fake_gendisk[n]);
 880
 881	/*
 882	 * Perhaps this should also be under the "if (fake_major)" above
 883	 * using the fake_disk->disk_name
 884	 */
 885	if (fake_ide)
 886		make_ide_entries(ubd_gendisk[n]->disk_name);
 887
 888	err = 0;
 
 
 
 889out:
 890	return err;
 891
 892out_cleanup:
 893	blk_cleanup_queue(ubd_dev->queue);
 894	goto out;
 895}
 896
 897static int ubd_config(char *str, char **error_out)
 898{
 899	int n, ret;
 900
 901	/* This string is possibly broken up and stored, so it's only
 902	 * freed if ubd_setup_common fails, or if only general options
 903	 * were set.
 904	 */
 905	str = kstrdup(str, GFP_KERNEL);
 906	if (str == NULL) {
 907		*error_out = "Failed to allocate memory";
 908		return -ENOMEM;
 909	}
 910
 911	ret = ubd_setup_common(str, &n, error_out);
 912	if (ret)
 913		goto err_free;
 914
 915	if (n == -1) {
 916		ret = 0;
 917		goto err_free;
 918	}
 919
 920	mutex_lock(&ubd_lock);
 921	ret = ubd_add(n, error_out);
 922	if (ret)
 923		ubd_devs[n].file = NULL;
 924	mutex_unlock(&ubd_lock);
 925
 926out:
 927	return ret;
 928
 929err_free:
 930	kfree(str);
 931	goto out;
 932}
 933
 934static int ubd_get_config(char *name, char *str, int size, char **error_out)
 935{
 936	struct ubd *ubd_dev;
 937	int n, len = 0;
 938
 939	n = parse_unit(&name);
 940	if((n >= MAX_DEV) || (n < 0)){
 941		*error_out = "ubd_get_config : device number out of range";
 942		return -1;
 943	}
 944
 945	ubd_dev = &ubd_devs[n];
 946	mutex_lock(&ubd_lock);
 947
 948	if(ubd_dev->file == NULL){
 949		CONFIG_CHUNK(str, size, len, "", 1);
 950		goto out;
 951	}
 952
 953	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
 954
 955	if(ubd_dev->cow.file != NULL){
 956		CONFIG_CHUNK(str, size, len, ",", 0);
 957		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
 958	}
 959	else CONFIG_CHUNK(str, size, len, "", 1);
 960
 961 out:
 962	mutex_unlock(&ubd_lock);
 963	return len;
 964}
 965
 966static int ubd_id(char **str, int *start_out, int *end_out)
 967{
 968	int n;
 969
 970	n = parse_unit(str);
 971	*start_out = 0;
 972	*end_out = MAX_DEV - 1;
 973	return n;
 974}
 975
 976static int ubd_remove(int n, char **error_out)
 977{
 978	struct gendisk *disk = ubd_gendisk[n];
 979	struct ubd *ubd_dev;
 980	int err = -ENODEV;
 981
 982	mutex_lock(&ubd_lock);
 983
 984	ubd_dev = &ubd_devs[n];
 985
 986	if(ubd_dev->file == NULL)
 987		goto out;
 988
 989	/* you cannot remove a open disk */
 990	err = -EBUSY;
 991	if(ubd_dev->count > 0)
 992		goto out;
 993
 994	ubd_gendisk[n] = NULL;
 995	if(disk != NULL){
 996		del_gendisk(disk);
 997		put_disk(disk);
 998	}
 999
1000	if(fake_gendisk[n] != NULL){
1001		del_gendisk(fake_gendisk[n]);
1002		put_disk(fake_gendisk[n]);
1003		fake_gendisk[n] = NULL;
1004	}
1005
1006	err = 0;
1007	platform_device_unregister(&ubd_dev->pdev);
1008out:
1009	mutex_unlock(&ubd_lock);
1010	return err;
1011}
1012
1013/* All these are called by mconsole in process context and without
1014 * ubd-specific locks.  The structure itself is const except for .list.
1015 */
1016static struct mc_device ubd_mc = {
1017	.list		= LIST_HEAD_INIT(ubd_mc.list),
1018	.name		= "ubd",
1019	.config		= ubd_config,
1020	.get_config	= ubd_get_config,
1021	.id		= ubd_id,
1022	.remove		= ubd_remove,
1023};
1024
1025static int __init ubd_mc_init(void)
1026{
1027	mconsole_register_dev(&ubd_mc);
1028	return 0;
1029}
1030
1031__initcall(ubd_mc_init);
1032
1033static int __init ubd0_init(void)
1034{
1035	struct ubd *ubd_dev = &ubd_devs[0];
1036
1037	mutex_lock(&ubd_lock);
1038	if(ubd_dev->file == NULL)
1039		ubd_dev->file = "root_fs";
1040	mutex_unlock(&ubd_lock);
1041
1042	return 0;
1043}
1044
1045__initcall(ubd0_init);
1046
1047/* Used in ubd_init, which is an initcall */
1048static struct platform_driver ubd_driver = {
1049	.driver = {
1050		.name  = DRIVER_NAME,
1051	},
1052};
1053
1054static int __init ubd_init(void)
1055{
1056	char *error;
1057	int i, err;
1058
1059	if (register_blkdev(UBD_MAJOR, "ubd"))
1060		return -1;
1061
1062	if (fake_major != UBD_MAJOR) {
1063		char name[sizeof("ubd_nnn\0")];
 
 
 
1064
1065		snprintf(name, sizeof(name), "ubd_%d", fake_major);
1066		if (register_blkdev(fake_major, "ubd"))
1067			return -1;
 
 
 
 
 
 
 
 
 
 
 
1068	}
1069	platform_driver_register(&ubd_driver);
1070	mutex_lock(&ubd_lock);
1071	for (i = 0; i < MAX_DEV; i++){
1072		err = ubd_add(i, &error);
1073		if(err)
1074			printk(KERN_ERR "Failed to initialize ubd device %d :"
1075			       "%s\n", i, error);
1076	}
1077	mutex_unlock(&ubd_lock);
1078	return 0;
1079}
1080
1081late_initcall(ubd_init);
1082
1083static int __init ubd_driver_init(void){
1084	unsigned long stack;
1085	int err;
1086
1087	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1088	if(global_openflags.s){
1089		printk(KERN_INFO "ubd: Synchronous mode\n");
1090		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1091		 * enough. So use anyway the io thread. */
1092	}
1093	stack = alloc_stack(0, 0);
1094	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1095				 &thread_fd);
1096	if(io_pid < 0){
1097		printk(KERN_ERR
1098		       "ubd : Failed to start I/O thread (errno = %d) - "
1099		       "falling back to synchronous I/O\n", -io_pid);
1100		io_pid = -1;
1101		return 0;
1102	}
1103	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1104			     0, "ubd", ubd_devs);
1105	if(err != 0)
1106		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1107	return 0;
1108}
1109
1110device_initcall(ubd_driver_init);
1111
1112static int ubd_open(struct block_device *bdev, fmode_t mode)
1113{
1114	struct gendisk *disk = bdev->bd_disk;
1115	struct ubd *ubd_dev = disk->private_data;
1116	int err = 0;
1117
1118	mutex_lock(&ubd_mutex);
1119	if(ubd_dev->count == 0){
1120		err = ubd_open_dev(ubd_dev);
1121		if(err){
1122			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1123			       disk->disk_name, ubd_dev->file, -err);
1124			goto out;
1125		}
1126	}
1127	ubd_dev->count++;
1128	set_disk_ro(disk, !ubd_dev->openflags.w);
1129
1130	/* This should no more be needed. And it didn't work anyway to exclude
1131	 * read-write remounting of filesystems.*/
1132	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1133	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1134	        err = -EROFS;
1135	}*/
1136out:
1137	mutex_unlock(&ubd_mutex);
1138	return err;
1139}
1140
1141static int ubd_release(struct gendisk *disk, fmode_t mode)
1142{
1143	struct ubd *ubd_dev = disk->private_data;
1144
1145	mutex_lock(&ubd_mutex);
1146	if(--ubd_dev->count == 0)
1147		ubd_close_dev(ubd_dev);
1148	mutex_unlock(&ubd_mutex);
1149	return 0;
1150}
1151
1152static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1153			  __u64 *cow_offset, unsigned long *bitmap,
1154			  __u64 bitmap_offset, unsigned long *bitmap_words,
1155			  __u64 bitmap_len)
1156{
1157	__u64 sector = io_offset >> 9;
1158	int i, update_bitmap = 0;
1159
1160	for(i = 0; i < length >> 9; i++){
1161		if(cow_mask != NULL)
1162			ubd_set_bit(i, (unsigned char *) cow_mask);
1163		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1164			continue;
1165
1166		update_bitmap = 1;
1167		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1168	}
1169
1170	if(!update_bitmap)
1171		return;
1172
1173	*cow_offset = sector / (sizeof(unsigned long) * 8);
1174
1175	/* This takes care of the case where we're exactly at the end of the
1176	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1177	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1178	 * for the original diagnosis.
1179	 */
1180	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1181					 sizeof(unsigned long)) - 1))
1182		(*cow_offset)--;
1183
1184	bitmap_words[0] = bitmap[*cow_offset];
1185	bitmap_words[1] = bitmap[*cow_offset + 1];
1186
1187	*cow_offset *= sizeof(unsigned long);
1188	*cow_offset += bitmap_offset;
1189}
1190
1191static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
 
1192		       __u64 bitmap_offset, __u64 bitmap_len)
1193{
1194	__u64 sector = req->offset >> 9;
1195	int i;
1196
1197	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1198		panic("Operation too long");
1199
1200	if(req->op == UBD_READ) {
1201		for(i = 0; i < req->length >> 9; i++){
1202			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1203				ubd_set_bit(i, (unsigned char *)
1204					    &req->sector_mask);
1205		}
 
 
 
 
1206	}
1207	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1208			   &req->cow_offset, bitmap, bitmap_offset,
1209			   req->bitmap_words, bitmap_len);
1210}
1211
1212/* Called with dev->lock held */
1213static void prepare_request(struct request *req, struct io_thread_req *io_req,
1214			    unsigned long long offset, int page_offset,
1215			    int len, struct page *page)
1216{
1217	struct gendisk *disk = req->rq_disk;
1218	struct ubd *ubd_dev = disk->private_data;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1219
1220	io_req->req = req;
1221	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1222		ubd_dev->fd;
1223	io_req->fds[1] = ubd_dev->fd;
1224	io_req->cow_offset = -1;
1225	io_req->offset = offset;
1226	io_req->length = len;
1227	io_req->error = 0;
1228	io_req->sector_mask = 0;
1229
1230	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1231	io_req->offsets[0] = 0;
1232	io_req->offsets[1] = ubd_dev->cow.data_offset;
1233	io_req->buffer = page_address(page) + page_offset;
1234	io_req->sectorsize = 1 << 9;
1235
1236	if(ubd_dev->cow.file != NULL)
1237		cowify_req(io_req, ubd_dev->cow.bitmap,
1238			   ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
 
1239
 
1240}
1241
1242/* Called with dev->lock held */
1243static void do_ubd_request(struct request_queue *q)
1244{
 
1245	struct io_thread_req *io_req;
1246	struct request *req;
1247	int n;
1248
1249	while(1){
1250		struct ubd *dev = q->queuedata;
1251		if(dev->end_sg == 0){
1252			struct request *req = blk_fetch_request(q);
1253			if(req == NULL)
1254				return;
1255
1256			dev->request = req;
1257			dev->rq_pos = blk_rq_pos(req);
1258			dev->start_sg = 0;
1259			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1260		}
1261
1262		req = dev->request;
1263		while(dev->start_sg < dev->end_sg){
1264			struct scatterlist *sg = &dev->sg[dev->start_sg];
1265
1266			io_req = kmalloc(sizeof(struct io_thread_req),
1267					 GFP_ATOMIC);
1268			if(io_req == NULL){
1269				if(list_empty(&dev->restart))
1270					list_add(&dev->restart, &restart);
1271				return;
1272			}
1273			prepare_request(req, io_req,
1274					(unsigned long long)dev->rq_pos << 9,
1275					sg->offset, sg->length, sg_page(sg));
1276
1277			n = os_write_file(thread_fd, &io_req,
1278					  sizeof(struct io_thread_req *));
1279			if(n != sizeof(struct io_thread_req *)){
1280				if(n != -EAGAIN)
1281					printk("write to io thread failed, "
1282					       "errno = %d\n", -n);
1283				else if(list_empty(&dev->restart))
1284					list_add(&dev->restart, &restart);
1285				kfree(io_req);
1286				return;
1287			}
1288
1289			dev->rq_pos += sg->length >> 9;
1290			dev->start_sg++;
1291		}
1292		dev->end_sg = 0;
1293		dev->request = NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1294	}
 
 
 
 
 
 
 
 
 
 
 
1295}
1296
1297static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1298{
1299	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1300
1301	geo->heads = 128;
1302	geo->sectors = 32;
1303	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1304	return 0;
1305}
1306
1307static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1308		     unsigned int cmd, unsigned long arg)
1309{
1310	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1311	u16 ubd_id[ATA_ID_WORDS];
1312
1313	switch (cmd) {
1314		struct cdrom_volctrl volume;
1315	case HDIO_GET_IDENTITY:
1316		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1317		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1318		ubd_id[ATA_ID_HEADS]	= 128;
1319		ubd_id[ATA_ID_SECTORS]	= 32;
1320		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1321				 sizeof(ubd_id)))
1322			return -EFAULT;
1323		return 0;
1324
1325	case CDROMVOLREAD:
1326		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1327			return -EFAULT;
1328		volume.channel0 = 255;
1329		volume.channel1 = 255;
1330		volume.channel2 = 255;
1331		volume.channel3 = 255;
1332		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1333			return -EFAULT;
1334		return 0;
1335	}
1336	return -EINVAL;
1337}
1338
1339static int update_bitmap(struct io_thread_req *req)
1340{
1341	int n;
 
 
 
 
 
 
 
 
 
 
1342
1343	if(req->cow_offset == -1)
1344		return 0;
 
 
 
 
 
 
 
 
 
 
1345
1346	n = os_seek_file(req->fds[1], req->cow_offset);
1347	if(n < 0){
1348		printk("do_io - bitmap lseek failed : err = %d\n", -n);
1349		return 1;
1350	}
1351
1352	n = os_write_file(req->fds[1], &req->bitmap_words,
1353			  sizeof(req->bitmap_words));
1354	if(n != sizeof(req->bitmap_words)){
1355		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1356		       req->fds[1]);
1357		return 1;
1358	}
1359
1360	return 0;
 
 
 
 
 
1361}
1362
1363static void do_io(struct io_thread_req *req)
1364{
1365	char *buf;
1366	unsigned long len;
1367	int n, nsectors, start, end, bit;
1368	int err;
1369	__u64 off;
1370
1371	nsectors = req->length / req->sectorsize;
 
 
 
 
 
 
 
 
1372	start = 0;
1373	do {
1374		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1375		end = start;
1376		while((end < nsectors) &&
1377		      (ubd_test_bit(end, (unsigned char *)
1378				    &req->sector_mask) == bit))
1379			end++;
1380
1381		off = req->offset + req->offsets[bit] +
1382			start * req->sectorsize;
1383		len = (end - start) * req->sectorsize;
1384		buf = &req->buffer[start * req->sectorsize];
 
1385
1386		err = os_seek_file(req->fds[bit], off);
1387		if(err < 0){
1388			printk("do_io - lseek failed : err = %d\n", -err);
1389			req->error = 1;
1390			return;
1391		}
1392		if(req->op == UBD_READ){
1393			n = 0;
1394			do {
1395				buf = &buf[n];
1396				len -= n;
1397				n = os_read_file(req->fds[bit], buf, len);
1398				if (n < 0) {
1399					printk("do_io - read failed, err = %d "
1400					       "fd = %d\n", -n, req->fds[bit]);
1401					req->error = 1;
1402					return;
1403				}
1404			} while((n < len) && (n != 0));
1405			if (n < len) memset(&buf[n], 0, len - n);
1406		} else {
1407			n = os_write_file(req->fds[bit], buf, len);
 
1408			if(n != len){
1409				printk("do_io - write failed err = %d "
1410				       "fd = %d\n", -n, req->fds[bit]);
1411				req->error = 1;
 
 
 
 
 
 
 
 
 
 
 
 
1412				return;
1413			}
 
 
 
 
 
1414		}
1415
1416		start = end;
1417	} while(start < nsectors);
1418
1419	req->error = update_bitmap(req);
 
1420}
1421
/*
 * Descriptor the I/O thread reads request pointers from and writes
 * completed ones back to.  Changed in start_io_thread, which is serialized
 * by being called only from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/*
 * Number of requests handled by the I/O thread.  Only changed by the io
 * thread. XXX: currently unused.
 */
static int io_count = 0;
1429
1430int io_thread(void *arg)
1431{
1432	struct io_thread_req *req;
1433	int n;
 
1434
1435	ignore_sigwinch_sig();
1436	while(1){
1437		n = os_read_file(kernel_fd, &req,
1438				 sizeof(struct io_thread_req *));
1439		if(n != sizeof(struct io_thread_req *)){
1440			if(n < 0)
1441				printk("io_thread - read failed, fd = %d, "
1442				       "err = %d\n", kernel_fd, -n);
1443			else {
1444				printk("io_thread - short read, fd = %d, "
1445				       "length = %d\n", kernel_fd, n);
1446			}
 
1447			continue;
1448		}
1449		io_count++;
1450		do_io(req);
1451		n = os_write_file(kernel_fd, &req,
1452				  sizeof(struct io_thread_req *));
1453		if(n != sizeof(struct io_thread_req *))
1454			printk("io_thread - write failed, fd = %d, err = %d\n",
1455			       kernel_fd, -n);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1456	}
1457
1458	return 0;
1459}
v6.8
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018 Cambridge Greys Ltd
   4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
   5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 
   6 */
   7
   8/* 2001-09-28...2002-04-17
   9 * Partition stuff by James_McMechan@hotmail.com
  10 * old style ubd by setting UBD_SHIFT to 0
  11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  12 * partitions have changed in 2.5
  13 * 2003-01-29 more tinkering for 2.5.59-1
  14 * This should now address the sysfs problems and has
  15 * the symlink for devfs to allow for booting with
  16 * the common /dev/ubd/discX/... names rather than
  17 * only /dev/ubdN/discN this version also has lots of
  18 * clean ups preparing for ubd-many.
  19 * James McMechan
  20 */
  21
  22#define UBD_SHIFT 4
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/blkdev.h>
  27#include <linux/blk-mq.h>
  28#include <linux/ata.h>
  29#include <linux/hdreg.h>
  30#include <linux/major.h>
  31#include <linux/cdrom.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/ctype.h>
  35#include <linux/slab.h>
  36#include <linux/vmalloc.h>
  37#include <linux/platform_device.h>
  38#include <linux/scatterlist.h>
  39#include <asm/tlbflush.h>
  40#include <kern_util.h>
  41#include "mconsole_kern.h"
  42#include <init.h>
  43#include <irq_kern.h>
  44#include "ubd.h"
  45#include <os.h>
  46#include "cow.h"
  47
  48/* Max request size is determined by sector mask - 32K */
  49#define UBD_MAX_REQUEST (8 * sizeof(long))
  50
  51struct io_desc {
  52	char *buffer;
  53	unsigned long length;
  54	unsigned long sector_mask;
  55	unsigned long long cow_offset;
  56	unsigned long bitmap_words[2];
  57};
  58
  59struct io_thread_req {
  60	struct request *req;
 
  61	int fds[2];
  62	unsigned long offsets[2];
  63	unsigned long long offset;
 
 
  64	int sectorsize;
 
 
 
  65	int error;
  66
  67	int desc_cnt;
  68	/* io_desc has to be the last element of the struct */
  69	struct io_desc io_desc[];
  70};
  71
  72
  73static struct io_thread_req * (*irq_req_buffer)[];
  74static struct io_thread_req *irq_remainder;
  75static int irq_remainder_size;
  76
  77static struct io_thread_req * (*io_req_buffer)[];
  78static struct io_thread_req *io_remainder;
  79static int io_remainder_size;
  80
  81
  82
  83static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  84{
  85	__u64 n;
  86	int bits, off;
  87
  88	bits = sizeof(data[0]) * 8;
  89	n = bit / bits;
  90	off = bit % bits;
  91	return (data[n] & (1 << off)) != 0;
  92}
  93
  94static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  95{
  96	__u64 n;
  97	int bits, off;
  98
  99	bits = sizeof(data[0]) * 8;
 100	n = bit / bits;
 101	off = bit % bits;
 102	data[n] |= (1 << off);
 103}
 104/*End stuff from ubd_user.h*/
 105
 106#define DRIVER_NAME "uml-blkdev"
 107
 108static DEFINE_MUTEX(ubd_lock);
 109static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
 110
 111static int ubd_open(struct gendisk *disk, blk_mode_t mode);
 112static void ubd_release(struct gendisk *disk);
 113static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
 114		     unsigned int cmd, unsigned long arg);
 115static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 116
 117#define MAX_DEV (16)
 118
 119static const struct block_device_operations ubd_blops = {
 120        .owner		= THIS_MODULE,
 121        .open		= ubd_open,
 122        .release	= ubd_release,
 123        .ioctl		= ubd_ioctl,
 124        .compat_ioctl	= blkdev_compat_ptr_ioctl,
 125	.getgeo		= ubd_getgeo,
 126};
 127
 128/* Protected by ubd_lock */
 
 129static struct gendisk *ubd_gendisk[MAX_DEV];
 
 130
 131#ifdef CONFIG_BLK_DEV_UBD_SYNC
 132#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 133					 .cl = 1 })
 134#else
 135#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
 136					 .cl = 1 })
 137#endif
 138static struct openflags global_openflags = OPEN_FLAGS;
 139
 140struct cow {
 141	/* backing file name */
 142	char *file;
 143	/* backing file fd */
 144	int fd;
 145	unsigned long *bitmap;
 146	unsigned long bitmap_len;
 147	int bitmap_offset;
 148	int data_offset;
 149};
 150
 151#define MAX_SG 64
 152
 153struct ubd {
 
 154	/* name (and fd, below) of the file opened for writing, either the
 155	 * backing or the cow file. */
 156	char *file;
 157	char *serial;
 158	int count;
 159	int fd;
 160	__u64 size;
 161	struct openflags boot_openflags;
 162	struct openflags openflags;
 163	unsigned shared:1;
 164	unsigned no_cow:1;
 165	unsigned no_trim:1;
 166	struct cow cow;
 167	struct platform_device pdev;
 168	struct request_queue *queue;
 169	struct blk_mq_tag_set tag_set;
 170	spinlock_t lock;
 
 
 
 
 171};
 172
 173#define DEFAULT_COW { \
 174	.file =			NULL, \
 175	.fd =			-1,	\
 176	.bitmap =		NULL, \
 177	.bitmap_offset =	0, \
 178	.data_offset =		0, \
 179}
 180
 181#define DEFAULT_UBD { \
 182	.file = 		NULL, \
 183	.serial =		NULL, \
 184	.count =		0, \
 185	.fd =			-1, \
 186	.size =			-1, \
 187	.boot_openflags =	OPEN_FLAGS, \
 188	.openflags =		OPEN_FLAGS, \
 189	.no_cow =               0, \
 190	.no_trim =		0, \
 191	.shared =		0, \
 192	.cow =			DEFAULT_COW, \
 193	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
 
 
 
 
 194}
 195
 196/* Protected by ubd_lock */
 197static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 198
 199static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 200				 const struct blk_mq_queue_data *bd);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 201
 202static int fake_ide_setup(char *str)
 203{
 204	pr_warn("The fake_ide option has been removed\n");
 205	return 1;
 206}
 
 207__setup("fake_ide", fake_ide_setup);
 208
 209__uml_help(fake_ide_setup,
 210"fake_ide\n"
 211"    Obsolete stub.\n\n"
 212);
 213
 214static int parse_unit(char **ptr)
 215{
 216	char *str = *ptr, *end;
 217	int n = -1;
 218
 219	if(isdigit(*str)) {
 220		n = simple_strtoul(str, &end, 0);
 221		if(end == str)
 222			return -1;
 223		*ptr = end;
 224	}
 225	else if (('a' <= *str) && (*str <= 'z')) {
 226		n = *str - 'a';
 227		str++;
 228		*ptr = str;
 229	}
 230	return n;
 231}
 232
 233/* If *index_out == -1 at exit, the passed option was a general one;
 234 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 235 * should not be freed on exit.
 236 */
 237static int ubd_setup_common(char *str, int *index_out, char **error_out)
 238{
 239	struct ubd *ubd_dev;
 240	struct openflags flags = global_openflags;
 241	char *file, *backing_file, *serial;
 242	int n, err = 0, i;
 243
 244	if(index_out) *index_out = -1;
 245	n = *str;
 246	if(n == '='){
 
 
 
 247		str++;
 248		if(!strcmp(str, "sync")){
 249			global_openflags = of_sync(global_openflags);
 250			return err;
 251		}
 252
 253		pr_warn("fake major not supported any more\n");
 254		return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 255	}
 256
 257	n = parse_unit(&str);
 258	if(n < 0){
 259		*error_out = "Couldn't parse device number";
 260		return -EINVAL;
 261	}
 262	if(n >= MAX_DEV){
 263		*error_out = "Device number out of range";
 264		return 1;
 265	}
 266
 267	err = -EBUSY;
 268	mutex_lock(&ubd_lock);
 269
 270	ubd_dev = &ubd_devs[n];
 271	if(ubd_dev->file != NULL){
 272		*error_out = "Device is already configured";
 273		goto out;
 274	}
 275
 276	if (index_out)
 277		*index_out = n;
 278
 279	err = -EINVAL;
 280	for (i = 0; i < sizeof("rscdt="); i++) {
 281		switch (*str) {
 282		case 'r':
 283			flags.w = 0;
 284			break;
 285		case 's':
 286			flags.s = 1;
 287			break;
 288		case 'd':
 289			ubd_dev->no_cow = 1;
 290			break;
 291		case 'c':
 292			ubd_dev->shared = 1;
 293			break;
 294		case 't':
 295			ubd_dev->no_trim = 1;
 296			break;
 297		case '=':
 298			str++;
 299			goto break_loop;
 300		default:
 301			*error_out = "Expected '=' or flag letter "
 302				"(r, s, c, t or d)";
 303			goto out;
 304		}
 305		str++;
 306	}
 307
 308	if (*str == '=')
 309		*error_out = "Too many flags specified";
 310	else
 311		*error_out = "Missing '='";
 312	goto out;
 313
 314break_loop:
 315	file = strsep(&str, ",:");
 316	if (*file == '\0')
 317		file = NULL;
 318
 319	backing_file = strsep(&str, ",:");
 320	if (backing_file && *backing_file == '\0')
 321		backing_file = NULL;
 322
 323	serial = strsep(&str, ",:");
 324	if (serial && *serial == '\0')
 325		serial = NULL;
 326
 327	if (backing_file && ubd_dev->no_cow) {
 328		*error_out = "Can't specify both 'd' and a cow file";
 329		goto out;
 
 
 
 
 
 
 330	}
 331
 332	err = 0;
 333	ubd_dev->file = file;
 334	ubd_dev->cow.file = backing_file;
 335	ubd_dev->serial = serial;
 336	ubd_dev->boot_openflags = flags;
 337out:
 338	mutex_unlock(&ubd_lock);
 339	return err;
 340}
 341
 342static int ubd_setup(char *str)
 343{
 344	char *error;
 345	int err;
 346
 347	err = ubd_setup_common(str, NULL, &error);
 348	if(err)
 349		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 350		       "%s\n", str, error);
 351	return 1;
 352}
 353
 354__setup("ubd", ubd_setup);
 355__uml_help(ubd_setup,
 356"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
 357"    This is used to associate a device with a file in the underlying\n"
 358"    filesystem. When specifying two filenames, the first one is the\n"
 359"    COW name and the second is the backing file name. As separator you can\n"
 360"    use either a ':' or a ',': the first one allows writing things like;\n"
 361"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 362"    while with a ',' the shell would not expand the 2nd '~'.\n"
 363"    When using only one filename, UML will detect whether to treat it like\n"
 364"    a COW file or a backing file. To override this detection, add the 'd'\n"
 365"    flag:\n"
 366"	ubd0d=BackingFile\n"
 367"    Usually, there is a filesystem in the file, but \n"
 368"    that's not required. Swap devices containing swap files can be\n"
 369"    specified like this. Also, a file which doesn't contain a\n"
 370"    filesystem can have its contents read in the virtual \n"
 371"    machine by running 'dd' on the device. <n> must be in the range\n"
 372"    0 to 7. Appending an 'r' to the number will cause that device\n"
 373"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 374"    an 's' will cause data to be written to disk on the host immediately.\n"
 375"    'c' will cause the device to be treated as being shared between multiple\n"
 376"    UMLs and file locking will be turned off - this is appropriate for a\n"
 377"    cluster filesystem and inappropriate at almost all other times.\n\n"
 378"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 379"    An optional device serial number can be exposed using the serial parameter\n"
 380"    on the cmdline which is exposed as a sysfs entry. This is particularly\n"
 381"    useful when a unique number should be given to the device. Note when\n"
 382"    specifying a label, the filename2 must be also presented. It can be\n"
 383"    an empty string, in which case the backing file is not used:\n"
 384"       ubd0=File,,Serial\n"
 385);
 386
 387static int udb_setup(char *str)
 388{
 389	printk("udb%s specified on command line is almost certainly a ubd -> "
 390	       "udb TYPO\n", str);
 391	return 1;
 392}
 393
 394__setup("udb", udb_setup);
 395__uml_help(udb_setup,
 396"udb\n"
 397"    This option is here solely to catch ubd -> udb typos, which can be\n"
 398"    to impossible to catch visually unless you specifically look for\n"
 399"    them.  The only result of any option starting with 'udb' is an error\n"
 400"    in the boot output.\n\n"
 401);
 402
 
 
 403/* Only changed by ubd_init, which is an initcall. */
 404static int thread_fd = -1;
 
 405
 406/* Function to read several request pointers at a time
 407* handling fractional reads if (and as) needed
 408*/
 409
 410static int bulk_req_safe_read(
 411	int fd,
 412	struct io_thread_req * (*request_buffer)[],
 413	struct io_thread_req **remainder,
 414	int *remainder_size,
 415	int max_recs
 416	)
 417{
 418	int n = 0;
 419	int res = 0;
 420
 421	if (*remainder_size > 0) {
 422		memmove(
 423			(char *) request_buffer,
 424			(char *) remainder, *remainder_size
 425		);
 426		n = *remainder_size;
 427	}
 428
 429	res = os_read_file(
 430			fd,
 431			((char *) request_buffer) + *remainder_size,
 432			sizeof(struct io_thread_req *)*max_recs
 433				- *remainder_size
 434		);
 435	if (res > 0) {
 436		n += res;
 437		if ((n % sizeof(struct io_thread_req *)) > 0) {
 438			/*
 439			* Read somehow returned not a multiple of dword
 440			* theoretically possible, but never observed in the
 441			* wild, so read routine must be able to handle it
 442			*/
 443			*remainder_size = n % sizeof(struct io_thread_req *);
 444			WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
 445			memmove(
 446				remainder,
 447				((char *) request_buffer) +
 448					(n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
 449				*remainder_size
 450			);
 451			n = n - *remainder_size;
 452		}
 453	} else {
 454		n = res;
 455	}
 456	return n;
 457}
 458
 459/* Called without dev->lock held, and only in interrupt context. */
 460static void ubd_handler(void)
 461{
 
 
 
 
 462	int n;
 463	int count;
 464
 465	while(1){
 466		n = bulk_req_safe_read(
 467			thread_fd,
 468			irq_req_buffer,
 469			&irq_remainder,
 470			&irq_remainder_size,
 471			UBD_REQ_BUFFER_SIZE
 472		);
 473		if (n < 0) {
 474			if(n == -EAGAIN)
 475				break;
 476			printk(KERN_ERR "spurious interrupt in ubd_handler, "
 477			       "err = %d\n", -n);
 478			return;
 479		}
 480		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 481			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 482
 483			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
 484				blk_queue_max_discard_sectors(io_req->req->q, 0);
 485				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
 486			}
 487			blk_mq_end_request(io_req->req, io_req->error);
 488			kfree(io_req);
 489		}
 
 
 
 
 490	}
 491}
 492
 493static irqreturn_t ubd_intr(int irq, void *dev)
 494{
 495	ubd_handler();
 496	return IRQ_HANDLED;
 497}
 498
 499/* Only changed by ubd_init, which is an initcall. */
 500static int io_pid = -1;
 501
 502static void kill_io_thread(void)
 503{
 504	if(io_pid != -1)
 505		os_kill_process(io_pid, 1);
 506}
 507
 508__uml_exitcall(kill_io_thread);
 509
 510static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 511{
 512	char *file;
 513	int fd;
 514	int err;
 515
 516	__u32 version;
 517	__u32 align;
 518	char *backing_file;
 519	time64_t mtime;
 520	unsigned long long size;
 521	int sector_size;
 522	int bitmap_offset;
 523
 524	if (ubd_dev->file && ubd_dev->cow.file) {
 525		file = ubd_dev->cow.file;
 526
 527		goto out;
 528	}
 529
 530	fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
 531	if (fd < 0)
 532		return fd;
 533
 534	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 535		&mtime, &size, &sector_size, &align, &bitmap_offset);
 536	os_close_file(fd);
 537
 538	if(err == -EINVAL)
 539		file = ubd_dev->file;
 540	else
 541		file = backing_file;
 542
 543out:
 544	return os_file_size(file, size_out);
 545}
 546
 547static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 548{
 549	int err;
 550
 551	err = os_pread_file(fd, buf, len, offset);
 
 
 
 
 552	if (err < 0)
 553		return err;
 554
 555	return 0;
 556}
 557
 558static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
 559{
 560	time64_t modtime;
 561	unsigned long long actual;
 562	int err;
 563
 564	err = os_file_modtime(file, &modtime);
 565	if (err < 0) {
 566		printk(KERN_ERR "Failed to get modification time of backing "
 567		       "file \"%s\", err = %d\n", file, -err);
 568		return err;
 569	}
 570
 571	err = os_file_size(file, &actual);
 572	if (err < 0) {
 573		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 574		       "err = %d\n", file, -err);
 575		return err;
 576	}
 577
 578	if (actual != size) {
 579		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
 580		 * the typecast.*/
 581		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 582		       "vs backing file\n", (unsigned long long) size, actual);
 583		return -EINVAL;
 584	}
 585	if (modtime != mtime) {
 586		printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
 587		       "backing file\n", mtime, modtime);
 588		return -EINVAL;
 589	}
 590	return 0;
 591}
 592
 593static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 594{
 595	struct uml_stat buf1, buf2;
 596	int err;
 597
 598	if (from_cmdline == NULL)
 599		return 0;
 600	if (!strcmp(from_cmdline, from_cow))
 601		return 0;
 602
 603	err = os_stat_file(from_cmdline, &buf1);
 604	if (err < 0) {
 605		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 606		       -err);
 607		return 0;
 608	}
 609	err = os_stat_file(from_cow, &buf2);
 610	if (err < 0) {
 611		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 612		       -err);
 613		return 1;
 614	}
 615	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 616		return 0;
 617
 618	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 619	       "\"%s\" specified in COW header of \"%s\"\n",
 620	       from_cmdline, from_cow, cow);
 621	return 1;
 622}
 623
 624static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 625		  char **backing_file_out, int *bitmap_offset_out,
 626		  unsigned long *bitmap_len_out, int *data_offset_out,
 627		  int *create_cow_out)
 628{
 629	time64_t mtime;
 630	unsigned long long size;
 631	__u32 version, align;
 632	char *backing_file;
 633	int fd, err, sectorsize, asked_switch, mode = 0644;
 634
 635	fd = os_open_file(file, *openflags, mode);
 636	if (fd < 0) {
 637		if ((fd == -ENOENT) && (create_cow_out != NULL))
 638			*create_cow_out = 1;
 639		if (!openflags->w ||
 640		    ((fd != -EROFS) && (fd != -EACCES)))
 641			return fd;
 642		openflags->w = 0;
 643		fd = os_open_file(file, *openflags, mode);
 644		if (fd < 0)
 645			return fd;
 646	}
 647
 648	if (shared)
 649		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
 650	else {
 651		err = os_lock_file(fd, openflags->w);
 652		if (err < 0) {
 653			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
 654			       file, -err);
 655			goto out_close;
 656		}
 657	}
 658
 659	/* Successful return case! */
 660	if (backing_file_out == NULL)
 661		return fd;
 662
 663	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
 664			      &size, &sectorsize, &align, bitmap_offset_out);
 665	if (err && (*backing_file_out != NULL)) {
 666		printk(KERN_ERR "Failed to read COW header from COW file "
 667		       "\"%s\", errno = %d\n", file, -err);
 668		goto out_close;
 669	}
 670	if (err)
 671		return fd;
 672
 673	asked_switch = path_requires_switch(*backing_file_out, backing_file,
 674					    file);
 675
 676	/* Allow switching only if no mismatch. */
 677	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
 678						   mtime)) {
 679		printk(KERN_ERR "Switching backing file to '%s'\n",
 680		       *backing_file_out);
 681		err = write_cow_header(file, fd, *backing_file_out,
 682				       sectorsize, align, &size);
 683		if (err) {
 684			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
 685			goto out_close;
 686		}
 687	} else {
 688		*backing_file_out = backing_file;
 689		err = backing_file_mismatch(*backing_file_out, size, mtime);
 690		if (err)
 691			goto out_close;
 692	}
 693
 694	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
 695		  bitmap_len_out, data_offset_out);
 696
 697	return fd;
 698 out_close:
 699	os_close_file(fd);
 700	return err;
 701}
 702
 703static int create_cow_file(char *cow_file, char *backing_file,
 704		    struct openflags flags,
 705		    int sectorsize, int alignment, int *bitmap_offset_out,
 706		    unsigned long *bitmap_len_out, int *data_offset_out)
 707{
 708	int err, fd;
 709
 710	flags.c = 1;
 711	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 712	if (fd < 0) {
 713		err = fd;
 714		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 715		       cow_file, -err);
 716		goto out;
 717	}
 718
 719	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 720			    bitmap_offset_out, bitmap_len_out,
 721			    data_offset_out);
 722	if (!err)
 723		return fd;
 724	os_close_file(fd);
 725 out:
 726	return err;
 727}
 728
 729static void ubd_close_dev(struct ubd *ubd_dev)
 730{
 731	os_close_file(ubd_dev->fd);
 732	if(ubd_dev->cow.file == NULL)
 733		return;
 734
 735	os_close_file(ubd_dev->cow.fd);
 736	vfree(ubd_dev->cow.bitmap);
 737	ubd_dev->cow.bitmap = NULL;
 738}
 739
 740static int ubd_open_dev(struct ubd *ubd_dev)
 741{
 742	struct openflags flags;
 743	char **back_ptr;
 744	int err, create_cow, *create_ptr;
 745	int fd;
 746
 747	ubd_dev->openflags = ubd_dev->boot_openflags;
 748	create_cow = 0;
 749	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 750	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 751
 752	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 753				back_ptr, &ubd_dev->cow.bitmap_offset,
 754				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 755				create_ptr);
 756
 757	if((fd == -ENOENT) && create_cow){
 758		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 759					  ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 760					  &ubd_dev->cow.bitmap_offset,
 761					  &ubd_dev->cow.bitmap_len,
 762					  &ubd_dev->cow.data_offset);
 763		if(fd >= 0){
 764			printk(KERN_INFO "Creating \"%s\" as COW file for "
 765			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 766		}
 767	}
 768
 769	if(fd < 0){
 770		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 771		       -fd);
 772		return fd;
 773	}
 774	ubd_dev->fd = fd;
 775
 776	if(ubd_dev->cow.file != NULL){
 777		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 778
 779		err = -ENOMEM;
 780		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 781		if(ubd_dev->cow.bitmap == NULL){
 782			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 783			goto error;
 784		}
 785		flush_tlb_kernel_vm();
 786
 787		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 788				      ubd_dev->cow.bitmap_offset,
 789				      ubd_dev->cow.bitmap_len);
 790		if(err < 0)
 791			goto error;
 792
 793		flags = ubd_dev->openflags;
 794		flags.w = 0;
 795		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 796				    NULL, NULL, NULL, NULL);
 797		if(err < 0) goto error;
 798		ubd_dev->cow.fd = err;
 799	}
 800	if (ubd_dev->no_trim == 0) {
 801		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 802		blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 803	}
 804	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 805	return 0;
 806 error:
 807	os_close_file(ubd_dev->fd);
 808	return err;
 809}
 810
 811static void ubd_device_release(struct device *dev)
 812{
 813	struct ubd *ubd_dev = dev_get_drvdata(dev);
 814
 815	blk_mq_free_tag_set(&ubd_dev->tag_set);
 816	*ubd_dev = ((struct ubd) DEFAULT_UBD);
 817}
 818
 819static ssize_t serial_show(struct device *dev,
 820			   struct device_attribute *attr, char *buf)
 821{
 822	struct gendisk *disk = dev_to_disk(dev);
 823	struct ubd *ubd_dev = disk->private_data;
 824
 825	if (!ubd_dev)
 826		return 0;
 827
 828	return sprintf(buf, "%s", ubd_dev->serial);
 829}
 830
 831static DEVICE_ATTR_RO(serial);
 832
 833static struct attribute *ubd_attrs[] = {
 834	&dev_attr_serial.attr,
 835	NULL,
 836};
 837
 838static umode_t ubd_attrs_are_visible(struct kobject *kobj,
 839				     struct attribute *a, int n)
 840{
 841	return a->mode;
 842}
 843
 844static const struct attribute_group ubd_attr_group = {
 845	.attrs = ubd_attrs,
 846	.is_visible = ubd_attrs_are_visible,
 847};
 848
 849static const struct attribute_group *ubd_attr_groups[] = {
 850	&ubd_attr_group,
 851	NULL,
 852};
 853
 854static int ubd_disk_register(int major, u64 size, int unit,
 855			     struct gendisk *disk)
 856{
 857	disk->major = major;
 858	disk->first_minor = unit << UBD_SHIFT;
 859	disk->minors = 1 << UBD_SHIFT;
 860	disk->fops = &ubd_blops;
 861	set_capacity(disk, size / 512);
 862	sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 
 
 
 863
 864	ubd_devs[unit].pdev.id   = unit;
 865	ubd_devs[unit].pdev.name = DRIVER_NAME;
 866	ubd_devs[unit].pdev.dev.release = ubd_device_release;
 867	dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 868	platform_device_register(&ubd_devs[unit].pdev);
 
 
 
 
 869
 870	disk->private_data = &ubd_devs[unit];
 871	disk->queue = ubd_devs[unit].queue;
 872	return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
 
 
 
 873}
 874
 875#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
 876
 877static const struct blk_mq_ops ubd_mq_ops = {
 878	.queue_rq = ubd_queue_rq,
 879};
 880
 881static int ubd_add(int n, char **error_out)
 882{
 883	struct ubd *ubd_dev = &ubd_devs[n];
 884	struct gendisk *disk;
 885	int err = 0;
 886
 887	if(ubd_dev->file == NULL)
 888		goto out;
 889
 890	err = ubd_file_size(ubd_dev, &ubd_dev->size);
 891	if(err < 0){
 892		*error_out = "Couldn't determine size of device's file";
 893		goto out;
 894	}
 895
 896	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 897
 898	ubd_dev->tag_set.ops = &ubd_mq_ops;
 899	ubd_dev->tag_set.queue_depth = 64;
 900	ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
 901	ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 902	ubd_dev->tag_set.driver_data = ubd_dev;
 903	ubd_dev->tag_set.nr_hw_queues = 1;
 904
 905	err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
 906	if (err)
 
 
 907		goto out;
 908
 909	disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev);
 910	if (IS_ERR(disk)) {
 911		err = PTR_ERR(disk);
 912		goto out_cleanup_tags;
 913	}
 914	ubd_dev->queue = disk->queue;
 915
 916	blk_queue_write_cache(ubd_dev->queue, true, false);
 917	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 918	blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
 919	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
 920	if (err)
 921		goto out_cleanup_disk;
 
 922
 923	ubd_gendisk[n] = disk;
 924	return 0;
 
 
 
 
 
 
 
 
 925
 926out_cleanup_disk:
 927	put_disk(disk);
 928out_cleanup_tags:
 929	blk_mq_free_tag_set(&ubd_dev->tag_set);
 930out:
 931	return err;
 
 
 
 
 932}
 933
 934static int ubd_config(char *str, char **error_out)
 935{
 936	int n, ret;
 937
 938	/* This string is possibly broken up and stored, so it's only
 939	 * freed if ubd_setup_common fails, or if only general options
 940	 * were set.
 941	 */
 942	str = kstrdup(str, GFP_KERNEL);
 943	if (str == NULL) {
 944		*error_out = "Failed to allocate memory";
 945		return -ENOMEM;
 946	}
 947
 948	ret = ubd_setup_common(str, &n, error_out);
 949	if (ret)
 950		goto err_free;
 951
 952	if (n == -1) {
 953		ret = 0;
 954		goto err_free;
 955	}
 956
 957	mutex_lock(&ubd_lock);
 958	ret = ubd_add(n, error_out);
 959	if (ret)
 960		ubd_devs[n].file = NULL;
 961	mutex_unlock(&ubd_lock);
 962
 963out:
 964	return ret;
 965
 966err_free:
 967	kfree(str);
 968	goto out;
 969}
 970
 971static int ubd_get_config(char *name, char *str, int size, char **error_out)
 972{
 973	struct ubd *ubd_dev;
 974	int n, len = 0;
 975
 976	n = parse_unit(&name);
 977	if((n >= MAX_DEV) || (n < 0)){
 978		*error_out = "ubd_get_config : device number out of range";
 979		return -1;
 980	}
 981
 982	ubd_dev = &ubd_devs[n];
 983	mutex_lock(&ubd_lock);
 984
 985	if(ubd_dev->file == NULL){
 986		CONFIG_CHUNK(str, size, len, "", 1);
 987		goto out;
 988	}
 989
 990	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
 991
 992	if(ubd_dev->cow.file != NULL){
 993		CONFIG_CHUNK(str, size, len, ",", 0);
 994		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
 995	}
 996	else CONFIG_CHUNK(str, size, len, "", 1);
 997
 998 out:
 999	mutex_unlock(&ubd_lock);
1000	return len;
1001}
1002
1003static int ubd_id(char **str, int *start_out, int *end_out)
1004{
1005	int n;
1006
1007	n = parse_unit(str);
1008	*start_out = 0;
1009	*end_out = MAX_DEV - 1;
1010	return n;
1011}
1012
1013static int ubd_remove(int n, char **error_out)
1014{
1015	struct gendisk *disk = ubd_gendisk[n];
1016	struct ubd *ubd_dev;
1017	int err = -ENODEV;
1018
1019	mutex_lock(&ubd_lock);
1020
1021	ubd_dev = &ubd_devs[n];
1022
1023	if(ubd_dev->file == NULL)
1024		goto out;
1025
1026	/* you cannot remove a open disk */
1027	err = -EBUSY;
1028	if(ubd_dev->count > 0)
1029		goto out;
1030
1031	ubd_gendisk[n] = NULL;
1032	if(disk != NULL){
1033		del_gendisk(disk);
1034		put_disk(disk);
1035	}
1036
 
 
 
 
 
 
1037	err = 0;
1038	platform_device_unregister(&ubd_dev->pdev);
1039out:
1040	mutex_unlock(&ubd_lock);
1041	return err;
1042}
1043
1044/* All these are called by mconsole in process context and without
1045 * ubd-specific locks.  The structure itself is const except for .list.
1046 */
1047static struct mc_device ubd_mc = {
1048	.list		= LIST_HEAD_INIT(ubd_mc.list),
1049	.name		= "ubd",
1050	.config		= ubd_config,
1051	.get_config	= ubd_get_config,
1052	.id		= ubd_id,
1053	.remove		= ubd_remove,
1054};
1055
1056static int __init ubd_mc_init(void)
1057{
1058	mconsole_register_dev(&ubd_mc);
1059	return 0;
1060}
1061
1062__initcall(ubd_mc_init);
1063
1064static int __init ubd0_init(void)
1065{
1066	struct ubd *ubd_dev = &ubd_devs[0];
1067
1068	mutex_lock(&ubd_lock);
1069	if(ubd_dev->file == NULL)
1070		ubd_dev->file = "root_fs";
1071	mutex_unlock(&ubd_lock);
1072
1073	return 0;
1074}
1075
1076__initcall(ubd0_init);
1077
1078/* Used in ubd_init, which is an initcall */
1079static struct platform_driver ubd_driver = {
1080	.driver = {
1081		.name  = DRIVER_NAME,
1082	},
1083};
1084
1085static int __init ubd_init(void)
1086{
1087	char *error;
1088	int i, err;
1089
1090	if (register_blkdev(UBD_MAJOR, "ubd"))
1091		return -1;
1092
1093	irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1094				       sizeof(struct io_thread_req *),
1095				       GFP_KERNEL
1096		);
1097	irq_remainder = 0;
1098
1099	if (irq_req_buffer == NULL) {
1100		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1101		return -1;
1102	}
1103	io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1104				      sizeof(struct io_thread_req *),
1105				      GFP_KERNEL
1106		);
1107
1108	io_remainder = 0;
1109
1110	if (io_req_buffer == NULL) {
1111		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1112		return -1;
1113	}
1114	platform_driver_register(&ubd_driver);
1115	mutex_lock(&ubd_lock);
1116	for (i = 0; i < MAX_DEV; i++){
1117		err = ubd_add(i, &error);
1118		if(err)
1119			printk(KERN_ERR "Failed to initialize ubd device %d :"
1120			       "%s\n", i, error);
1121	}
1122	mutex_unlock(&ubd_lock);
1123	return 0;
1124}
1125
1126late_initcall(ubd_init);
1127
1128static int __init ubd_driver_init(void){
1129	unsigned long stack;
1130	int err;
1131
1132	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1133	if(global_openflags.s){
1134		printk(KERN_INFO "ubd: Synchronous mode\n");
1135		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1136		 * enough. So use anyway the io thread. */
1137	}
1138	stack = alloc_stack(0, 0);
1139	io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
 
1140	if(io_pid < 0){
1141		printk(KERN_ERR
1142		       "ubd : Failed to start I/O thread (errno = %d) - "
1143		       "falling back to synchronous I/O\n", -io_pid);
1144		io_pid = -1;
1145		return 0;
1146	}
1147	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1148			     0, "ubd", ubd_devs);
1149	if(err < 0)
1150		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1151	return 0;
1152}
1153
1154device_initcall(ubd_driver_init);
1155
1156static int ubd_open(struct gendisk *disk, blk_mode_t mode)
1157{
 
1158	struct ubd *ubd_dev = disk->private_data;
1159	int err = 0;
1160
1161	mutex_lock(&ubd_mutex);
1162	if(ubd_dev->count == 0){
1163		err = ubd_open_dev(ubd_dev);
1164		if(err){
1165			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1166			       disk->disk_name, ubd_dev->file, -err);
1167			goto out;
1168		}
1169	}
1170	ubd_dev->count++;
1171	set_disk_ro(disk, !ubd_dev->openflags.w);
 
 
 
 
 
 
 
1172out:
1173	mutex_unlock(&ubd_mutex);
1174	return err;
1175}
1176
1177static void ubd_release(struct gendisk *disk)
1178{
1179	struct ubd *ubd_dev = disk->private_data;
1180
1181	mutex_lock(&ubd_mutex);
1182	if(--ubd_dev->count == 0)
1183		ubd_close_dev(ubd_dev);
1184	mutex_unlock(&ubd_mutex);
 
1185}
1186
1187static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1188			  __u64 *cow_offset, unsigned long *bitmap,
1189			  __u64 bitmap_offset, unsigned long *bitmap_words,
1190			  __u64 bitmap_len)
1191{
1192	__u64 sector = io_offset >> SECTOR_SHIFT;
1193	int i, update_bitmap = 0;
1194
1195	for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1196		if(cow_mask != NULL)
1197			ubd_set_bit(i, (unsigned char *) cow_mask);
1198		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1199			continue;
1200
1201		update_bitmap = 1;
1202		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1203	}
1204
1205	if(!update_bitmap)
1206		return;
1207
1208	*cow_offset = sector / (sizeof(unsigned long) * 8);
1209
1210	/* This takes care of the case where we're exactly at the end of the
1211	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1212	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1213	 * for the original diagnosis.
1214	 */
1215	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1216					 sizeof(unsigned long)) - 1))
1217		(*cow_offset)--;
1218
1219	bitmap_words[0] = bitmap[*cow_offset];
1220	bitmap_words[1] = bitmap[*cow_offset + 1];
1221
1222	*cow_offset *= sizeof(unsigned long);
1223	*cow_offset += bitmap_offset;
1224}
1225
1226static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1227		       unsigned long offset, unsigned long *bitmap,
1228		       __u64 bitmap_offset, __u64 bitmap_len)
1229{
1230	__u64 sector = offset >> SECTOR_SHIFT;
1231	int i;
1232
1233	if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
1234		panic("Operation too long");
1235
1236	if (req_op(req->req) == REQ_OP_READ) {
1237		for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
1238			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1239				ubd_set_bit(i, (unsigned char *)
1240					    &segment->sector_mask);
1241		}
1242	} else {
1243		cowify_bitmap(offset, segment->length, &segment->sector_mask,
1244			      &segment->cow_offset, bitmap, bitmap_offset,
1245			      segment->bitmap_words, bitmap_len);
1246	}
 
 
 
1247}
1248
1249static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1250			struct request *req)
 
 
1251{
1252	struct bio_vec bvec;
1253	struct req_iterator iter;
1254	int i = 0;
1255	unsigned long byte_offset = io_req->offset;
1256	enum req_op op = req_op(req);
1257
1258	if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1259		io_req->io_desc[0].buffer = NULL;
1260		io_req->io_desc[0].length = blk_rq_bytes(req);
1261	} else {
1262		rq_for_each_segment(bvec, req, iter) {
1263			BUG_ON(i >= io_req->desc_cnt);
1264
1265			io_req->io_desc[i].buffer = bvec_virt(&bvec);
1266			io_req->io_desc[i].length = bvec.bv_len;
1267			i++;
1268		}
1269	}
1270
1271	if (dev->cow.file) {
1272		for (i = 0; i < io_req->desc_cnt; i++) {
1273			cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1274				   dev->cow.bitmap, dev->cow.bitmap_offset,
1275				   dev->cow.bitmap_len);
1276			byte_offset += io_req->io_desc[i].length;
1277		}
1278
1279	}
1280}
1281
1282static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1283					   int desc_cnt)
1284{
1285	struct io_thread_req *io_req;
1286	int i;
1287
1288	io_req = kmalloc(sizeof(*io_req) +
1289			 (desc_cnt * sizeof(struct io_desc)),
1290			 GFP_ATOMIC);
1291	if (!io_req)
1292		return NULL;
1293
1294	io_req->req = req;
1295	if (dev->cow.file)
1296		io_req->fds[0] = dev->cow.fd;
1297	else
1298		io_req->fds[0] = dev->fd;
 
 
1299	io_req->error = 0;
1300	io_req->sectorsize = SECTOR_SIZE;
1301	io_req->fds[1] = dev->fd;
1302	io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1303	io_req->offsets[0] = 0;
1304	io_req->offsets[1] = dev->cow.data_offset;
 
 
1305
1306	for (i = 0 ; i < desc_cnt; i++) {
1307		io_req->io_desc[i].sector_mask = 0;
1308		io_req->io_desc[i].cow_offset = -1;
1309	}
1310
1311	return io_req;
1312}
1313
1314static int ubd_submit_request(struct ubd *dev, struct request *req)
 
1315{
1316	int segs = 0;
1317	struct io_thread_req *io_req;
1318	int ret;
1319	enum req_op op = req_op(req);
1320
1321	if (op == REQ_OP_FLUSH)
1322		segs = 0;
1323	else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1324		segs = 1;
1325	else
1326		segs = blk_rq_nr_phys_segments(req);
1327
1328	io_req = ubd_alloc_req(dev, req, segs);
1329	if (!io_req)
1330		return -ENOMEM;
 
 
1331
1332	io_req->desc_cnt = segs;
1333	if (segs)
1334		ubd_map_req(dev, io_req, req);
1335
1336	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1337	if (ret != sizeof(io_req)) {
1338		if (ret != -EAGAIN)
1339			pr_err("write to io thread failed: %d\n", -ret);
1340		kfree(io_req);
1341	}
1342	return ret;
1343}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1344
1345static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1346				 const struct blk_mq_queue_data *bd)
1347{
1348	struct ubd *ubd_dev = hctx->queue->queuedata;
1349	struct request *req = bd->rq;
1350	int ret = 0, res = BLK_STS_OK;
1351
1352	blk_mq_start_request(req);
1353
1354	spin_lock_irq(&ubd_dev->lock);
1355
1356	switch (req_op(req)) {
1357	case REQ_OP_FLUSH:
1358	case REQ_OP_READ:
1359	case REQ_OP_WRITE:
1360	case REQ_OP_DISCARD:
1361	case REQ_OP_WRITE_ZEROES:
1362		ret = ubd_submit_request(ubd_dev, req);
1363		break;
1364	default:
1365		WARN_ON_ONCE(1);
1366		res = BLK_STS_NOTSUPP;
1367	}
1368
1369	spin_unlock_irq(&ubd_dev->lock);
1370
1371	if (ret < 0) {
1372		if (ret == -ENOMEM)
1373			res = BLK_STS_RESOURCE;
1374		else
1375			res = BLK_STS_DEV_RESOURCE;
1376	}
1377
1378	return res;
1379}
1380
1381static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1382{
1383	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1384
1385	geo->heads = 128;
1386	geo->sectors = 32;
1387	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1388	return 0;
1389}
1390
1391static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1392		     unsigned int cmd, unsigned long arg)
1393{
1394	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1395	u16 ubd_id[ATA_ID_WORDS];
1396
1397	switch (cmd) {
1398		struct cdrom_volctrl volume;
1399	case HDIO_GET_IDENTITY:
1400		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1401		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1402		ubd_id[ATA_ID_HEADS]	= 128;
1403		ubd_id[ATA_ID_SECTORS]	= 32;
1404		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1405				 sizeof(ubd_id)))
1406			return -EFAULT;
1407		return 0;
1408
1409	case CDROMVOLREAD:
1410		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1411			return -EFAULT;
1412		volume.channel0 = 255;
1413		volume.channel1 = 255;
1414		volume.channel2 = 255;
1415		volume.channel3 = 255;
1416		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1417			return -EFAULT;
1418		return 0;
1419	}
1420	return -EINVAL;
1421}
1422
1423static int map_error(int error_code)
1424{
1425	switch (error_code) {
1426	case 0:
1427		return BLK_STS_OK;
1428	case ENOSYS:
1429	case EOPNOTSUPP:
1430		return BLK_STS_NOTSUPP;
1431	case ENOSPC:
1432		return BLK_STS_NOSPC;
1433	}
1434	return BLK_STS_IOERR;
1435}
1436
1437/*
1438 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1439 *
1440 * The following functions are part of UML hypervisor code.
1441 * All functions from here onwards are executed as a helper
1442 * thread and are not allowed to execute any kernel functions.
1443 *
1444 * Any communication must occur strictly via shared memory and IPC.
1445 *
1446 * Do not add printks, locks, kernel memory operations, etc - it
1447 * will result in unpredictable behaviour and/or crashes.
1448 */
1449
1450static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1451{
1452	int n;
 
 
1453
1454	if (segment->cow_offset == -1)
1455		return map_error(0);
 
 
 
 
 
1456
1457	n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1458			  sizeof(segment->bitmap_words), segment->cow_offset);
1459	if (n != sizeof(segment->bitmap_words))
1460		return map_error(-n);
1461
1462	return map_error(0);
1463}
1464
1465static void do_io(struct io_thread_req *req, struct io_desc *desc)
1466{
1467	char *buf = NULL;
1468	unsigned long len;
1469	int n, nsectors, start, end, bit;
 
1470	__u64 off;
1471
1472	/* FLUSH is really a special case, we cannot "case" it with others */
1473
1474	if (req_op(req->req) == REQ_OP_FLUSH) {
1475		/* fds[0] is always either the rw image or our cow file */
1476		req->error = map_error(-os_sync_file(req->fds[0]));
1477		return;
1478	}
1479
1480	nsectors = desc->length / req->sectorsize;
1481	start = 0;
1482	do {
1483		bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
1484		end = start;
1485		while((end < nsectors) &&
1486		      (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
 
1487			end++;
1488
1489		off = req->offset + req->offsets[bit] +
1490			start * req->sectorsize;
1491		len = (end - start) * req->sectorsize;
1492		if (desc->buffer != NULL)
1493			buf = &desc->buffer[start * req->sectorsize];
1494
1495		switch (req_op(req->req)) {
1496		case REQ_OP_READ:
 
 
 
 
 
1497			n = 0;
1498			do {
1499				buf = &buf[n];
1500				len -= n;
1501				n = os_pread_file(req->fds[bit], buf, len, off);
1502				if (n < 0) {
1503					req->error = map_error(-n);
 
 
1504					return;
1505				}
1506			} while((n < len) && (n != 0));
1507			if (n < len) memset(&buf[n], 0, len - n);
1508			break;
1509		case REQ_OP_WRITE:
1510			n = os_pwrite_file(req->fds[bit], buf, len, off);
1511			if(n != len){
1512				req->error = map_error(-n);
1513				return;
1514			}
1515			break;
1516		case REQ_OP_DISCARD:
1517			n = os_falloc_punch(req->fds[bit], off, len);
1518			if (n) {
1519				req->error = map_error(-n);
1520				return;
1521			}
1522			break;
1523		case REQ_OP_WRITE_ZEROES:
1524			n = os_falloc_zeroes(req->fds[bit], off, len);
1525			if (n) {
1526				req->error = map_error(-n);
1527				return;
1528			}
1529			break;
1530		default:
1531			WARN_ON_ONCE(1);
1532			req->error = BLK_STS_NOTSUPP;
1533			return;
1534		}
1535
1536		start = end;
1537	} while(start < nsectors);
1538
1539	req->offset += len;
1540	req->error = update_bitmap(req, desc);
1541}
1542
/* Per the original note, changed in start_io_thread; the only call to
 * start_io_thread visible in this file is in ubd_driver_init, which is
 * an initcall.
 */
int kernel_fd = -1;

/* Only changed by the io thread, which increments it once per request.
 * XXX: the value is currently not read anywhere.
 */
static int io_count;
1550
1551int io_thread(void *arg)
1552{
1553	int n, count, written, res;
1554
1555	os_fix_helper_signals();
1556
 
1557	while(1){
1558		n = bulk_req_safe_read(
1559			kernel_fd,
1560			io_req_buffer,
1561			&io_remainder,
1562			&io_remainder_size,
1563			UBD_REQ_BUFFER_SIZE
1564		);
1565		if (n <= 0) {
1566			if (n == -EAGAIN)
1567				ubd_read_poll(-1);
1568
1569			continue;
1570		}
1571
1572		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1573			struct io_thread_req *req = (*io_req_buffer)[count];
1574			int i;
1575
1576			io_count++;
1577			for (i = 0; !req->error && i < req->desc_cnt; i++)
1578				do_io(req, &(req->io_desc[i]));
1579
1580		}
1581
1582		written = 0;
1583
1584		do {
1585			res = os_write_file(kernel_fd,
1586					    ((char *) io_req_buffer) + written,
1587					    n - written);
1588			if (res >= 0) {
1589				written += res;
1590			}
1591			if (written < n) {
1592				ubd_write_poll(-1);
1593			}
1594		} while (written < n);
1595	}
1596
1597	return 0;
1598}