Linux Audio

Check our new training course

Loading...
v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018 Cambridge Greys Ltd
   4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
   5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
   6 */
   7
   8/* 2001-09-28...2002-04-17
   9 * Partition stuff by James_McMechan@hotmail.com
  10 * old style ubd by setting UBD_SHIFT to 0
  11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  12 * partitions have changed in 2.5
  13 * 2003-01-29 more tinkering for 2.5.59-1
  14 * This should now address the sysfs problems and has
  15 * the symlink for devfs to allow for booting with
  16 * the common /dev/ubd/discX/... names rather than
  17 * only /dev/ubdN/discN this version also has lots of
  18 * clean ups preparing for ubd-many.
  19 * James McMechan
  20 */
  21
  22#define UBD_SHIFT 4
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/blkdev.h>
  27#include <linux/blk-mq.h>
  28#include <linux/ata.h>
  29#include <linux/hdreg.h>
  30#include <linux/major.h>
  31#include <linux/cdrom.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/ctype.h>
  35#include <linux/slab.h>
  36#include <linux/vmalloc.h>
  37#include <linux/platform_device.h>
  38#include <linux/scatterlist.h>
  39#include <asm/tlbflush.h>
  40#include <kern_util.h>
  41#include "mconsole_kern.h"
  42#include <init.h>
  43#include <irq_kern.h>
  44#include "ubd.h"
  45#include <os.h>
  46#include "cow.h"
  47
  48/* Max request size is determined by sector mask - 32K */
  49#define UBD_MAX_REQUEST (8 * sizeof(long))
  50
  51struct io_desc {
  52	char *buffer;
  53	unsigned long length;
  54	unsigned long sector_mask;
  55	unsigned long long cow_offset;
  56	unsigned long bitmap_words[2];
  57};
  58
  59struct io_thread_req {
  60	struct request *req;
  61	int fds[2];
  62	unsigned long offsets[2];
  63	unsigned long long offset;
  64	int sectorsize;
  65	int error;
  66
  67	int desc_cnt;
  68	/* io_desc has to be the last element of the struct */
  69	struct io_desc io_desc[];
  70};
  71
  72
  73static struct io_thread_req * (*irq_req_buffer)[];
  74static struct io_thread_req *irq_remainder;
  75static int irq_remainder_size;
  76
  77static struct io_thread_req * (*io_req_buffer)[];
  78static struct io_thread_req *io_remainder;
  79static int io_remainder_size;
  80
  81
  82
  83static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  84{
  85	__u64 n;
  86	int bits, off;
  87
  88	bits = sizeof(data[0]) * 8;
  89	n = bit / bits;
  90	off = bit % bits;
  91	return (data[n] & (1 << off)) != 0;
  92}
  93
  94static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  95{
  96	__u64 n;
  97	int bits, off;
  98
  99	bits = sizeof(data[0]) * 8;
 100	n = bit / bits;
 101	off = bit % bits;
 102	data[n] |= (1 << off);
 103}
 104/*End stuff from ubd_user.h*/
 105
 106#define DRIVER_NAME "uml-blkdev"
 107
 108static DEFINE_MUTEX(ubd_lock);
 109static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
 110
 111static int ubd_open(struct block_device *bdev, fmode_t mode);
 112static void ubd_release(struct gendisk *disk, fmode_t mode);
 113static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 114		     unsigned int cmd, unsigned long arg);
 115static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 116
 117#define MAX_DEV (16)
 118
 119static const struct block_device_operations ubd_blops = {
 120        .owner		= THIS_MODULE,
 121        .open		= ubd_open,
 122        .release	= ubd_release,
 123        .ioctl		= ubd_ioctl,
 124        .compat_ioctl	= blkdev_compat_ptr_ioctl,
 125	.getgeo		= ubd_getgeo,
 126};
 127
 128/* Protected by ubd_lock */
 129static struct gendisk *ubd_gendisk[MAX_DEV];
 130
 131#ifdef CONFIG_BLK_DEV_UBD_SYNC
 132#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 133					 .cl = 1 })
 134#else
 135#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
 136					 .cl = 1 })
 137#endif
 138static struct openflags global_openflags = OPEN_FLAGS;
 139
 140struct cow {
 141	/* backing file name */
 142	char *file;
 143	/* backing file fd */
 144	int fd;
 145	unsigned long *bitmap;
 146	unsigned long bitmap_len;
 147	int bitmap_offset;
 148	int data_offset;
 149};
 150
 151#define MAX_SG 64
 152
 153struct ubd {
 154	/* name (and fd, below) of the file opened for writing, either the
 155	 * backing or the cow file. */
 156	char *file;
 157	char *serial;
 158	int count;
 159	int fd;
 160	__u64 size;
 161	struct openflags boot_openflags;
 162	struct openflags openflags;
 163	unsigned shared:1;
 164	unsigned no_cow:1;
 165	unsigned no_trim:1;
 166	struct cow cow;
 167	struct platform_device pdev;
 168	struct request_queue *queue;
 169	struct blk_mq_tag_set tag_set;
 170	spinlock_t lock;
 171};
 172
 173#define DEFAULT_COW { \
 174	.file =			NULL, \
 175	.fd =			-1,	\
 176	.bitmap =		NULL, \
 177	.bitmap_offset =	0, \
 178	.data_offset =		0, \
 179}
 180
 181#define DEFAULT_UBD { \
 182	.file = 		NULL, \
 183	.serial =		NULL, \
 184	.count =		0, \
 185	.fd =			-1, \
 186	.size =			-1, \
 187	.boot_openflags =	OPEN_FLAGS, \
 188	.openflags =		OPEN_FLAGS, \
 189	.no_cow =               0, \
 190	.no_trim =		0, \
 191	.shared =		0, \
 192	.cow =			DEFAULT_COW, \
 193	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
 194}
 195
 196/* Protected by ubd_lock */
 197static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 198
 199static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 200				 const struct blk_mq_queue_data *bd);
 201
 202static int fake_ide_setup(char *str)
 203{
 204	pr_warn("The fake_ide option has been removed\n");
 205	return 1;
 206}
 207__setup("fake_ide", fake_ide_setup);
 208
 209__uml_help(fake_ide_setup,
 210"fake_ide\n"
 211"    Obsolete stub.\n\n"
 212);
 213
 214static int parse_unit(char **ptr)
 215{
 216	char *str = *ptr, *end;
 217	int n = -1;
 218
 219	if(isdigit(*str)) {
 220		n = simple_strtoul(str, &end, 0);
 221		if(end == str)
 222			return -1;
 223		*ptr = end;
 224	}
 225	else if (('a' <= *str) && (*str <= 'z')) {
 226		n = *str - 'a';
 227		str++;
 228		*ptr = str;
 229	}
 230	return n;
 231}
 232
 233/* If *index_out == -1 at exit, the passed option was a general one;
 234 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 235 * should not be freed on exit.
 236 */
 237static int ubd_setup_common(char *str, int *index_out, char **error_out)
 238{
 239	struct ubd *ubd_dev;
 240	struct openflags flags = global_openflags;
 241	char *file, *backing_file, *serial;
 242	int n, err = 0, i;
 243
 244	if(index_out) *index_out = -1;
 245	n = *str;
 246	if(n == '='){
 247		str++;
 248		if(!strcmp(str, "sync")){
 249			global_openflags = of_sync(global_openflags);
 250			return err;
 251		}
 252
 253		pr_warn("fake major not supported any more\n");
 254		return 0;
 255	}
 256
 257	n = parse_unit(&str);
 258	if(n < 0){
 259		*error_out = "Couldn't parse device number";
 260		return -EINVAL;
 261	}
 262	if(n >= MAX_DEV){
 263		*error_out = "Device number out of range";
 264		return 1;
 265	}
 266
 267	err = -EBUSY;
 268	mutex_lock(&ubd_lock);
 269
 270	ubd_dev = &ubd_devs[n];
 271	if(ubd_dev->file != NULL){
 272		*error_out = "Device is already configured";
 273		goto out;
 274	}
 275
 276	if (index_out)
 277		*index_out = n;
 278
 279	err = -EINVAL;
 280	for (i = 0; i < sizeof("rscdt="); i++) {
 281		switch (*str) {
 282		case 'r':
 283			flags.w = 0;
 284			break;
 285		case 's':
 286			flags.s = 1;
 287			break;
 288		case 'd':
 289			ubd_dev->no_cow = 1;
 290			break;
 291		case 'c':
 292			ubd_dev->shared = 1;
 293			break;
 294		case 't':
 295			ubd_dev->no_trim = 1;
 296			break;
 297		case '=':
 298			str++;
 299			goto break_loop;
 300		default:
 301			*error_out = "Expected '=' or flag letter "
 302				"(r, s, c, t or d)";
 303			goto out;
 304		}
 305		str++;
 306	}
 307
 308	if (*str == '=')
 309		*error_out = "Too many flags specified";
 310	else
 311		*error_out = "Missing '='";
 312	goto out;
 313
 314break_loop:
 315	file = strsep(&str, ",:");
 316	if (*file == '\0')
 317		file = NULL;
 318
 319	backing_file = strsep(&str, ",:");
 320	if (backing_file && *backing_file == '\0')
 321		backing_file = NULL;
 322
 323	serial = strsep(&str, ",:");
 324	if (serial && *serial == '\0')
 325		serial = NULL;
 326
 327	if (backing_file && ubd_dev->no_cow) {
 328		*error_out = "Can't specify both 'd' and a cow file";
 329		goto out;
 330	}
 331
 332	err = 0;
 333	ubd_dev->file = file;
 334	ubd_dev->cow.file = backing_file;
 335	ubd_dev->serial = serial;
 336	ubd_dev->boot_openflags = flags;
 337out:
 338	mutex_unlock(&ubd_lock);
 339	return err;
 340}
 341
 342static int ubd_setup(char *str)
 343{
 344	char *error;
 345	int err;
 346
 347	err = ubd_setup_common(str, NULL, &error);
 348	if(err)
 349		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 350		       "%s\n", str, error);
 351	return 1;
 352}
 353
 354__setup("ubd", ubd_setup);
 355__uml_help(ubd_setup,
 356"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
 357"    This is used to associate a device with a file in the underlying\n"
 358"    filesystem. When specifying two filenames, the first one is the\n"
 359"    COW name and the second is the backing file name. As separator you can\n"
 360"    use either a ':' or a ',': the first one allows writing things like;\n"
 361"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 362"    while with a ',' the shell would not expand the 2nd '~'.\n"
 363"    When using only one filename, UML will detect whether to treat it like\n"
 364"    a COW file or a backing file. To override this detection, add the 'd'\n"
 365"    flag:\n"
 366"	ubd0d=BackingFile\n"
 367"    Usually, there is a filesystem in the file, but \n"
 368"    that's not required. Swap devices containing swap files can be\n"
 369"    specified like this. Also, a file which doesn't contain a\n"
 370"    filesystem can have its contents read in the virtual \n"
 371"    machine by running 'dd' on the device. <n> must be in the range\n"
 372"    0 to 7. Appending an 'r' to the number will cause that device\n"
 373"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 374"    an 's' will cause data to be written to disk on the host immediately.\n"
 375"    'c' will cause the device to be treated as being shared between multiple\n"
 376"    UMLs and file locking will be turned off - this is appropriate for a\n"
 377"    cluster filesystem and inappropriate at almost all other times.\n\n"
 378"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 379"    An optional device serial number can be exposed using the serial parameter\n"
 380"    on the cmdline which is exposed as a sysfs entry. This is particularly\n"
 381"    useful when a unique number should be given to the device. Note when\n"
 382"    specifying a label, the filename2 must be also presented. It can be\n"
 383"    an empty string, in which case the backing file is not used:\n"
 384"       ubd0=File,,Serial\n"
 385);
 386
 387static int udb_setup(char *str)
 388{
 389	printk("udb%s specified on command line is almost certainly a ubd -> "
 390	       "udb TYPO\n", str);
 391	return 1;
 392}
 393
 394__setup("udb", udb_setup);
 395__uml_help(udb_setup,
 396"udb\n"
 397"    This option is here solely to catch ubd -> udb typos, which can be\n"
 398"    to impossible to catch visually unless you specifically look for\n"
 399"    them.  The only result of any option starting with 'udb' is an error\n"
 400"    in the boot output.\n\n"
 401);
 402
 403/* Only changed by ubd_init, which is an initcall. */
 404static int thread_fd = -1;
 405
 406/* Function to read several request pointers at a time
 407* handling fractional reads if (and as) needed
 408*/
 409
 410static int bulk_req_safe_read(
 411	int fd,
 412	struct io_thread_req * (*request_buffer)[],
 413	struct io_thread_req **remainder,
 414	int *remainder_size,
 415	int max_recs
 416	)
 417{
 418	int n = 0;
 419	int res = 0;
 420
 421	if (*remainder_size > 0) {
 422		memmove(
 423			(char *) request_buffer,
 424			(char *) remainder, *remainder_size
 425		);
 426		n = *remainder_size;
 427	}
 428
 429	res = os_read_file(
 430			fd,
 431			((char *) request_buffer) + *remainder_size,
 432			sizeof(struct io_thread_req *)*max_recs
 433				- *remainder_size
 434		);
 435	if (res > 0) {
 436		n += res;
 437		if ((n % sizeof(struct io_thread_req *)) > 0) {
 438			/*
 439			* Read somehow returned not a multiple of dword
 440			* theoretically possible, but never observed in the
 441			* wild, so read routine must be able to handle it
 442			*/
 443			*remainder_size = n % sizeof(struct io_thread_req *);
 444			WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
 445			memmove(
 446				remainder,
 447				((char *) request_buffer) +
 448					(n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
 449				*remainder_size
 450			);
 451			n = n - *remainder_size;
 452		}
 453	} else {
 454		n = res;
 455	}
 456	return n;
 457}
 458
 459/* Called without dev->lock held, and only in interrupt context. */
 460static void ubd_handler(void)
 461{
 462	int n;
 463	int count;
 464
 465	while(1){
 466		n = bulk_req_safe_read(
 467			thread_fd,
 468			irq_req_buffer,
 469			&irq_remainder,
 470			&irq_remainder_size,
 471			UBD_REQ_BUFFER_SIZE
 472		);
 473		if (n < 0) {
 474			if(n == -EAGAIN)
 475				break;
 476			printk(KERN_ERR "spurious interrupt in ubd_handler, "
 477			       "err = %d\n", -n);
 478			return;
 479		}
 480		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 481			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 482
 483			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
 484				blk_queue_max_discard_sectors(io_req->req->q, 0);
 485				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
 
 486			}
 487			blk_mq_end_request(io_req->req, io_req->error);
 488			kfree(io_req);
 489		}
 490	}
 491}
 492
 493static irqreturn_t ubd_intr(int irq, void *dev)
 494{
 495	ubd_handler();
 496	return IRQ_HANDLED;
 497}
 498
 499/* Only changed by ubd_init, which is an initcall. */
 500static int io_pid = -1;
 501
 502static void kill_io_thread(void)
 503{
 504	if(io_pid != -1)
 505		os_kill_process(io_pid, 1);
 506}
 507
 508__uml_exitcall(kill_io_thread);
 509
 510static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 511{
 512	char *file;
 513	int fd;
 514	int err;
 515
 516	__u32 version;
 517	__u32 align;
 518	char *backing_file;
 519	time64_t mtime;
 520	unsigned long long size;
 521	int sector_size;
 522	int bitmap_offset;
 523
 524	if (ubd_dev->file && ubd_dev->cow.file) {
 525		file = ubd_dev->cow.file;
 526
 527		goto out;
 528	}
 529
 530	fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
 531	if (fd < 0)
 532		return fd;
 533
 534	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 535		&mtime, &size, &sector_size, &align, &bitmap_offset);
 536	os_close_file(fd);
 537
 538	if(err == -EINVAL)
 539		file = ubd_dev->file;
 540	else
 541		file = backing_file;
 542
 543out:
 544	return os_file_size(file, size_out);
 545}
 546
 547static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 548{
 549	int err;
 550
 551	err = os_pread_file(fd, buf, len, offset);
 552	if (err < 0)
 553		return err;
 554
 555	return 0;
 556}
 557
 558static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
 559{
 560	time64_t modtime;
 561	unsigned long long actual;
 562	int err;
 563
 564	err = os_file_modtime(file, &modtime);
 565	if (err < 0) {
 566		printk(KERN_ERR "Failed to get modification time of backing "
 567		       "file \"%s\", err = %d\n", file, -err);
 568		return err;
 569	}
 570
 571	err = os_file_size(file, &actual);
 572	if (err < 0) {
 573		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 574		       "err = %d\n", file, -err);
 575		return err;
 576	}
 577
 578	if (actual != size) {
 579		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
 580		 * the typecast.*/
 581		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 582		       "vs backing file\n", (unsigned long long) size, actual);
 583		return -EINVAL;
 584	}
 585	if (modtime != mtime) {
 586		printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
 587		       "backing file\n", mtime, modtime);
 588		return -EINVAL;
 589	}
 590	return 0;
 591}
 592
 593static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 594{
 595	struct uml_stat buf1, buf2;
 596	int err;
 597
 598	if (from_cmdline == NULL)
 599		return 0;
 600	if (!strcmp(from_cmdline, from_cow))
 601		return 0;
 602
 603	err = os_stat_file(from_cmdline, &buf1);
 604	if (err < 0) {
 605		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 606		       -err);
 607		return 0;
 608	}
 609	err = os_stat_file(from_cow, &buf2);
 610	if (err < 0) {
 611		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 612		       -err);
 613		return 1;
 614	}
 615	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 616		return 0;
 617
 618	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 619	       "\"%s\" specified in COW header of \"%s\"\n",
 620	       from_cmdline, from_cow, cow);
 621	return 1;
 622}
 623
 624static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 625		  char **backing_file_out, int *bitmap_offset_out,
 626		  unsigned long *bitmap_len_out, int *data_offset_out,
 627		  int *create_cow_out)
 628{
 629	time64_t mtime;
 630	unsigned long long size;
 631	__u32 version, align;
 632	char *backing_file;
 633	int fd, err, sectorsize, asked_switch, mode = 0644;
 634
 635	fd = os_open_file(file, *openflags, mode);
 636	if (fd < 0) {
 637		if ((fd == -ENOENT) && (create_cow_out != NULL))
 638			*create_cow_out = 1;
 639		if (!openflags->w ||
 640		    ((fd != -EROFS) && (fd != -EACCES)))
 641			return fd;
 642		openflags->w = 0;
 643		fd = os_open_file(file, *openflags, mode);
 644		if (fd < 0)
 645			return fd;
 646	}
 647
 648	if (shared)
 649		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
 650	else {
 651		err = os_lock_file(fd, openflags->w);
 652		if (err < 0) {
 653			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
 654			       file, -err);
 655			goto out_close;
 656		}
 657	}
 658
 659	/* Successful return case! */
 660	if (backing_file_out == NULL)
 661		return fd;
 662
 663	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
 664			      &size, &sectorsize, &align, bitmap_offset_out);
 665	if (err && (*backing_file_out != NULL)) {
 666		printk(KERN_ERR "Failed to read COW header from COW file "
 667		       "\"%s\", errno = %d\n", file, -err);
 668		goto out_close;
 669	}
 670	if (err)
 671		return fd;
 672
 673	asked_switch = path_requires_switch(*backing_file_out, backing_file,
 674					    file);
 675
 676	/* Allow switching only if no mismatch. */
 677	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
 678						   mtime)) {
 679		printk(KERN_ERR "Switching backing file to '%s'\n",
 680		       *backing_file_out);
 681		err = write_cow_header(file, fd, *backing_file_out,
 682				       sectorsize, align, &size);
 683		if (err) {
 684			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
 685			goto out_close;
 686		}
 687	} else {
 688		*backing_file_out = backing_file;
 689		err = backing_file_mismatch(*backing_file_out, size, mtime);
 690		if (err)
 691			goto out_close;
 692	}
 693
 694	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
 695		  bitmap_len_out, data_offset_out);
 696
 697	return fd;
 698 out_close:
 699	os_close_file(fd);
 700	return err;
 701}
 702
 703static int create_cow_file(char *cow_file, char *backing_file,
 704		    struct openflags flags,
 705		    int sectorsize, int alignment, int *bitmap_offset_out,
 706		    unsigned long *bitmap_len_out, int *data_offset_out)
 707{
 708	int err, fd;
 709
 710	flags.c = 1;
 711	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 712	if (fd < 0) {
 713		err = fd;
 714		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 715		       cow_file, -err);
 716		goto out;
 717	}
 718
 719	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 720			    bitmap_offset_out, bitmap_len_out,
 721			    data_offset_out);
 722	if (!err)
 723		return fd;
 724	os_close_file(fd);
 725 out:
 726	return err;
 727}
 728
 729static void ubd_close_dev(struct ubd *ubd_dev)
 730{
 731	os_close_file(ubd_dev->fd);
 732	if(ubd_dev->cow.file == NULL)
 733		return;
 734
 735	os_close_file(ubd_dev->cow.fd);
 736	vfree(ubd_dev->cow.bitmap);
 737	ubd_dev->cow.bitmap = NULL;
 738}
 739
 740static int ubd_open_dev(struct ubd *ubd_dev)
 741{
 742	struct openflags flags;
 743	char **back_ptr;
 744	int err, create_cow, *create_ptr;
 745	int fd;
 746
 747	ubd_dev->openflags = ubd_dev->boot_openflags;
 748	create_cow = 0;
 749	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 750	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 751
 752	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 753				back_ptr, &ubd_dev->cow.bitmap_offset,
 754				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 755				create_ptr);
 756
 757	if((fd == -ENOENT) && create_cow){
 758		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 759					  ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 760					  &ubd_dev->cow.bitmap_offset,
 761					  &ubd_dev->cow.bitmap_len,
 762					  &ubd_dev->cow.data_offset);
 763		if(fd >= 0){
 764			printk(KERN_INFO "Creating \"%s\" as COW file for "
 765			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 766		}
 767	}
 768
 769	if(fd < 0){
 770		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 771		       -fd);
 772		return fd;
 773	}
 774	ubd_dev->fd = fd;
 775
 776	if(ubd_dev->cow.file != NULL){
 777		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 778
 779		err = -ENOMEM;
 780		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 781		if(ubd_dev->cow.bitmap == NULL){
 782			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 783			goto error;
 784		}
 785		flush_tlb_kernel_vm();
 786
 787		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 788				      ubd_dev->cow.bitmap_offset,
 789				      ubd_dev->cow.bitmap_len);
 790		if(err < 0)
 791			goto error;
 792
 793		flags = ubd_dev->openflags;
 794		flags.w = 0;
 795		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 796				    NULL, NULL, NULL, NULL);
 797		if(err < 0) goto error;
 798		ubd_dev->cow.fd = err;
 799	}
 800	if (ubd_dev->no_trim == 0) {
 801		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
 
 802		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 803		blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 
 804	}
 805	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 806	return 0;
 807 error:
 808	os_close_file(ubd_dev->fd);
 809	return err;
 810}
 811
 812static void ubd_device_release(struct device *dev)
 813{
 814	struct ubd *ubd_dev = dev_get_drvdata(dev);
 815
 816	blk_mq_free_tag_set(&ubd_dev->tag_set);
 817	*ubd_dev = ((struct ubd) DEFAULT_UBD);
 818}
 819
 820static ssize_t serial_show(struct device *dev,
 821			   struct device_attribute *attr, char *buf)
 822{
 823	struct gendisk *disk = dev_to_disk(dev);
 824	struct ubd *ubd_dev = disk->private_data;
 825
 826	if (!ubd_dev)
 827		return 0;
 828
 829	return sprintf(buf, "%s", ubd_dev->serial);
 830}
 831
 832static DEVICE_ATTR_RO(serial);
 833
 834static struct attribute *ubd_attrs[] = {
 835	&dev_attr_serial.attr,
 836	NULL,
 837};
 838
 839static umode_t ubd_attrs_are_visible(struct kobject *kobj,
 840				     struct attribute *a, int n)
 841{
 842	return a->mode;
 843}
 844
 845static const struct attribute_group ubd_attr_group = {
 846	.attrs = ubd_attrs,
 847	.is_visible = ubd_attrs_are_visible,
 848};
 849
 850static const struct attribute_group *ubd_attr_groups[] = {
 851	&ubd_attr_group,
 852	NULL,
 853};
 854
 855static int ubd_disk_register(int major, u64 size, int unit,
 856			     struct gendisk *disk)
 857{
 858	disk->major = major;
 859	disk->first_minor = unit << UBD_SHIFT;
 860	disk->minors = 1 << UBD_SHIFT;
 861	disk->fops = &ubd_blops;
 862	set_capacity(disk, size / 512);
 863	sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 864
 865	ubd_devs[unit].pdev.id   = unit;
 866	ubd_devs[unit].pdev.name = DRIVER_NAME;
 867	ubd_devs[unit].pdev.dev.release = ubd_device_release;
 868	dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 869	platform_device_register(&ubd_devs[unit].pdev);
 870
 871	disk->private_data = &ubd_devs[unit];
 872	disk->queue = ubd_devs[unit].queue;
 873	return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
 874}
 875
 876#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
 877
 878static const struct blk_mq_ops ubd_mq_ops = {
 879	.queue_rq = ubd_queue_rq,
 880};
 881
 882static int ubd_add(int n, char **error_out)
 883{
 884	struct ubd *ubd_dev = &ubd_devs[n];
 885	struct gendisk *disk;
 886	int err = 0;
 887
 888	if(ubd_dev->file == NULL)
 889		goto out;
 890
 891	err = ubd_file_size(ubd_dev, &ubd_dev->size);
 892	if(err < 0){
 893		*error_out = "Couldn't determine size of device's file";
 894		goto out;
 895	}
 896
 897	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 898
 899	ubd_dev->tag_set.ops = &ubd_mq_ops;
 900	ubd_dev->tag_set.queue_depth = 64;
 901	ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
 902	ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 903	ubd_dev->tag_set.driver_data = ubd_dev;
 904	ubd_dev->tag_set.nr_hw_queues = 1;
 905
 906	err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
 907	if (err)
 908		goto out;
 909
 910	disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev);
 911	if (IS_ERR(disk)) {
 912		err = PTR_ERR(disk);
 913		goto out_cleanup_tags;
 914	}
 915	ubd_dev->queue = disk->queue;
 916
 917	blk_queue_write_cache(ubd_dev->queue, true, false);
 918	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 919	blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
 920	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
 921	if (err)
 922		goto out_cleanup_disk;
 923
 924	ubd_gendisk[n] = disk;
 925	return 0;
 926
 927out_cleanup_disk:
 928	put_disk(disk);
 929out_cleanup_tags:
 930	blk_mq_free_tag_set(&ubd_dev->tag_set);
 931out:
 932	return err;
 933}
 934
 935static int ubd_config(char *str, char **error_out)
 936{
 937	int n, ret;
 938
 939	/* This string is possibly broken up and stored, so it's only
 940	 * freed if ubd_setup_common fails, or if only general options
 941	 * were set.
 942	 */
 943	str = kstrdup(str, GFP_KERNEL);
 944	if (str == NULL) {
 945		*error_out = "Failed to allocate memory";
 946		return -ENOMEM;
 947	}
 948
 949	ret = ubd_setup_common(str, &n, error_out);
 950	if (ret)
 951		goto err_free;
 952
 953	if (n == -1) {
 954		ret = 0;
 955		goto err_free;
 956	}
 957
 958	mutex_lock(&ubd_lock);
 959	ret = ubd_add(n, error_out);
 960	if (ret)
 961		ubd_devs[n].file = NULL;
 962	mutex_unlock(&ubd_lock);
 963
 964out:
 965	return ret;
 966
 967err_free:
 968	kfree(str);
 969	goto out;
 970}
 971
 972static int ubd_get_config(char *name, char *str, int size, char **error_out)
 973{
 974	struct ubd *ubd_dev;
 975	int n, len = 0;
 976
 977	n = parse_unit(&name);
 978	if((n >= MAX_DEV) || (n < 0)){
 979		*error_out = "ubd_get_config : device number out of range";
 980		return -1;
 981	}
 982
 983	ubd_dev = &ubd_devs[n];
 984	mutex_lock(&ubd_lock);
 985
 986	if(ubd_dev->file == NULL){
 987		CONFIG_CHUNK(str, size, len, "", 1);
 988		goto out;
 989	}
 990
 991	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
 992
 993	if(ubd_dev->cow.file != NULL){
 994		CONFIG_CHUNK(str, size, len, ",", 0);
 995		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
 996	}
 997	else CONFIG_CHUNK(str, size, len, "", 1);
 998
 999 out:
1000	mutex_unlock(&ubd_lock);
1001	return len;
1002}
1003
1004static int ubd_id(char **str, int *start_out, int *end_out)
1005{
1006	int n;
1007
1008	n = parse_unit(str);
1009	*start_out = 0;
1010	*end_out = MAX_DEV - 1;
1011	return n;
1012}
1013
1014static int ubd_remove(int n, char **error_out)
1015{
1016	struct gendisk *disk = ubd_gendisk[n];
1017	struct ubd *ubd_dev;
1018	int err = -ENODEV;
1019
1020	mutex_lock(&ubd_lock);
1021
1022	ubd_dev = &ubd_devs[n];
1023
1024	if(ubd_dev->file == NULL)
1025		goto out;
1026
1027	/* you cannot remove a open disk */
1028	err = -EBUSY;
1029	if(ubd_dev->count > 0)
1030		goto out;
1031
1032	ubd_gendisk[n] = NULL;
1033	if(disk != NULL){
1034		del_gendisk(disk);
1035		put_disk(disk);
1036	}
1037
1038	err = 0;
1039	platform_device_unregister(&ubd_dev->pdev);
1040out:
1041	mutex_unlock(&ubd_lock);
1042	return err;
1043}
1044
1045/* All these are called by mconsole in process context and without
1046 * ubd-specific locks.  The structure itself is const except for .list.
1047 */
1048static struct mc_device ubd_mc = {
1049	.list		= LIST_HEAD_INIT(ubd_mc.list),
1050	.name		= "ubd",
1051	.config		= ubd_config,
1052	.get_config	= ubd_get_config,
1053	.id		= ubd_id,
1054	.remove		= ubd_remove,
1055};
1056
1057static int __init ubd_mc_init(void)
1058{
1059	mconsole_register_dev(&ubd_mc);
1060	return 0;
1061}
1062
1063__initcall(ubd_mc_init);
1064
1065static int __init ubd0_init(void)
1066{
1067	struct ubd *ubd_dev = &ubd_devs[0];
1068
1069	mutex_lock(&ubd_lock);
1070	if(ubd_dev->file == NULL)
1071		ubd_dev->file = "root_fs";
1072	mutex_unlock(&ubd_lock);
1073
1074	return 0;
1075}
1076
1077__initcall(ubd0_init);
1078
1079/* Used in ubd_init, which is an initcall */
1080static struct platform_driver ubd_driver = {
1081	.driver = {
1082		.name  = DRIVER_NAME,
1083	},
1084};
1085
1086static int __init ubd_init(void)
1087{
1088	char *error;
1089	int i, err;
1090
1091	if (register_blkdev(UBD_MAJOR, "ubd"))
1092		return -1;
1093
1094	irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1095				       sizeof(struct io_thread_req *),
1096				       GFP_KERNEL
1097		);
1098	irq_remainder = 0;
1099
1100	if (irq_req_buffer == NULL) {
1101		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1102		return -1;
1103	}
1104	io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1105				      sizeof(struct io_thread_req *),
1106				      GFP_KERNEL
1107		);
1108
1109	io_remainder = 0;
1110
1111	if (io_req_buffer == NULL) {
1112		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1113		return -1;
1114	}
1115	platform_driver_register(&ubd_driver);
1116	mutex_lock(&ubd_lock);
1117	for (i = 0; i < MAX_DEV; i++){
1118		err = ubd_add(i, &error);
1119		if(err)
1120			printk(KERN_ERR "Failed to initialize ubd device %d :"
1121			       "%s\n", i, error);
1122	}
1123	mutex_unlock(&ubd_lock);
1124	return 0;
1125}
1126
1127late_initcall(ubd_init);
1128
1129static int __init ubd_driver_init(void){
1130	unsigned long stack;
1131	int err;
1132
1133	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1134	if(global_openflags.s){
1135		printk(KERN_INFO "ubd: Synchronous mode\n");
1136		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1137		 * enough. So use anyway the io thread. */
1138	}
1139	stack = alloc_stack(0, 0);
1140	io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
1141	if(io_pid < 0){
1142		printk(KERN_ERR
1143		       "ubd : Failed to start I/O thread (errno = %d) - "
1144		       "falling back to synchronous I/O\n", -io_pid);
1145		io_pid = -1;
1146		return 0;
1147	}
1148	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1149			     0, "ubd", ubd_devs);
1150	if(err < 0)
1151		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1152	return 0;
1153}
1154
1155device_initcall(ubd_driver_init);
1156
1157static int ubd_open(struct block_device *bdev, fmode_t mode)
1158{
1159	struct gendisk *disk = bdev->bd_disk;
1160	struct ubd *ubd_dev = disk->private_data;
1161	int err = 0;
1162
1163	mutex_lock(&ubd_mutex);
1164	if(ubd_dev->count == 0){
1165		err = ubd_open_dev(ubd_dev);
1166		if(err){
1167			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1168			       disk->disk_name, ubd_dev->file, -err);
1169			goto out;
1170		}
1171	}
1172	ubd_dev->count++;
1173	set_disk_ro(disk, !ubd_dev->openflags.w);
1174
1175	/* This should no more be needed. And it didn't work anyway to exclude
1176	 * read-write remounting of filesystems.*/
1177	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1178	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1179	        err = -EROFS;
1180	}*/
1181out:
1182	mutex_unlock(&ubd_mutex);
1183	return err;
1184}
1185
1186static void ubd_release(struct gendisk *disk, fmode_t mode)
1187{
1188	struct ubd *ubd_dev = disk->private_data;
1189
1190	mutex_lock(&ubd_mutex);
1191	if(--ubd_dev->count == 0)
1192		ubd_close_dev(ubd_dev);
1193	mutex_unlock(&ubd_mutex);
1194}
1195
1196static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1197			  __u64 *cow_offset, unsigned long *bitmap,
1198			  __u64 bitmap_offset, unsigned long *bitmap_words,
1199			  __u64 bitmap_len)
1200{
1201	__u64 sector = io_offset >> SECTOR_SHIFT;
1202	int i, update_bitmap = 0;
1203
1204	for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1205		if(cow_mask != NULL)
1206			ubd_set_bit(i, (unsigned char *) cow_mask);
1207		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1208			continue;
1209
1210		update_bitmap = 1;
1211		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1212	}
1213
1214	if(!update_bitmap)
1215		return;
1216
1217	*cow_offset = sector / (sizeof(unsigned long) * 8);
1218
1219	/* This takes care of the case where we're exactly at the end of the
1220	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1221	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1222	 * for the original diagnosis.
1223	 */
1224	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1225					 sizeof(unsigned long)) - 1))
1226		(*cow_offset)--;
1227
1228	bitmap_words[0] = bitmap[*cow_offset];
1229	bitmap_words[1] = bitmap[*cow_offset + 1];
1230
1231	*cow_offset *= sizeof(unsigned long);
1232	*cow_offset += bitmap_offset;
1233}
1234
1235static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1236		       unsigned long offset, unsigned long *bitmap,
1237		       __u64 bitmap_offset, __u64 bitmap_len)
1238{
1239	__u64 sector = offset >> SECTOR_SHIFT;
1240	int i;
1241
1242	if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
1243		panic("Operation too long");
1244
1245	if (req_op(req->req) == REQ_OP_READ) {
1246		for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
1247			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1248				ubd_set_bit(i, (unsigned char *)
1249					    &segment->sector_mask);
1250		}
1251	} else {
1252		cowify_bitmap(offset, segment->length, &segment->sector_mask,
1253			      &segment->cow_offset, bitmap, bitmap_offset,
1254			      segment->bitmap_words, bitmap_len);
1255	}
1256}
1257
1258static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1259			struct request *req)
1260{
1261	struct bio_vec bvec;
1262	struct req_iterator iter;
1263	int i = 0;
1264	unsigned long byte_offset = io_req->offset;
1265	enum req_op op = req_op(req);
1266
1267	if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1268		io_req->io_desc[0].buffer = NULL;
1269		io_req->io_desc[0].length = blk_rq_bytes(req);
1270	} else {
1271		rq_for_each_segment(bvec, req, iter) {
1272			BUG_ON(i >= io_req->desc_cnt);
1273
1274			io_req->io_desc[i].buffer = bvec_virt(&bvec);
 
1275			io_req->io_desc[i].length = bvec.bv_len;
1276			i++;
1277		}
1278	}
1279
1280	if (dev->cow.file) {
1281		for (i = 0; i < io_req->desc_cnt; i++) {
1282			cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1283				   dev->cow.bitmap, dev->cow.bitmap_offset,
1284				   dev->cow.bitmap_len);
1285			byte_offset += io_req->io_desc[i].length;
1286		}
1287
1288	}
1289}
1290
1291static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1292					   int desc_cnt)
1293{
1294	struct io_thread_req *io_req;
1295	int i;
1296
1297	io_req = kmalloc(sizeof(*io_req) +
1298			 (desc_cnt * sizeof(struct io_desc)),
1299			 GFP_ATOMIC);
1300	if (!io_req)
1301		return NULL;
1302
1303	io_req->req = req;
1304	if (dev->cow.file)
1305		io_req->fds[0] = dev->cow.fd;
1306	else
1307		io_req->fds[0] = dev->fd;
1308	io_req->error = 0;
1309	io_req->sectorsize = SECTOR_SIZE;
1310	io_req->fds[1] = dev->fd;
1311	io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1312	io_req->offsets[0] = 0;
1313	io_req->offsets[1] = dev->cow.data_offset;
1314
1315	for (i = 0 ; i < desc_cnt; i++) {
1316		io_req->io_desc[i].sector_mask = 0;
1317		io_req->io_desc[i].cow_offset = -1;
1318	}
1319
1320	return io_req;
1321}
1322
1323static int ubd_submit_request(struct ubd *dev, struct request *req)
1324{
1325	int segs = 0;
1326	struct io_thread_req *io_req;
1327	int ret;
1328	enum req_op op = req_op(req);
1329
1330	if (op == REQ_OP_FLUSH)
1331		segs = 0;
1332	else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1333		segs = 1;
1334	else
1335		segs = blk_rq_nr_phys_segments(req);
1336
1337	io_req = ubd_alloc_req(dev, req, segs);
1338	if (!io_req)
1339		return -ENOMEM;
1340
1341	io_req->desc_cnt = segs;
1342	if (segs)
1343		ubd_map_req(dev, io_req, req);
1344
1345	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1346	if (ret != sizeof(io_req)) {
1347		if (ret != -EAGAIN)
1348			pr_err("write to io thread failed: %d\n", -ret);
1349		kfree(io_req);
1350	}
1351	return ret;
1352}
1353
1354static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1355				 const struct blk_mq_queue_data *bd)
1356{
1357	struct ubd *ubd_dev = hctx->queue->queuedata;
1358	struct request *req = bd->rq;
1359	int ret = 0, res = BLK_STS_OK;
1360
1361	blk_mq_start_request(req);
1362
1363	spin_lock_irq(&ubd_dev->lock);
1364
1365	switch (req_op(req)) {
1366	case REQ_OP_FLUSH:
1367	case REQ_OP_READ:
1368	case REQ_OP_WRITE:
1369	case REQ_OP_DISCARD:
1370	case REQ_OP_WRITE_ZEROES:
1371		ret = ubd_submit_request(ubd_dev, req);
1372		break;
1373	default:
1374		WARN_ON_ONCE(1);
1375		res = BLK_STS_NOTSUPP;
1376	}
1377
1378	spin_unlock_irq(&ubd_dev->lock);
1379
1380	if (ret < 0) {
1381		if (ret == -ENOMEM)
1382			res = BLK_STS_RESOURCE;
1383		else
1384			res = BLK_STS_DEV_RESOURCE;
1385	}
1386
1387	return res;
1388}
1389
1390static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1391{
1392	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1393
1394	geo->heads = 128;
1395	geo->sectors = 32;
1396	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1397	return 0;
1398}
1399
1400static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1401		     unsigned int cmd, unsigned long arg)
1402{
1403	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1404	u16 ubd_id[ATA_ID_WORDS];
1405
1406	switch (cmd) {
1407		struct cdrom_volctrl volume;
1408	case HDIO_GET_IDENTITY:
1409		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1410		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1411		ubd_id[ATA_ID_HEADS]	= 128;
1412		ubd_id[ATA_ID_SECTORS]	= 32;
1413		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1414				 sizeof(ubd_id)))
1415			return -EFAULT;
1416		return 0;
1417
1418	case CDROMVOLREAD:
1419		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1420			return -EFAULT;
1421		volume.channel0 = 255;
1422		volume.channel1 = 255;
1423		volume.channel2 = 255;
1424		volume.channel3 = 255;
1425		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1426			return -EFAULT;
1427		return 0;
1428	}
1429	return -EINVAL;
1430}
1431
1432static int map_error(int error_code)
1433{
1434	switch (error_code) {
1435	case 0:
1436		return BLK_STS_OK;
1437	case ENOSYS:
1438	case EOPNOTSUPP:
1439		return BLK_STS_NOTSUPP;
1440	case ENOSPC:
1441		return BLK_STS_NOSPC;
1442	}
1443	return BLK_STS_IOERR;
1444}
1445
1446/*
1447 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1448 *
1449 * The following functions are part of UML hypervisor code.
1450 * All functions from here onwards are executed as a helper
1451 * thread and are not allowed to execute any kernel functions.
1452 *
1453 * Any communication must occur strictly via shared memory and IPC.
1454 *
1455 * Do not add printks, locks, kernel memory operations, etc - it
1456 * will result in unpredictable behaviour and/or crashes.
1457 */
1458
1459static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1460{
1461	int n;
1462
1463	if (segment->cow_offset == -1)
1464		return map_error(0);
1465
1466	n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1467			  sizeof(segment->bitmap_words), segment->cow_offset);
1468	if (n != sizeof(segment->bitmap_words))
1469		return map_error(-n);
1470
1471	return map_error(0);
1472}
1473
1474static void do_io(struct io_thread_req *req, struct io_desc *desc)
1475{
1476	char *buf = NULL;
1477	unsigned long len;
1478	int n, nsectors, start, end, bit;
1479	__u64 off;
1480
1481	/* FLUSH is really a special case, we cannot "case" it with others */
1482
1483	if (req_op(req->req) == REQ_OP_FLUSH) {
1484		/* fds[0] is always either the rw image or our cow file */
1485		req->error = map_error(-os_sync_file(req->fds[0]));
1486		return;
1487	}
1488
1489	nsectors = desc->length / req->sectorsize;
1490	start = 0;
1491	do {
1492		bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
1493		end = start;
1494		while((end < nsectors) &&
1495		      (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
1496			end++;
1497
1498		off = req->offset + req->offsets[bit] +
1499			start * req->sectorsize;
1500		len = (end - start) * req->sectorsize;
1501		if (desc->buffer != NULL)
1502			buf = &desc->buffer[start * req->sectorsize];
1503
1504		switch (req_op(req->req)) {
1505		case REQ_OP_READ:
1506			n = 0;
1507			do {
1508				buf = &buf[n];
1509				len -= n;
1510				n = os_pread_file(req->fds[bit], buf, len, off);
1511				if (n < 0) {
1512					req->error = map_error(-n);
1513					return;
1514				}
1515			} while((n < len) && (n != 0));
1516			if (n < len) memset(&buf[n], 0, len - n);
1517			break;
1518		case REQ_OP_WRITE:
1519			n = os_pwrite_file(req->fds[bit], buf, len, off);
1520			if(n != len){
1521				req->error = map_error(-n);
1522				return;
1523			}
1524			break;
1525		case REQ_OP_DISCARD:
1526			n = os_falloc_punch(req->fds[bit], off, len);
1527			if (n) {
1528				req->error = map_error(-n);
1529				return;
1530			}
1531			break;
1532		case REQ_OP_WRITE_ZEROES:
1533			n = os_falloc_zeroes(req->fds[bit], off, len);
1534			if (n) {
1535				req->error = map_error(-n);
1536				return;
1537			}
1538			break;
1539		default:
1540			WARN_ON_ONCE(1);
1541			req->error = BLK_STS_NOTSUPP;
1542			return;
1543		}
1544
1545		start = end;
1546	} while(start < nsectors);
1547
1548	req->offset += len;
1549	req->error = update_bitmap(req, desc);
1550}
1551
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
1555int kernel_fd = -1;
1556
1557/* Only changed by the io thread. XXX: currently unused. */
1558static int io_count;
1559
1560int io_thread(void *arg)
1561{
1562	int n, count, written, res;
1563
1564	os_fix_helper_signals();
1565
1566	while(1){
1567		n = bulk_req_safe_read(
1568			kernel_fd,
1569			io_req_buffer,
1570			&io_remainder,
1571			&io_remainder_size,
1572			UBD_REQ_BUFFER_SIZE
1573		);
1574		if (n <= 0) {
1575			if (n == -EAGAIN)
1576				ubd_read_poll(-1);
1577
1578			continue;
1579		}
1580
1581		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1582			struct io_thread_req *req = (*io_req_buffer)[count];
1583			int i;
1584
1585			io_count++;
1586			for (i = 0; !req->error && i < req->desc_cnt; i++)
1587				do_io(req, &(req->io_desc[i]));
1588
1589		}
1590
1591		written = 0;
1592
1593		do {
1594			res = os_write_file(kernel_fd,
1595					    ((char *) io_req_buffer) + written,
1596					    n - written);
1597			if (res >= 0) {
1598				written += res;
1599			}
1600			if (written < n) {
1601				ubd_write_poll(-1);
1602			}
1603		} while (written < n);
1604	}
1605
1606	return 0;
1607}
v5.14.15
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018 Cambridge Greys Ltd
   4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
   5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
   6 */
   7
   8/* 2001-09-28...2002-04-17
   9 * Partition stuff by James_McMechan@hotmail.com
  10 * old style ubd by setting UBD_SHIFT to 0
  11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  12 * partitions have changed in 2.5
  13 * 2003-01-29 more tinkering for 2.5.59-1
  14 * This should now address the sysfs problems and has
  15 * the symlink for devfs to allow for booting with
  16 * the common /dev/ubd/discX/... names rather than
  17 * only /dev/ubdN/discN this version also has lots of
  18 * clean ups preparing for ubd-many.
  19 * James McMechan
  20 */
  21
  22#define UBD_SHIFT 4
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/blkdev.h>
  27#include <linux/blk-mq.h>
  28#include <linux/ata.h>
  29#include <linux/hdreg.h>
 
  30#include <linux/cdrom.h>
  31#include <linux/proc_fs.h>
  32#include <linux/seq_file.h>
  33#include <linux/ctype.h>
  34#include <linux/slab.h>
  35#include <linux/vmalloc.h>
  36#include <linux/platform_device.h>
  37#include <linux/scatterlist.h>
  38#include <asm/tlbflush.h>
  39#include <kern_util.h>
  40#include "mconsole_kern.h"
  41#include <init.h>
  42#include <irq_kern.h>
  43#include "ubd.h"
  44#include <os.h>
  45#include "cow.h"
  46
  47/* Max request size is determined by sector mask - 32K */
  48#define UBD_MAX_REQUEST (8 * sizeof(long))
  49
  50struct io_desc {
  51	char *buffer;
  52	unsigned long length;
  53	unsigned long sector_mask;
  54	unsigned long long cow_offset;
  55	unsigned long bitmap_words[2];
  56};
  57
  58struct io_thread_req {
  59	struct request *req;
  60	int fds[2];
  61	unsigned long offsets[2];
  62	unsigned long long offset;
  63	int sectorsize;
  64	int error;
  65
  66	int desc_cnt;
  67	/* io_desc has to be the last element of the struct */
  68	struct io_desc io_desc[];
  69};
  70
  71
  72static struct io_thread_req * (*irq_req_buffer)[];
  73static struct io_thread_req *irq_remainder;
  74static int irq_remainder_size;
  75
  76static struct io_thread_req * (*io_req_buffer)[];
  77static struct io_thread_req *io_remainder;
  78static int io_remainder_size;
  79
  80
  81
  82static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  83{
  84	__u64 n;
  85	int bits, off;
  86
  87	bits = sizeof(data[0]) * 8;
  88	n = bit / bits;
  89	off = bit % bits;
  90	return (data[n] & (1 << off)) != 0;
  91}
  92
  93static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  94{
  95	__u64 n;
  96	int bits, off;
  97
  98	bits = sizeof(data[0]) * 8;
  99	n = bit / bits;
 100	off = bit % bits;
 101	data[n] |= (1 << off);
 102}
 103/*End stuff from ubd_user.h*/
 104
 105#define DRIVER_NAME "uml-blkdev"
 106
 107static DEFINE_MUTEX(ubd_lock);
 108static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
 109
 110static int ubd_open(struct block_device *bdev, fmode_t mode);
 111static void ubd_release(struct gendisk *disk, fmode_t mode);
 112static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 113		     unsigned int cmd, unsigned long arg);
 114static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 115
 116#define MAX_DEV (16)
 117
 118static const struct block_device_operations ubd_blops = {
 119        .owner		= THIS_MODULE,
 120        .open		= ubd_open,
 121        .release	= ubd_release,
 122        .ioctl		= ubd_ioctl,
 123        .compat_ioctl	= blkdev_compat_ptr_ioctl,
 124	.getgeo		= ubd_getgeo,
 125};
 126
 127/* Protected by ubd_lock */
 128static struct gendisk *ubd_gendisk[MAX_DEV];
 129
 130#ifdef CONFIG_BLK_DEV_UBD_SYNC
 131#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 132					 .cl = 1 })
 133#else
 134#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
 135					 .cl = 1 })
 136#endif
 137static struct openflags global_openflags = OPEN_FLAGS;
 138
 139struct cow {
 140	/* backing file name */
 141	char *file;
 142	/* backing file fd */
 143	int fd;
 144	unsigned long *bitmap;
 145	unsigned long bitmap_len;
 146	int bitmap_offset;
 147	int data_offset;
 148};
 149
 150#define MAX_SG 64
 151
 152struct ubd {
 153	/* name (and fd, below) of the file opened for writing, either the
 154	 * backing or the cow file. */
 155	char *file;
 156	char *serial;
 157	int count;
 158	int fd;
 159	__u64 size;
 160	struct openflags boot_openflags;
 161	struct openflags openflags;
 162	unsigned shared:1;
 163	unsigned no_cow:1;
 164	unsigned no_trim:1;
 165	struct cow cow;
 166	struct platform_device pdev;
 167	struct request_queue *queue;
 168	struct blk_mq_tag_set tag_set;
 169	spinlock_t lock;
 170};
 171
 172#define DEFAULT_COW { \
 173	.file =			NULL, \
 174	.fd =			-1,	\
 175	.bitmap =		NULL, \
 176	.bitmap_offset =	0, \
 177	.data_offset =		0, \
 178}
 179
 180#define DEFAULT_UBD { \
 181	.file = 		NULL, \
 182	.serial =		NULL, \
 183	.count =		0, \
 184	.fd =			-1, \
 185	.size =			-1, \
 186	.boot_openflags =	OPEN_FLAGS, \
 187	.openflags =		OPEN_FLAGS, \
 188	.no_cow =               0, \
 189	.no_trim =		0, \
 190	.shared =		0, \
 191	.cow =			DEFAULT_COW, \
 192	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
 193}
 194
 195/* Protected by ubd_lock */
 196static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 197
 198static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 199				 const struct blk_mq_queue_data *bd);
 200
 201static int fake_ide_setup(char *str)
 202{
 203	pr_warn("The fake_ide option has been removed\n");
 204	return 1;
 205}
 206__setup("fake_ide", fake_ide_setup);
 207
 208__uml_help(fake_ide_setup,
 209"fake_ide\n"
 210"    Obsolete stub.\n\n"
 211);
 212
 213static int parse_unit(char **ptr)
 214{
 215	char *str = *ptr, *end;
 216	int n = -1;
 217
 218	if(isdigit(*str)) {
 219		n = simple_strtoul(str, &end, 0);
 220		if(end == str)
 221			return -1;
 222		*ptr = end;
 223	}
 224	else if (('a' <= *str) && (*str <= 'z')) {
 225		n = *str - 'a';
 226		str++;
 227		*ptr = str;
 228	}
 229	return n;
 230}
 231
 232/* If *index_out == -1 at exit, the passed option was a general one;
 233 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 234 * should not be freed on exit.
 235 */
 236static int ubd_setup_common(char *str, int *index_out, char **error_out)
 237{
 238	struct ubd *ubd_dev;
 239	struct openflags flags = global_openflags;
 240	char *file, *backing_file, *serial;
 241	int n, err = 0, i;
 242
 243	if(index_out) *index_out = -1;
 244	n = *str;
 245	if(n == '='){
 246		str++;
 247		if(!strcmp(str, "sync")){
 248			global_openflags = of_sync(global_openflags);
 249			return err;
 250		}
 251
 252		pr_warn("fake major not supported any more\n");
 253		return 0;
 254	}
 255
 256	n = parse_unit(&str);
 257	if(n < 0){
 258		*error_out = "Couldn't parse device number";
 259		return -EINVAL;
 260	}
 261	if(n >= MAX_DEV){
 262		*error_out = "Device number out of range";
 263		return 1;
 264	}
 265
 266	err = -EBUSY;
 267	mutex_lock(&ubd_lock);
 268
 269	ubd_dev = &ubd_devs[n];
 270	if(ubd_dev->file != NULL){
 271		*error_out = "Device is already configured";
 272		goto out;
 273	}
 274
 275	if (index_out)
 276		*index_out = n;
 277
 278	err = -EINVAL;
 279	for (i = 0; i < sizeof("rscdt="); i++) {
 280		switch (*str) {
 281		case 'r':
 282			flags.w = 0;
 283			break;
 284		case 's':
 285			flags.s = 1;
 286			break;
 287		case 'd':
 288			ubd_dev->no_cow = 1;
 289			break;
 290		case 'c':
 291			ubd_dev->shared = 1;
 292			break;
 293		case 't':
 294			ubd_dev->no_trim = 1;
 295			break;
 296		case '=':
 297			str++;
 298			goto break_loop;
 299		default:
 300			*error_out = "Expected '=' or flag letter "
 301				"(r, s, c, t or d)";
 302			goto out;
 303		}
 304		str++;
 305	}
 306
 307	if (*str == '=')
 308		*error_out = "Too many flags specified";
 309	else
 310		*error_out = "Missing '='";
 311	goto out;
 312
 313break_loop:
 314	file = strsep(&str, ",:");
 315	if (*file == '\0')
 316		file = NULL;
 317
 318	backing_file = strsep(&str, ",:");
 319	if (backing_file && *backing_file == '\0')
 320		backing_file = NULL;
 321
 322	serial = strsep(&str, ",:");
 323	if (serial && *serial == '\0')
 324		serial = NULL;
 325
 326	if (backing_file && ubd_dev->no_cow) {
 327		*error_out = "Can't specify both 'd' and a cow file";
 328		goto out;
 329	}
 330
 331	err = 0;
 332	ubd_dev->file = file;
 333	ubd_dev->cow.file = backing_file;
 334	ubd_dev->serial = serial;
 335	ubd_dev->boot_openflags = flags;
 336out:
 337	mutex_unlock(&ubd_lock);
 338	return err;
 339}
 340
 341static int ubd_setup(char *str)
 342{
 343	char *error;
 344	int err;
 345
 346	err = ubd_setup_common(str, NULL, &error);
 347	if(err)
 348		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 349		       "%s\n", str, error);
 350	return 1;
 351}
 352
 353__setup("ubd", ubd_setup);
 354__uml_help(ubd_setup,
 355"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
 356"    This is used to associate a device with a file in the underlying\n"
 357"    filesystem. When specifying two filenames, the first one is the\n"
 358"    COW name and the second is the backing file name. As separator you can\n"
 359"    use either a ':' or a ',': the first one allows writing things like;\n"
 360"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 361"    while with a ',' the shell would not expand the 2nd '~'.\n"
 362"    When using only one filename, UML will detect whether to treat it like\n"
 363"    a COW file or a backing file. To override this detection, add the 'd'\n"
 364"    flag:\n"
 365"	ubd0d=BackingFile\n"
 366"    Usually, there is a filesystem in the file, but \n"
 367"    that's not required. Swap devices containing swap files can be\n"
 368"    specified like this. Also, a file which doesn't contain a\n"
 369"    filesystem can have its contents read in the virtual \n"
 370"    machine by running 'dd' on the device. <n> must be in the range\n"
 371"    0 to 7. Appending an 'r' to the number will cause that device\n"
 372"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 373"    an 's' will cause data to be written to disk on the host immediately.\n"
 374"    'c' will cause the device to be treated as being shared between multiple\n"
 375"    UMLs and file locking will be turned off - this is appropriate for a\n"
 376"    cluster filesystem and inappropriate at almost all other times.\n\n"
 377"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 378"    An optional device serial number can be exposed using the serial parameter\n"
 379"    on the cmdline which is exposed as a sysfs entry. This is particularly\n"
 380"    useful when a unique number should be given to the device. Note when\n"
 381"    specifying a label, the filename2 must be also presented. It can be\n"
 382"    an empty string, in which case the backing file is not used:\n"
 383"       ubd0=File,,Serial\n"
 384);
 385
 386static int udb_setup(char *str)
 387{
 388	printk("udb%s specified on command line is almost certainly a ubd -> "
 389	       "udb TYPO\n", str);
 390	return 1;
 391}
 392
 393__setup("udb", udb_setup);
 394__uml_help(udb_setup,
 395"udb\n"
 396"    This option is here solely to catch ubd -> udb typos, which can be\n"
 397"    to impossible to catch visually unless you specifically look for\n"
 398"    them.  The only result of any option starting with 'udb' is an error\n"
 399"    in the boot output.\n\n"
 400);
 401
/* Only changed by ubd_driver_init, which is an initcall. */
 403static int thread_fd = -1;
 404
 405/* Function to read several request pointers at a time
 406* handling fractional reads if (and as) needed
 407*/
 408
 409static int bulk_req_safe_read(
 410	int fd,
 411	struct io_thread_req * (*request_buffer)[],
 412	struct io_thread_req **remainder,
 413	int *remainder_size,
 414	int max_recs
 415	)
 416{
 417	int n = 0;
 418	int res = 0;
 419
 420	if (*remainder_size > 0) {
 421		memmove(
 422			(char *) request_buffer,
 423			(char *) remainder, *remainder_size
 424		);
 425		n = *remainder_size;
 426	}
 427
 428	res = os_read_file(
 429			fd,
 430			((char *) request_buffer) + *remainder_size,
 431			sizeof(struct io_thread_req *)*max_recs
 432				- *remainder_size
 433		);
 434	if (res > 0) {
 435		n += res;
 436		if ((n % sizeof(struct io_thread_req *)) > 0) {
 437			/*
 438			* Read somehow returned not a multiple of dword
 439			* theoretically possible, but never observed in the
 440			* wild, so read routine must be able to handle it
 441			*/
 442			*remainder_size = n % sizeof(struct io_thread_req *);
 443			WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
 444			memmove(
 445				remainder,
 446				((char *) request_buffer) +
 447					(n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
 448				*remainder_size
 449			);
 450			n = n - *remainder_size;
 451		}
 452	} else {
 453		n = res;
 454	}
 455	return n;
 456}
 457
 458/* Called without dev->lock held, and only in interrupt context. */
 459static void ubd_handler(void)
 460{
 461	int n;
 462	int count;
 463
 464	while(1){
 465		n = bulk_req_safe_read(
 466			thread_fd,
 467			irq_req_buffer,
 468			&irq_remainder,
 469			&irq_remainder_size,
 470			UBD_REQ_BUFFER_SIZE
 471		);
 472		if (n < 0) {
 473			if(n == -EAGAIN)
 474				break;
 475			printk(KERN_ERR "spurious interrupt in ubd_handler, "
 476			       "err = %d\n", -n);
 477			return;
 478		}
 479		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 480			struct io_thread_req *io_req = (*irq_req_buffer)[count];
 481
 482			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
 483				blk_queue_max_discard_sectors(io_req->req->q, 0);
 484				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
 485				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
 486			}
 487			blk_mq_end_request(io_req->req, io_req->error);
 488			kfree(io_req);
 489		}
 490	}
 491}
 492
 493static irqreturn_t ubd_intr(int irq, void *dev)
 494{
 495	ubd_handler();
 496	return IRQ_HANDLED;
 497}
 498
/* Only changed by ubd_driver_init, which is an initcall. */
 500static int io_pid = -1;
 501
 502static void kill_io_thread(void)
 503{
 504	if(io_pid != -1)
 505		os_kill_process(io_pid, 1);
 506}
 507
 508__uml_exitcall(kill_io_thread);
 509
 510static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 511{
 512	char *file;
 513	int fd;
 514	int err;
 515
 516	__u32 version;
 517	__u32 align;
 518	char *backing_file;
 519	time64_t mtime;
 520	unsigned long long size;
 521	int sector_size;
 522	int bitmap_offset;
 523
 524	if (ubd_dev->file && ubd_dev->cow.file) {
 525		file = ubd_dev->cow.file;
 526
 527		goto out;
 528	}
 529
 530	fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
 531	if (fd < 0)
 532		return fd;
 533
 534	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 535		&mtime, &size, &sector_size, &align, &bitmap_offset);
 536	os_close_file(fd);
 537
 538	if(err == -EINVAL)
 539		file = ubd_dev->file;
 540	else
 541		file = backing_file;
 542
 543out:
 544	return os_file_size(file, size_out);
 545}
 546
 547static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 548{
 549	int err;
 550
 551	err = os_pread_file(fd, buf, len, offset);
 552	if (err < 0)
 553		return err;
 554
 555	return 0;
 556}
 557
 558static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
 559{
 560	time64_t modtime;
 561	unsigned long long actual;
 562	int err;
 563
 564	err = os_file_modtime(file, &modtime);
 565	if (err < 0) {
 566		printk(KERN_ERR "Failed to get modification time of backing "
 567		       "file \"%s\", err = %d\n", file, -err);
 568		return err;
 569	}
 570
 571	err = os_file_size(file, &actual);
 572	if (err < 0) {
 573		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 574		       "err = %d\n", file, -err);
 575		return err;
 576	}
 577
 578	if (actual != size) {
 579		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
 580		 * the typecast.*/
 581		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 582		       "vs backing file\n", (unsigned long long) size, actual);
 583		return -EINVAL;
 584	}
 585	if (modtime != mtime) {
 586		printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
 587		       "backing file\n", mtime, modtime);
 588		return -EINVAL;
 589	}
 590	return 0;
 591}
 592
 593static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 594{
 595	struct uml_stat buf1, buf2;
 596	int err;
 597
 598	if (from_cmdline == NULL)
 599		return 0;
 600	if (!strcmp(from_cmdline, from_cow))
 601		return 0;
 602
 603	err = os_stat_file(from_cmdline, &buf1);
 604	if (err < 0) {
 605		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 606		       -err);
 607		return 0;
 608	}
 609	err = os_stat_file(from_cow, &buf2);
 610	if (err < 0) {
 611		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 612		       -err);
 613		return 1;
 614	}
 615	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 616		return 0;
 617
 618	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 619	       "\"%s\" specified in COW header of \"%s\"\n",
 620	       from_cmdline, from_cow, cow);
 621	return 1;
 622}
 623
 624static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 625		  char **backing_file_out, int *bitmap_offset_out,
 626		  unsigned long *bitmap_len_out, int *data_offset_out,
 627		  int *create_cow_out)
 628{
 629	time64_t mtime;
 630	unsigned long long size;
 631	__u32 version, align;
 632	char *backing_file;
 633	int fd, err, sectorsize, asked_switch, mode = 0644;
 634
 635	fd = os_open_file(file, *openflags, mode);
 636	if (fd < 0) {
 637		if ((fd == -ENOENT) && (create_cow_out != NULL))
 638			*create_cow_out = 1;
 639		if (!openflags->w ||
 640		    ((fd != -EROFS) && (fd != -EACCES)))
 641			return fd;
 642		openflags->w = 0;
 643		fd = os_open_file(file, *openflags, mode);
 644		if (fd < 0)
 645			return fd;
 646	}
 647
 648	if (shared)
 649		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
 650	else {
 651		err = os_lock_file(fd, openflags->w);
 652		if (err < 0) {
 653			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
 654			       file, -err);
 655			goto out_close;
 656		}
 657	}
 658
 659	/* Successful return case! */
 660	if (backing_file_out == NULL)
 661		return fd;
 662
 663	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
 664			      &size, &sectorsize, &align, bitmap_offset_out);
 665	if (err && (*backing_file_out != NULL)) {
 666		printk(KERN_ERR "Failed to read COW header from COW file "
 667		       "\"%s\", errno = %d\n", file, -err);
 668		goto out_close;
 669	}
 670	if (err)
 671		return fd;
 672
 673	asked_switch = path_requires_switch(*backing_file_out, backing_file,
 674					    file);
 675
 676	/* Allow switching only if no mismatch. */
 677	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
 678						   mtime)) {
 679		printk(KERN_ERR "Switching backing file to '%s'\n",
 680		       *backing_file_out);
 681		err = write_cow_header(file, fd, *backing_file_out,
 682				       sectorsize, align, &size);
 683		if (err) {
 684			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
 685			goto out_close;
 686		}
 687	} else {
 688		*backing_file_out = backing_file;
 689		err = backing_file_mismatch(*backing_file_out, size, mtime);
 690		if (err)
 691			goto out_close;
 692	}
 693
 694	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
 695		  bitmap_len_out, data_offset_out);
 696
 697	return fd;
 698 out_close:
 699	os_close_file(fd);
 700	return err;
 701}
 702
 703static int create_cow_file(char *cow_file, char *backing_file,
 704		    struct openflags flags,
 705		    int sectorsize, int alignment, int *bitmap_offset_out,
 706		    unsigned long *bitmap_len_out, int *data_offset_out)
 707{
 708	int err, fd;
 709
 710	flags.c = 1;
 711	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 712	if (fd < 0) {
 713		err = fd;
 714		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 715		       cow_file, -err);
 716		goto out;
 717	}
 718
 719	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 720			    bitmap_offset_out, bitmap_len_out,
 721			    data_offset_out);
 722	if (!err)
 723		return fd;
 724	os_close_file(fd);
 725 out:
 726	return err;
 727}
 728
 729static void ubd_close_dev(struct ubd *ubd_dev)
 730{
 731	os_close_file(ubd_dev->fd);
 732	if(ubd_dev->cow.file == NULL)
 733		return;
 734
 735	os_close_file(ubd_dev->cow.fd);
 736	vfree(ubd_dev->cow.bitmap);
 737	ubd_dev->cow.bitmap = NULL;
 738}
 739
 740static int ubd_open_dev(struct ubd *ubd_dev)
 741{
 742	struct openflags flags;
 743	char **back_ptr;
 744	int err, create_cow, *create_ptr;
 745	int fd;
 746
 747	ubd_dev->openflags = ubd_dev->boot_openflags;
 748	create_cow = 0;
 749	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 750	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 751
 752	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 753				back_ptr, &ubd_dev->cow.bitmap_offset,
 754				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 755				create_ptr);
 756
 757	if((fd == -ENOENT) && create_cow){
 758		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 759					  ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 760					  &ubd_dev->cow.bitmap_offset,
 761					  &ubd_dev->cow.bitmap_len,
 762					  &ubd_dev->cow.data_offset);
 763		if(fd >= 0){
 764			printk(KERN_INFO "Creating \"%s\" as COW file for "
 765			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 766		}
 767	}
 768
 769	if(fd < 0){
 770		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 771		       -fd);
 772		return fd;
 773	}
 774	ubd_dev->fd = fd;
 775
 776	if(ubd_dev->cow.file != NULL){
 777		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 778
 779		err = -ENOMEM;
 780		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 781		if(ubd_dev->cow.bitmap == NULL){
 782			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 783			goto error;
 784		}
 785		flush_tlb_kernel_vm();
 786
 787		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 788				      ubd_dev->cow.bitmap_offset,
 789				      ubd_dev->cow.bitmap_len);
 790		if(err < 0)
 791			goto error;
 792
 793		flags = ubd_dev->openflags;
 794		flags.w = 0;
 795		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 796				    NULL, NULL, NULL, NULL);
 797		if(err < 0) goto error;
 798		ubd_dev->cow.fd = err;
 799	}
 800	if (ubd_dev->no_trim == 0) {
 801		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
 802		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
 803		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 804		blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 805		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
 806	}
 807	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 808	return 0;
 809 error:
 810	os_close_file(ubd_dev->fd);
 811	return err;
 812}
 813
/*
 * Release callback installed on the platform device in ubd_disk_register().
 * Frees the blk-mq tag set and then resets the whole ubd slot to its
 * DEFAULT_UBD state.
 */
static void ubd_device_release(struct device *dev)
{
	struct ubd *ubd_dev = dev_get_drvdata(dev);

	blk_mq_free_tag_set(&ubd_dev->tag_set);
	/* Must come last: this overwrites tag_set along with everything else */
	*ubd_dev = ((struct ubd) DEFAULT_UBD);
}
 821
 822static ssize_t serial_show(struct device *dev,
 823			   struct device_attribute *attr, char *buf)
 824{
 825	struct gendisk *disk = dev_to_disk(dev);
 826	struct ubd *ubd_dev = disk->private_data;
 827
 828	if (!ubd_dev)
 829		return 0;
 830
 831	return sprintf(buf, "%s", ubd_dev->serial);
 832}
 833
 834static DEVICE_ATTR_RO(serial);
 835
/* NULL-terminated list of per-disk sysfs attributes; only "serial" so far. */
static struct attribute *ubd_attrs[] = {
	&dev_attr_serial.attr,
	NULL,
};
 840
/*
 * .is_visible callback for ubd_attr_group.  Returns the attribute's own
 * mode unchanged, so every attribute keeps the permissions it was declared
 * with; @kobj and @n are not used.
 */
static umode_t ubd_attrs_are_visible(struct kobject *kobj,
				     struct attribute *a, int n)
{
	return a->mode;
}
 846
/* Attribute group tying ubd_attrs to its visibility callback. */
static const struct attribute_group ubd_attr_group = {
	.attrs = ubd_attrs,
	.is_visible = ubd_attrs_are_visible,
};
 851
/* NULL-terminated group list handed to device_add_disk(). */
static const struct attribute_group *ubd_attr_groups[] = {
	&ubd_attr_group,
	NULL,
};
 856
 857static void ubd_disk_register(int major, u64 size, int unit,
 858			      struct gendisk *disk)
 859{
 860	disk->major = major;
 861	disk->first_minor = unit << UBD_SHIFT;
 862	disk->minors = 1 << UBD_SHIFT;
 863	disk->fops = &ubd_blops;
 864	set_capacity(disk, size / 512);
 865	sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 866
 867	ubd_devs[unit].pdev.id   = unit;
 868	ubd_devs[unit].pdev.name = DRIVER_NAME;
 869	ubd_devs[unit].pdev.dev.release = ubd_device_release;
 870	dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 871	platform_device_register(&ubd_devs[unit].pdev);
 872
 873	disk->private_data = &ubd_devs[unit];
 874	disk->queue = ubd_devs[unit].queue;
 875	device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
 876}
 877
 878#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
 879
/* blk-mq operations for ubd; only request submission is implemented. */
static const struct blk_mq_ops ubd_mq_ops = {
	.queue_rq = ubd_queue_rq,
};
 883
 884static int ubd_add(int n, char **error_out)
 885{
 886	struct ubd *ubd_dev = &ubd_devs[n];
 887	struct gendisk *disk;
 888	int err = 0;
 889
 890	if(ubd_dev->file == NULL)
 891		goto out;
 892
 893	err = ubd_file_size(ubd_dev, &ubd_dev->size);
 894	if(err < 0){
 895		*error_out = "Couldn't determine size of device's file";
 896		goto out;
 897	}
 898
 899	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 900
 901	ubd_dev->tag_set.ops = &ubd_mq_ops;
 902	ubd_dev->tag_set.queue_depth = 64;
 903	ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
 904	ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 905	ubd_dev->tag_set.driver_data = ubd_dev;
 906	ubd_dev->tag_set.nr_hw_queues = 1;
 907
 908	err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
 909	if (err)
 910		goto out;
 911
 912	disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev);
 913	if (IS_ERR(disk)) {
 914		err = PTR_ERR(disk);
 915		goto out_cleanup_tags;
 916	}
 917	ubd_dev->queue = disk->queue;
 918
 919	blk_queue_write_cache(ubd_dev->queue, true, false);
 920	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 921	blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
 922	ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
 
 
 
 923	ubd_gendisk[n] = disk;
 924	return 0;
 925
 
 
 926out_cleanup_tags:
 927	blk_mq_free_tag_set(&ubd_dev->tag_set);
 928out:
 929	return err;
 930}
 931
 932static int ubd_config(char *str, char **error_out)
 933{
 934	int n, ret;
 935
 936	/* This string is possibly broken up and stored, so it's only
 937	 * freed if ubd_setup_common fails, or if only general options
 938	 * were set.
 939	 */
 940	str = kstrdup(str, GFP_KERNEL);
 941	if (str == NULL) {
 942		*error_out = "Failed to allocate memory";
 943		return -ENOMEM;
 944	}
 945
 946	ret = ubd_setup_common(str, &n, error_out);
 947	if (ret)
 948		goto err_free;
 949
 950	if (n == -1) {
 951		ret = 0;
 952		goto err_free;
 953	}
 954
 955	mutex_lock(&ubd_lock);
 956	ret = ubd_add(n, error_out);
 957	if (ret)
 958		ubd_devs[n].file = NULL;
 959	mutex_unlock(&ubd_lock);
 960
 961out:
 962	return ret;
 963
 964err_free:
 965	kfree(str);
 966	goto out;
 967}
 968
 969static int ubd_get_config(char *name, char *str, int size, char **error_out)
 970{
 971	struct ubd *ubd_dev;
 972	int n, len = 0;
 973
 974	n = parse_unit(&name);
 975	if((n >= MAX_DEV) || (n < 0)){
 976		*error_out = "ubd_get_config : device number out of range";
 977		return -1;
 978	}
 979
 980	ubd_dev = &ubd_devs[n];
 981	mutex_lock(&ubd_lock);
 982
 983	if(ubd_dev->file == NULL){
 984		CONFIG_CHUNK(str, size, len, "", 1);
 985		goto out;
 986	}
 987
 988	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
 989
 990	if(ubd_dev->cow.file != NULL){
 991		CONFIG_CHUNK(str, size, len, ",", 0);
 992		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
 993	}
 994	else CONFIG_CHUNK(str, size, len, "", 1);
 995
 996 out:
 997	mutex_unlock(&ubd_lock);
 998	return len;
 999}
1000
1001static int ubd_id(char **str, int *start_out, int *end_out)
1002{
1003	int n;
1004
1005	n = parse_unit(str);
1006	*start_out = 0;
1007	*end_out = MAX_DEV - 1;
1008	return n;
1009}
1010
1011static int ubd_remove(int n, char **error_out)
1012{
1013	struct gendisk *disk = ubd_gendisk[n];
1014	struct ubd *ubd_dev;
1015	int err = -ENODEV;
1016
1017	mutex_lock(&ubd_lock);
1018
1019	ubd_dev = &ubd_devs[n];
1020
1021	if(ubd_dev->file == NULL)
1022		goto out;
1023
1024	/* you cannot remove a open disk */
1025	err = -EBUSY;
1026	if(ubd_dev->count > 0)
1027		goto out;
1028
1029	ubd_gendisk[n] = NULL;
1030	if(disk != NULL){
1031		del_gendisk(disk);
1032		blk_cleanup_disk(disk);
1033	}
1034
1035	err = 0;
1036	platform_device_unregister(&ubd_dev->pdev);
1037out:
1038	mutex_unlock(&ubd_lock);
1039	return err;
1040}
1041
/* All these are called by mconsole in process context and without
 * ubd-specific locks.  The structure itself is const except for .list.
 *
 * Registered with mconsole under the name "ubd" by ubd_mc_init().
 */
static struct mc_device ubd_mc = {
	.list		= LIST_HEAD_INIT(ubd_mc.list),
	.name		= "ubd",
	.config		= ubd_config,
	.get_config	= ubd_get_config,
	.id		= ubd_id,
	.remove		= ubd_remove,
};
1053
/* Initcall: hook the ubd handlers into mconsole.  Always returns 0. */
static int __init ubd_mc_init(void)
{
	mconsole_register_dev(&ubd_mc);
	return 0;
}

__initcall(ubd_mc_init);
1061
1062static int __init ubd0_init(void)
1063{
1064	struct ubd *ubd_dev = &ubd_devs[0];
1065
1066	mutex_lock(&ubd_lock);
1067	if(ubd_dev->file == NULL)
1068		ubd_dev->file = "root_fs";
1069	mutex_unlock(&ubd_lock);
1070
1071	return 0;
1072}
1073
1074__initcall(ubd0_init);
1075
/* Used in ubd_init, which is an initcall.
 * Carries only the driver name; it matches the name given to each unit's
 * platform device in ubd_disk_register().
 */
static struct platform_driver ubd_driver = {
	.driver = {
		.name  = DRIVER_NAME,
	},
};
1082
1083static int __init ubd_init(void)
1084{
1085	char *error;
1086	int i, err;
1087
1088	if (register_blkdev(UBD_MAJOR, "ubd"))
1089		return -1;
1090
1091	irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1092				       sizeof(struct io_thread_req *),
1093				       GFP_KERNEL
1094		);
1095	irq_remainder = 0;
1096
1097	if (irq_req_buffer == NULL) {
1098		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1099		return -1;
1100	}
1101	io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1102				      sizeof(struct io_thread_req *),
1103				      GFP_KERNEL
1104		);
1105
1106	io_remainder = 0;
1107
1108	if (io_req_buffer == NULL) {
1109		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1110		return -1;
1111	}
1112	platform_driver_register(&ubd_driver);
1113	mutex_lock(&ubd_lock);
1114	for (i = 0; i < MAX_DEV; i++){
1115		err = ubd_add(i, &error);
1116		if(err)
1117			printk(KERN_ERR "Failed to initialize ubd device %d :"
1118			       "%s\n", i, error);
1119	}
1120	mutex_unlock(&ubd_lock);
1121	return 0;
1122}
1123
1124late_initcall(ubd_init);
1125
1126static int __init ubd_driver_init(void){
1127	unsigned long stack;
1128	int err;
1129
1130	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1131	if(global_openflags.s){
1132		printk(KERN_INFO "ubd: Synchronous mode\n");
1133		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1134		 * enough. So use anyway the io thread. */
1135	}
1136	stack = alloc_stack(0, 0);
1137	io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
1138	if(io_pid < 0){
1139		printk(KERN_ERR
1140		       "ubd : Failed to start I/O thread (errno = %d) - "
1141		       "falling back to synchronous I/O\n", -io_pid);
1142		io_pid = -1;
1143		return 0;
1144	}
1145	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1146			     0, "ubd", ubd_devs);
1147	if(err < 0)
1148		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1149	return 0;
1150}
1151
1152device_initcall(ubd_driver_init);
1153
1154static int ubd_open(struct block_device *bdev, fmode_t mode)
1155{
1156	struct gendisk *disk = bdev->bd_disk;
1157	struct ubd *ubd_dev = disk->private_data;
1158	int err = 0;
1159
1160	mutex_lock(&ubd_mutex);
1161	if(ubd_dev->count == 0){
1162		err = ubd_open_dev(ubd_dev);
1163		if(err){
1164			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1165			       disk->disk_name, ubd_dev->file, -err);
1166			goto out;
1167		}
1168	}
1169	ubd_dev->count++;
1170	set_disk_ro(disk, !ubd_dev->openflags.w);
1171
1172	/* This should no more be needed. And it didn't work anyway to exclude
1173	 * read-write remounting of filesystems.*/
1174	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1175	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1176	        err = -EROFS;
1177	}*/
1178out:
1179	mutex_unlock(&ubd_mutex);
1180	return err;
1181}
1182
1183static void ubd_release(struct gendisk *disk, fmode_t mode)
1184{
1185	struct ubd *ubd_dev = disk->private_data;
1186
1187	mutex_lock(&ubd_mutex);
1188	if(--ubd_dev->count == 0)
1189		ubd_close_dev(ubd_dev);
1190	mutex_unlock(&ubd_mutex);
1191}
1192
1193static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1194			  __u64 *cow_offset, unsigned long *bitmap,
1195			  __u64 bitmap_offset, unsigned long *bitmap_words,
1196			  __u64 bitmap_len)
1197{
1198	__u64 sector = io_offset >> SECTOR_SHIFT;
1199	int i, update_bitmap = 0;
1200
1201	for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1202		if(cow_mask != NULL)
1203			ubd_set_bit(i, (unsigned char *) cow_mask);
1204		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1205			continue;
1206
1207		update_bitmap = 1;
1208		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1209	}
1210
1211	if(!update_bitmap)
1212		return;
1213
1214	*cow_offset = sector / (sizeof(unsigned long) * 8);
1215
1216	/* This takes care of the case where we're exactly at the end of the
1217	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1218	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1219	 * for the original diagnosis.
1220	 */
1221	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1222					 sizeof(unsigned long)) - 1))
1223		(*cow_offset)--;
1224
1225	bitmap_words[0] = bitmap[*cow_offset];
1226	bitmap_words[1] = bitmap[*cow_offset + 1];
1227
1228	*cow_offset *= sizeof(unsigned long);
1229	*cow_offset += bitmap_offset;
1230}
1231
1232static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1233		       unsigned long offset, unsigned long *bitmap,
1234		       __u64 bitmap_offset, __u64 bitmap_len)
1235{
1236	__u64 sector = offset >> SECTOR_SHIFT;
1237	int i;
1238
1239	if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
1240		panic("Operation too long");
1241
1242	if (req_op(req->req) == REQ_OP_READ) {
1243		for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
1244			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1245				ubd_set_bit(i, (unsigned char *)
1246					    &segment->sector_mask);
1247		}
1248	} else {
1249		cowify_bitmap(offset, segment->length, &segment->sector_mask,
1250			      &segment->cow_offset, bitmap, bitmap_offset,
1251			      segment->bitmap_words, bitmap_len);
1252	}
1253}
1254
1255static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1256			struct request *req)
1257{
1258	struct bio_vec bvec;
1259	struct req_iterator iter;
1260	int i = 0;
1261	unsigned long byte_offset = io_req->offset;
1262	int op = req_op(req);
1263
1264	if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1265		io_req->io_desc[0].buffer = NULL;
1266		io_req->io_desc[0].length = blk_rq_bytes(req);
1267	} else {
1268		rq_for_each_segment(bvec, req, iter) {
1269			BUG_ON(i >= io_req->desc_cnt);
1270
1271			io_req->io_desc[i].buffer =
1272				page_address(bvec.bv_page) + bvec.bv_offset;
1273			io_req->io_desc[i].length = bvec.bv_len;
1274			i++;
1275		}
1276	}
1277
1278	if (dev->cow.file) {
1279		for (i = 0; i < io_req->desc_cnt; i++) {
1280			cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1281				   dev->cow.bitmap, dev->cow.bitmap_offset,
1282				   dev->cow.bitmap_len);
1283			byte_offset += io_req->io_desc[i].length;
1284		}
1285
1286	}
1287}
1288
1289static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1290					   int desc_cnt)
1291{
1292	struct io_thread_req *io_req;
1293	int i;
1294
1295	io_req = kmalloc(sizeof(*io_req) +
1296			 (desc_cnt * sizeof(struct io_desc)),
1297			 GFP_ATOMIC);
1298	if (!io_req)
1299		return NULL;
1300
1301	io_req->req = req;
1302	if (dev->cow.file)
1303		io_req->fds[0] = dev->cow.fd;
1304	else
1305		io_req->fds[0] = dev->fd;
1306	io_req->error = 0;
1307	io_req->sectorsize = SECTOR_SIZE;
1308	io_req->fds[1] = dev->fd;
1309	io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1310	io_req->offsets[0] = 0;
1311	io_req->offsets[1] = dev->cow.data_offset;
1312
1313	for (i = 0 ; i < desc_cnt; i++) {
1314		io_req->io_desc[i].sector_mask = 0;
1315		io_req->io_desc[i].cow_offset = -1;
1316	}
1317
1318	return io_req;
1319}
1320
1321static int ubd_submit_request(struct ubd *dev, struct request *req)
1322{
1323	int segs = 0;
1324	struct io_thread_req *io_req;
1325	int ret;
1326	int op = req_op(req);
1327
1328	if (op == REQ_OP_FLUSH)
1329		segs = 0;
1330	else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1331		segs = 1;
1332	else
1333		segs = blk_rq_nr_phys_segments(req);
1334
1335	io_req = ubd_alloc_req(dev, req, segs);
1336	if (!io_req)
1337		return -ENOMEM;
1338
1339	io_req->desc_cnt = segs;
1340	if (segs)
1341		ubd_map_req(dev, io_req, req);
1342
1343	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1344	if (ret != sizeof(io_req)) {
1345		if (ret != -EAGAIN)
1346			pr_err("write to io thread failed: %d\n", -ret);
1347		kfree(io_req);
1348	}
1349	return ret;
1350}
1351
1352static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1353				 const struct blk_mq_queue_data *bd)
1354{
1355	struct ubd *ubd_dev = hctx->queue->queuedata;
1356	struct request *req = bd->rq;
1357	int ret = 0, res = BLK_STS_OK;
1358
1359	blk_mq_start_request(req);
1360
1361	spin_lock_irq(&ubd_dev->lock);
1362
1363	switch (req_op(req)) {
1364	case REQ_OP_FLUSH:
1365	case REQ_OP_READ:
1366	case REQ_OP_WRITE:
1367	case REQ_OP_DISCARD:
1368	case REQ_OP_WRITE_ZEROES:
1369		ret = ubd_submit_request(ubd_dev, req);
1370		break;
1371	default:
1372		WARN_ON_ONCE(1);
1373		res = BLK_STS_NOTSUPP;
1374	}
1375
1376	spin_unlock_irq(&ubd_dev->lock);
1377
1378	if (ret < 0) {
1379		if (ret == -ENOMEM)
1380			res = BLK_STS_RESOURCE;
1381		else
1382			res = BLK_STS_DEV_RESOURCE;
1383	}
1384
1385	return res;
1386}
1387
1388static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1389{
1390	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1391
1392	geo->heads = 128;
1393	geo->sectors = 32;
1394	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1395	return 0;
1396}
1397
1398static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1399		     unsigned int cmd, unsigned long arg)
1400{
1401	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1402	u16 ubd_id[ATA_ID_WORDS];
1403
1404	switch (cmd) {
1405		struct cdrom_volctrl volume;
1406	case HDIO_GET_IDENTITY:
1407		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1408		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1409		ubd_id[ATA_ID_HEADS]	= 128;
1410		ubd_id[ATA_ID_SECTORS]	= 32;
1411		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1412				 sizeof(ubd_id)))
1413			return -EFAULT;
1414		return 0;
1415
1416	case CDROMVOLREAD:
1417		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1418			return -EFAULT;
1419		volume.channel0 = 255;
1420		volume.channel1 = 255;
1421		volume.channel2 = 255;
1422		volume.channel3 = 255;
1423		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1424			return -EFAULT;
1425		return 0;
1426	}
1427	return -EINVAL;
1428}
1429
1430static int map_error(int error_code)
1431{
1432	switch (error_code) {
1433	case 0:
1434		return BLK_STS_OK;
1435	case ENOSYS:
1436	case EOPNOTSUPP:
1437		return BLK_STS_NOTSUPP;
1438	case ENOSPC:
1439		return BLK_STS_NOSPC;
1440	}
1441	return BLK_STS_IOERR;
1442}
1443
1444/*
1445 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1446 *
1447 * The following functions are part of UML hypervisor code.
1448 * All functions from here onwards are executed as a helper
1449 * thread and are not allowed to execute any kernel functions.
1450 *
1451 * Any communication must occur strictly via shared memory and IPC.
1452 *
1453 * Do not add printks, locks, kernel memory operations, etc - it
1454 * will result in unpredictable behaviour and/or crashes.
1455 */
1456
1457static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1458{
1459	int n;
1460
1461	if (segment->cow_offset == -1)
1462		return map_error(0);
1463
1464	n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1465			  sizeof(segment->bitmap_words), segment->cow_offset);
1466	if (n != sizeof(segment->bitmap_words))
1467		return map_error(-n);
1468
1469	return map_error(0);
1470}
1471
1472static void do_io(struct io_thread_req *req, struct io_desc *desc)
1473{
1474	char *buf = NULL;
1475	unsigned long len;
1476	int n, nsectors, start, end, bit;
1477	__u64 off;
1478
1479	/* FLUSH is really a special case, we cannot "case" it with others */
1480
1481	if (req_op(req->req) == REQ_OP_FLUSH) {
1482		/* fds[0] is always either the rw image or our cow file */
1483		req->error = map_error(-os_sync_file(req->fds[0]));
1484		return;
1485	}
1486
1487	nsectors = desc->length / req->sectorsize;
1488	start = 0;
1489	do {
1490		bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
1491		end = start;
1492		while((end < nsectors) &&
1493		      (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
1494			end++;
1495
1496		off = req->offset + req->offsets[bit] +
1497			start * req->sectorsize;
1498		len = (end - start) * req->sectorsize;
1499		if (desc->buffer != NULL)
1500			buf = &desc->buffer[start * req->sectorsize];
1501
1502		switch (req_op(req->req)) {
1503		case REQ_OP_READ:
1504			n = 0;
1505			do {
1506				buf = &buf[n];
1507				len -= n;
1508				n = os_pread_file(req->fds[bit], buf, len, off);
1509				if (n < 0) {
1510					req->error = map_error(-n);
1511					return;
1512				}
1513			} while((n < len) && (n != 0));
1514			if (n < len) memset(&buf[n], 0, len - n);
1515			break;
1516		case REQ_OP_WRITE:
1517			n = os_pwrite_file(req->fds[bit], buf, len, off);
1518			if(n != len){
1519				req->error = map_error(-n);
1520				return;
1521			}
1522			break;
1523		case REQ_OP_DISCARD:
 
 
 
 
 
 
1524		case REQ_OP_WRITE_ZEROES:
1525			n = os_falloc_punch(req->fds[bit], off, len);
1526			if (n) {
1527				req->error = map_error(-n);
1528				return;
1529			}
1530			break;
1531		default:
1532			WARN_ON_ONCE(1);
1533			req->error = BLK_STS_NOTSUPP;
1534			return;
1535		}
1536
1537		start = end;
1538	} while(start < nsectors);
1539
1540	req->offset += len;
1541	req->error = update_bitmap(req, desc);
1542}
1543
/* Changed in start_io_thread, which in this file is called only from
 * ubd_driver_init, an initcall.  The I/O thread reads request pointers
 * from this descriptor and writes completed ones back to it.
 */
int kernel_fd = -1;

/* Only changed by the io thread, which increments it once per request.
 * XXX: the value is currently never read.
 */
static int io_count = 0;
1551
/*
 * Main loop of the I/O helper thread.
 *
 * Repeatedly reads a batch of request pointers from kernel_fd into
 * io_req_buffer, runs do_io() on every descriptor of every request, and
 * writes the same batch of pointers back to kernel_fd.  The loop never
 * exits; the trailing return is unreachable.
 *
 * @arg is not used.
 */
int io_thread(void *arg)
{
	int n, count, written, res;

	os_fix_helper_signals();

	while(1){
		/* n is the number of bytes read, i.e. n / sizeof(pointer) requests */
		n = bulk_req_safe_read(
			kernel_fd,
			io_req_buffer,
			&io_remainder,
			&io_remainder_size,
			UBD_REQ_BUFFER_SIZE
		);
		if (n <= 0) {
			/* presumably waits until kernel_fd is readable — TODO confirm */
			if (n == -EAGAIN)
				ubd_read_poll(-1);

			continue;
		}

		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
			struct io_thread_req *req = (*io_req_buffer)[count];
			int i;

			io_count++;
			/* Stop at the first descriptor that records an error */
			for (i = 0; !req->error && i < req->desc_cnt; i++)
				do_io(req, &(req->io_desc[i]));

		}

		written = 0;

		/* Hand the batch back; retry until all n bytes are written */
		do {
			res = os_write_file(kernel_fd,
					    ((char *) io_req_buffer) + written,
					    n - written);
			/* Negative results are ignored and the write is retried */
			if (res >= 0) {
				written += res;
			}
			if (written < n) {
				ubd_write_poll(-1);
			}
		} while (written < n);
	}

	return 0;
}