Linux Audio

Check our new training course

Loading...
v6.2
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Virtio vhost-user driver
   4 *
   5 * Copyright(c) 2019 Intel Corporation
   6 *
   7 * This driver allows virtio devices to be used over a vhost-user socket.
   8 *
   9 * Guest devices can be instantiated by kernel module or command line
  10 * parameters. One device will be created for each parameter. Syntax:
  11 *
  12 *		virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
  13 * where:
  14 *		<socket>	:= vhost-user socket path to connect
  15 *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
  16 *		<platform_id>	:= (optional) platform device id
  17 *
  18 * example:
  19 *		virtio_uml.device=/var/uml.socket:1
  20 *
  21 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
  22 */
  23#include <linux/module.h>
  24#include <linux/of.h>
  25#include <linux/platform_device.h>
  26#include <linux/slab.h>
  27#include <linux/virtio.h>
  28#include <linux/virtio_config.h>
  29#include <linux/virtio_ring.h>
  30#include <linux/time-internal.h>
  31#include <linux/virtio-uml.h>
  32#include <shared/as-layout.h>
  33#include <irq_kern.h>
  34#include <init.h>
  35#include <os.h>
  36#include "vhost_user.h"
  37
 
 
 
 
 
/* Ring size requested from vring_create_virtqueue() for every virtqueue. */
#define MAX_SUPPORTED_QUEUE_SIZE	256

/* Map an embedded struct virtio_device back to its virtio_uml_device. */
#define to_virtio_uml_device(_vdev) \
	container_of(_vdev, struct virtio_uml_device, vdev)
  42
/* Platform data attached to one virtio-uml platform device. */
struct virtio_uml_platform_data {
	/* virtio device id (the <virtio_id> of the module parameter) */
	u32 virtio_device_id;
	/* vhost-user socket path (the <socket> of the module parameter) */
	const char *socket_path;
	/* scheduled by vhost_user_check_reset() on -ECONNRESET */
	struct work_struct conn_broken_wk;
	struct platform_device *pdev;
};
  49
  50struct virtio_uml_device {
  51	struct virtio_device vdev;
  52	struct platform_device *pdev;
  53	struct virtio_uml_platform_data *pdata;
  54
  55	spinlock_t sock_lock;
  56	int sock, req_fd, irq;
  57	u64 features;
  58	u64 protocol_features;
  59	u8 status;
  60	u8 registered:1;
  61	u8 suspended:1;
  62	u8 no_vq_suspend:1;
  63
  64	u8 config_changed_irq:1;
  65	uint64_t vq_irq_vq_map;
  66	int recv_rc;
  67};
  68
  69struct virtio_uml_vq_info {
  70	int kick_fd, call_fd;
  71	char name[32];
  72	bool suspended;
  73};
  74
/* Defined outside this file; used to describe memory in vhost_user_set_mem_table(). */
extern unsigned long long physmem_size, highmem;

/* Log an error against the platform device behind @vu_dev. */
#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)
  78
  79/* Vhost-user protocol */
  80
  81static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
  82			    const int *fds, unsigned int fds_num)
  83{
  84	int rc;
  85
  86	do {
  87		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
  88		if (rc > 0) {
  89			buf += rc;
  90			len -= rc;
  91			fds = NULL;
  92			fds_num = 0;
  93		}
  94	} while (len && (rc >= 0 || rc == -EINTR));
  95
  96	if (rc < 0)
  97		return rc;
  98	return 0;
  99}
 100
 101static int full_read(int fd, void *buf, int len, bool abortable)
 102{
 103	int rc;
 104
 105	if (!len)
 106		return 0;
 107
 108	do {
 109		rc = os_read_file(fd, buf, len);
 110		if (rc > 0) {
 111			buf += rc;
 112			len -= rc;
 113		}
 114	} while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));
 115
 116	if (rc < 0)
 117		return rc;
 118	if (rc == 0)
 119		return -ECONNRESET;
 120	return 0;
 121}
 122
 123static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
 124{
 125	return full_read(fd, msg, sizeof(msg->header), true);
 126}
 127
 128static int vhost_user_recv(struct virtio_uml_device *vu_dev,
 129			   int fd, struct vhost_user_msg *msg,
 130			   size_t max_payload_size, bool wait)
 131{
 132	size_t size;
 133	int rc;
 134
 135	/*
 136	 * In virtio time-travel mode, we're handling all the vhost-user
 137	 * FDs by polling them whenever appropriate. However, we may get
 138	 * into a situation where we're sending out an interrupt message
 139	 * to a device (e.g. a net device) and need to handle a simulation
 140	 * time message while doing so, e.g. one that tells us to update
 141	 * our idea of how long we can run without scheduling.
 142	 *
 143	 * Thus, we need to not just read() from the given fd, but need
 144	 * to also handle messages for the simulation time - this function
 145	 * does that for us while waiting for the given fd to be readable.
 146	 */
 147	if (wait)
 148		time_travel_wait_readable(fd);
 149
 150	rc = vhost_user_recv_header(fd, msg);
 151
 152	if (rc)
 153		return rc;
 154	size = msg->header.size;
 155	if (size > max_payload_size)
 156		return -EPROTO;
 157	return full_read(fd, &msg->payload, size, false);
 158}
 159
 160static void vhost_user_check_reset(struct virtio_uml_device *vu_dev,
 161				   int rc)
 162{
 163	struct virtio_uml_platform_data *pdata = vu_dev->pdata;
 164
 165	if (rc != -ECONNRESET)
 166		return;
 167
 168	if (!vu_dev->registered)
 169		return;
 170
 171	virtio_break_device(&vu_dev->vdev);
 172	schedule_work(&pdata->conn_broken_wk);
 173}
 174
 175static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
 176				struct vhost_user_msg *msg,
 177				size_t max_payload_size)
 178{
 179	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
 180				 max_payload_size, true);
 181
 182	if (rc) {
 183		vhost_user_check_reset(vu_dev, rc);
 184		return rc;
 185	}
 186
 187	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
 188		return -EPROTO;
 189
 190	return 0;
 191}
 192
 193static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
 194			       u64 *value)
 195{
 196	struct vhost_user_msg msg;
 197	int rc = vhost_user_recv_resp(vu_dev, &msg,
 198				      sizeof(msg.payload.integer));
 199
 200	if (rc)
 201		return rc;
 202	if (msg.header.size != sizeof(msg.payload.integer))
 203		return -EPROTO;
 204	*value = msg.payload.integer;
 205	return 0;
 206}
 207
 208static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
 209			       struct vhost_user_msg *msg,
 210			       size_t max_payload_size)
 211{
 212	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
 213				 max_payload_size, false);
 214
 215	if (rc)
 216		return rc;
 217
 218	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
 219			VHOST_USER_VERSION)
 220		return -EPROTO;
 221
 222	return 0;
 223}
 224
 225static int vhost_user_send(struct virtio_uml_device *vu_dev,
 226			   bool need_response, struct vhost_user_msg *msg,
 227			   int *fds, size_t num_fds)
 228{
 229	size_t size = sizeof(msg->header) + msg->header.size;
 230	unsigned long flags;
 231	bool request_ack;
 232	int rc;
 233
 234	msg->header.flags |= VHOST_USER_VERSION;
 235
 236	/*
 237	 * The need_response flag indicates that we already need a response,
 238	 * e.g. to read the features. In these cases, don't request an ACK as
 239	 * it is meaningless. Also request an ACK only if supported.
 240	 */
 241	request_ack = !need_response;
 242	if (!(vu_dev->protocol_features &
 243			BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
 244		request_ack = false;
 245
 246	if (request_ack)
 247		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
 248
 249	spin_lock_irqsave(&vu_dev->sock_lock, flags);
 250	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
 251	if (rc < 0)
 252		goto out;
 253
 254	if (request_ack) {
 255		uint64_t status;
 256
 257		rc = vhost_user_recv_u64(vu_dev, &status);
 258		if (rc)
 259			goto out;
 260
 261		if (status) {
 262			vu_err(vu_dev, "slave reports error: %llu\n", status);
 263			rc = -EIO;
 264			goto out;
 265		}
 266	}
 267
 268out:
 269	spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
 270	return rc;
 271}
 272
 273static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
 274				      bool need_response, u32 request)
 275{
 276	struct vhost_user_msg msg = {
 277		.header.request = request,
 278	};
 279
 280	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
 281}
 282
 283static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
 284					 u32 request, int fd)
 285{
 286	struct vhost_user_msg msg = {
 287		.header.request = request,
 288	};
 289
 290	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
 291}
 292
 293static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
 294			       u32 request, u64 value)
 295{
 296	struct vhost_user_msg msg = {
 297		.header.request = request,
 298		.header.size = sizeof(msg.payload.integer),
 299		.payload.integer = value,
 300	};
 301
 302	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 303}
 304
 305static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
 306{
 307	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
 308}
 309
 310static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
 311				   u64 *features)
 312{
 313	int rc = vhost_user_send_no_payload(vu_dev, true,
 314					    VHOST_USER_GET_FEATURES);
 315
 316	if (rc)
 317		return rc;
 318	return vhost_user_recv_u64(vu_dev, features);
 319}
 320
 321static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
 322				   u64 features)
 323{
 324	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
 325}
 326
 327static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
 328					    u64 *protocol_features)
 329{
 330	int rc = vhost_user_send_no_payload(vu_dev, true,
 331			VHOST_USER_GET_PROTOCOL_FEATURES);
 332
 333	if (rc)
 334		return rc;
 335	return vhost_user_recv_u64(vu_dev, protocol_features);
 336}
 337
 338static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
 339					    u64 protocol_features)
 340{
 341	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
 342				   protocol_features);
 343}
 344
 345static void vhost_user_reply(struct virtio_uml_device *vu_dev,
 346			     struct vhost_user_msg *msg, int response)
 347{
 348	struct vhost_user_msg reply = {
 349		.payload.integer = response,
 350	};
 351	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
 352	int rc;
 353
 354	reply.header = msg->header;
 355	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
 356	reply.header.flags |= VHOST_USER_FLAG_REPLY;
 357	reply.header.size = sizeof(reply.payload.integer);
 358
 359	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
 360
 361	if (rc)
 362		vu_err(vu_dev,
 363		       "sending reply to slave request failed: %d (size %zu)\n",
 364		       rc, size);
 365}
 366
 367static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
 368				       struct time_travel_event *ev)
 369{
 370	struct virtqueue *vq;
 371	int response = 1;
 372	struct {
 373		struct vhost_user_msg msg;
 374		u8 extra_payload[512];
 375	} msg;
 376	int rc;
 377	irqreturn_t irq_rc = IRQ_NONE;
 378
 379	while (1) {
 380		rc = vhost_user_recv_req(vu_dev, &msg.msg,
 381					 sizeof(msg.msg.payload) +
 382					 sizeof(msg.extra_payload));
 383		if (rc)
 384			break;
 385
 386		switch (msg.msg.header.request) {
 387		case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
 388			vu_dev->config_changed_irq = true;
 389			response = 0;
 390			break;
 391		case VHOST_USER_SLAVE_VRING_CALL:
 392			virtio_device_for_each_vq((&vu_dev->vdev), vq) {
 393				if (vq->index == msg.msg.payload.vring_state.index) {
 394					response = 0;
 395					vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index);
 396					break;
 397				}
 398			}
 399			break;
 400		case VHOST_USER_SLAVE_IOTLB_MSG:
 401			/* not supported - VIRTIO_F_ACCESS_PLATFORM */
 402		case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
 403			/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
 404		default:
 405			vu_err(vu_dev, "unexpected slave request %d\n",
 406			       msg.msg.header.request);
 407		}
 408
 409		if (ev && !vu_dev->suspended)
 410			time_travel_add_irq_event(ev);
 411
 412		if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
 413			vhost_user_reply(vu_dev, &msg.msg, response);
 414		irq_rc = IRQ_HANDLED;
 415	};
 416	/* mask EAGAIN as we try non-blocking read until socket is empty */
 417	vu_dev->recv_rc = (rc == -EAGAIN) ? 0 : rc;
 418	return irq_rc;
 419}
 420
 421static irqreturn_t vu_req_interrupt(int irq, void *data)
 422{
 423	struct virtio_uml_device *vu_dev = data;
 424	irqreturn_t ret = IRQ_HANDLED;
 425
 426	if (!um_irq_timetravel_handler_used())
 427		ret = vu_req_read_message(vu_dev, NULL);
 428
 429	if (vu_dev->recv_rc) {
 430		vhost_user_check_reset(vu_dev, vu_dev->recv_rc);
 431	} else if (vu_dev->vq_irq_vq_map) {
 432		struct virtqueue *vq;
 433
 434		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
 435			if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index))
 436				vring_interrupt(0 /* ignored */, vq);
 437		}
 438		vu_dev->vq_irq_vq_map = 0;
 439	} else if (vu_dev->config_changed_irq) {
 440		virtio_config_changed(&vu_dev->vdev);
 441		vu_dev->config_changed_irq = false;
 
 
 
 
 
 
 
 
 442	}
 443
 444	return ret;
 445}
 446
 447static void vu_req_interrupt_comm_handler(int irq, int fd, void *data,
 448					  struct time_travel_event *ev)
 449{
 450	vu_req_read_message(data, ev);
 451}
 452
 453static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
 454{
 455	int rc, req_fds[2];
 456
 457	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
 458	rc = os_pipe(req_fds, true, true);
 459	if (rc < 0)
 460		return rc;
 461	vu_dev->req_fd = req_fds[0];
 462
 463	rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
 464			       vu_req_interrupt, IRQF_SHARED,
 465			       vu_dev->pdev->name, vu_dev,
 466			       vu_req_interrupt_comm_handler);
 467	if (rc < 0)
 468		goto err_close;
 469
 470	vu_dev->irq = rc;
 471
 472	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
 473					   req_fds[1]);
 474	if (rc)
 475		goto err_free_irq;
 476
 477	goto out;
 478
 479err_free_irq:
 480	um_free_irq(vu_dev->irq, vu_dev);
 481err_close:
 482	os_close_file(req_fds[0]);
 483out:
 484	/* Close unused write end of request fds */
 485	os_close_file(req_fds[1]);
 486	return rc;
 487}
 488
 489static int vhost_user_init(struct virtio_uml_device *vu_dev)
 490{
 491	int rc = vhost_user_set_owner(vu_dev);
 492
 493	if (rc)
 494		return rc;
 495	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
 496	if (rc)
 497		return rc;
 498
 499	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
 500		rc = vhost_user_get_protocol_features(vu_dev,
 501				&vu_dev->protocol_features);
 502		if (rc)
 503			return rc;
 504		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
 505		rc = vhost_user_set_protocol_features(vu_dev,
 506				vu_dev->protocol_features);
 507		if (rc)
 508			return rc;
 509	}
 510
 511	if (vu_dev->protocol_features &
 512			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
 513		rc = vhost_user_init_slave_req(vu_dev);
 514		if (rc)
 515			return rc;
 516	}
 517
 518	return 0;
 519}
 520
 521static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
 522				  u32 offset, void *buf, u32 len)
 523{
 524	u32 cfg_size = offset + len;
 525	struct vhost_user_msg *msg;
 526	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
 527	size_t msg_size = sizeof(msg->header) + payload_size;
 528	int rc;
 529
 530	if (!(vu_dev->protocol_features &
 531	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
 532		return;
 533
 534	msg = kzalloc(msg_size, GFP_KERNEL);
 535	if (!msg)
 536		return;
 537	msg->header.request = VHOST_USER_GET_CONFIG;
 538	msg->header.size = payload_size;
 539	msg->payload.config.offset = 0;
 540	msg->payload.config.size = cfg_size;
 541
 542	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
 543	if (rc) {
 544		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
 545		       rc);
 546		goto free;
 547	}
 548
 549	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
 550	if (rc) {
 551		vu_err(vu_dev,
 552		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
 553		       rc);
 554		goto free;
 555	}
 556
 557	if (msg->header.size != payload_size ||
 558	    msg->payload.config.size != cfg_size) {
 559		rc = -EPROTO;
 560		vu_err(vu_dev,
 561		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
 562		       msg->header.size, payload_size,
 563		       msg->payload.config.size, cfg_size);
 564		goto free;
 565	}
 566	memcpy(buf, msg->payload.config.payload + offset, len);
 567
 568free:
 569	kfree(msg);
 570}
 571
 572static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
 573				  u32 offset, const void *buf, u32 len)
 574{
 575	struct vhost_user_msg *msg;
 576	size_t payload_size = sizeof(msg->payload.config) + len;
 577	size_t msg_size = sizeof(msg->header) + payload_size;
 578	int rc;
 579
 580	if (!(vu_dev->protocol_features &
 581	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
 582		return;
 583
 584	msg = kzalloc(msg_size, GFP_KERNEL);
 585	if (!msg)
 586		return;
 587	msg->header.request = VHOST_USER_SET_CONFIG;
 588	msg->header.size = payload_size;
 589	msg->payload.config.offset = offset;
 590	msg->payload.config.size = len;
 591	memcpy(msg->payload.config.payload, buf, len);
 592
 593	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
 594	if (rc)
 595		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
 596		       rc);
 597
 598	kfree(msg);
 599}
 600
 601static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
 602				      struct vhost_user_mem_region *region_out)
 603{
 604	unsigned long long mem_offset;
 605	int rc = phys_mapping(addr, &mem_offset);
 606
 607	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
 608		return -EFAULT;
 609	*fd_out = rc;
 610	region_out->guest_addr = addr;
 611	region_out->user_addr = addr;
 612	region_out->size = size;
 613	region_out->mmap_offset = mem_offset;
 614
 615	/* Ensure mapping is valid for the entire region */
 616	rc = phys_mapping(addr + size - 1, &mem_offset);
 617	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
 618		 addr + size - 1, rc, *fd_out))
 619		return -EFAULT;
 620	return 0;
 621}
 622
 623static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
 624{
 625	struct vhost_user_msg msg = {
 626		.header.request = VHOST_USER_SET_MEM_TABLE,
 627		.header.size = sizeof(msg.payload.mem_regions),
 628		.payload.mem_regions.num = 1,
 629	};
 630	unsigned long reserved = uml_reserved - uml_physmem;
 631	int fds[2];
 632	int rc;
 633
 634	/*
 635	 * This is a bit tricky, see also the comment with setup_physmem().
 636	 *
 637	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
 638	 * but the code and data we *already* have is omitted. To us, this
 639	 * is no difference, since they both become part of our address
 640	 * space and memory consumption. To somebody looking in from the
 641	 * outside, however, it is different because the part of our memory
 642	 * consumption that's already part of the binary (code/data) is not
 643	 * mapped from the file, so it's not visible to another mmap from
 644	 * the file descriptor.
 645	 *
 646	 * Thus, don't advertise this space to the vhost-user slave. This
 647	 * means that the slave will likely abort or similar when we give
 648	 * it an address from the hidden range, since it's not marked as
 649	 * a valid address, but at least that way we detect the issue and
 650	 * don't just have the slave read an all-zeroes buffer from the
 651	 * shared memory file, or write something there that we can never
 652	 * see (depending on the direction of the virtqueue traffic.)
 653	 *
 654	 * Since we usually don't want to use .text for virtio buffers,
 655	 * this effectively means that you cannot use
 656	 *  1) global variables, which are in the .bss and not in the shm
 657	 *     file-backed memory
 658	 *  2) the stack in some processes, depending on where they have
 659	 *     their stack (or maybe only no interrupt stack?)
 660	 *
 661	 * The stack is already not typically valid for DMA, so this isn't
 662	 * much of a restriction, but global variables might be encountered.
 663	 *
 664	 * It might be possible to fix it by copying around the data that's
 665	 * between bss_start and where we map the file now, but it's not
 666	 * something that you typically encounter with virtio drivers, so
 667	 * it didn't seem worthwhile.
 668	 */
 669	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
 670					&fds[0],
 671					&msg.payload.mem_regions.regions[0]);
 672
 673	if (rc < 0)
 674		return rc;
 675	if (highmem) {
 676		msg.payload.mem_regions.num++;
 677		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
 678				&fds[1], &msg.payload.mem_regions.regions[1]);
 679		if (rc < 0)
 680			return rc;
 681	}
 682
 683	return vhost_user_send(vu_dev, false, &msg, fds,
 684			       msg.payload.mem_regions.num);
 685}
 686
 687static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
 688				      u32 request, u32 index, u32 num)
 689{
 690	struct vhost_user_msg msg = {
 691		.header.request = request,
 692		.header.size = sizeof(msg.payload.vring_state),
 693		.payload.vring_state.index = index,
 694		.payload.vring_state.num = num,
 695	};
 696
 697	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 698}
 699
 700static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
 701				    u32 index, u32 num)
 702{
 703	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
 704					  index, num);
 705}
 706
 707static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
 708				     u32 index, u32 offset)
 709{
 710	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
 711					  index, offset);
 712}
 713
 714static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
 715				     u32 index, u64 desc, u64 used, u64 avail,
 716				     u64 log)
 717{
 718	struct vhost_user_msg msg = {
 719		.header.request = VHOST_USER_SET_VRING_ADDR,
 720		.header.size = sizeof(msg.payload.vring_addr),
 721		.payload.vring_addr.index = index,
 722		.payload.vring_addr.desc = desc,
 723		.payload.vring_addr.used = used,
 724		.payload.vring_addr.avail = avail,
 725		.payload.vring_addr.log = log,
 726	};
 727
 728	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 729}
 730
 731static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
 732				   u32 request, int index, int fd)
 733{
 734	struct vhost_user_msg msg = {
 735		.header.request = request,
 736		.header.size = sizeof(msg.payload.integer),
 737		.payload.integer = index,
 738	};
 739
 740	if (index & ~VHOST_USER_VRING_INDEX_MASK)
 741		return -EINVAL;
 742	if (fd < 0) {
 743		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
 744		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 745	}
 746	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
 747}
 748
 749static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
 750				     int index, int fd)
 751{
 752	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
 753				       index, fd);
 754}
 755
 756static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
 757				     int index, int fd)
 758{
 759	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
 760				       index, fd);
 761}
 762
 763static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
 764				       u32 index, bool enable)
 765{
 766	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
 767		return 0;
 768
 769	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
 770					  index, enable);
 771}
 772
 773
 774/* Virtio interface */
 775
 776static bool vu_notify(struct virtqueue *vq)
 777{
 778	struct virtio_uml_vq_info *info = vq->priv;
 779	const uint64_t n = 1;
 780	int rc;
 781
 782	if (info->suspended)
 783		return true;
 784
 785	time_travel_propagate_time();
 786
 787	if (info->kick_fd < 0) {
 788		struct virtio_uml_device *vu_dev;
 789
 790		vu_dev = to_virtio_uml_device(vq->vdev);
 791
 792		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
 793						  vq->index, 0) == 0;
 794	}
 795
 796	do {
 797		rc = os_write_file(info->kick_fd, &n, sizeof(n));
 798	} while (rc == -EINTR);
 799	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
 800}
 801
 802static irqreturn_t vu_interrupt(int irq, void *opaque)
 803{
 804	struct virtqueue *vq = opaque;
 805	struct virtio_uml_vq_info *info = vq->priv;
 806	uint64_t n;
 807	int rc;
 808	irqreturn_t ret = IRQ_NONE;
 809
 810	do {
 811		rc = os_read_file(info->call_fd, &n, sizeof(n));
 812		if (rc == sizeof(n))
 813			ret |= vring_interrupt(irq, vq);
 814	} while (rc == sizeof(n) || rc == -EINTR);
 815	WARN(rc != -EAGAIN, "read returned %d\n", rc);
 816	return ret;
 817}
 818
 819
 820static void vu_get(struct virtio_device *vdev, unsigned offset,
 821		   void *buf, unsigned len)
 822{
 823	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 824
 825	vhost_user_get_config(vu_dev, offset, buf, len);
 826}
 827
 828static void vu_set(struct virtio_device *vdev, unsigned offset,
 829		   const void *buf, unsigned len)
 830{
 831	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 832
 833	vhost_user_set_config(vu_dev, offset, buf, len);
 834}
 835
 836static u8 vu_get_status(struct virtio_device *vdev)
 837{
 838	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 839
 840	return vu_dev->status;
 841}
 842
 843static void vu_set_status(struct virtio_device *vdev, u8 status)
 844{
 845	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 846
 847	vu_dev->status = status;
 848}
 849
 850static void vu_reset(struct virtio_device *vdev)
 851{
 852	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 853
 854	vu_dev->status = 0;
 855}
 856
 857static void vu_del_vq(struct virtqueue *vq)
 858{
 859	struct virtio_uml_vq_info *info = vq->priv;
 860
 861	if (info->call_fd >= 0) {
 862		struct virtio_uml_device *vu_dev;
 863
 864		vu_dev = to_virtio_uml_device(vq->vdev);
 865
 866		um_free_irq(vu_dev->irq, vq);
 867		os_close_file(info->call_fd);
 868	}
 869
 870	if (info->kick_fd >= 0)
 871		os_close_file(info->kick_fd);
 872
 873	vring_del_virtqueue(vq);
 874	kfree(info);
 875}
 876
 877static void vu_del_vqs(struct virtio_device *vdev)
 878{
 879	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 880	struct virtqueue *vq, *n;
 881	u64 features;
 882
 883	/* Note: reverse order as a workaround to a decoding bug in snabb */
 884	list_for_each_entry_reverse(vq, &vdev->vqs, list)
 885		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
 886
 887	/* Ensure previous messages have been processed */
 888	WARN_ON(vhost_user_get_features(vu_dev, &features));
 889
 890	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
 891		vu_del_vq(vq);
 892}
 893
 894static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
 895			       struct virtqueue *vq)
 896{
 897	struct virtio_uml_vq_info *info = vq->priv;
 898	int call_fds[2];
 899	int rc;
 900
 901	/* no call FD needed/desired in this case */
 902	if (vu_dev->protocol_features &
 903			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
 904	    vu_dev->protocol_features &
 905			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
 906		info->call_fd = -1;
 907		return 0;
 908	}
 909
 910	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
 911	rc = os_pipe(call_fds, true, true);
 912	if (rc < 0)
 913		return rc;
 914
 915	info->call_fd = call_fds[0];
 916	rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
 917			    vu_interrupt, IRQF_SHARED, info->name, vq);
 918	if (rc < 0)
 919		goto close_both;
 920
 921	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
 922	if (rc)
 923		goto release_irq;
 924
 925	goto out;
 926
 927release_irq:
 928	um_free_irq(vu_dev->irq, vq);
 929close_both:
 930	os_close_file(call_fds[0]);
 931out:
 932	/* Close (unused) write end of call fds */
 933	os_close_file(call_fds[1]);
 934
 935	return rc;
 936}
 937
 938static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
 939				     unsigned index, vq_callback_t *callback,
 940				     const char *name, bool ctx)
 941{
 942	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 943	struct platform_device *pdev = vu_dev->pdev;
 944	struct virtio_uml_vq_info *info;
 945	struct virtqueue *vq;
 946	int num = MAX_SUPPORTED_QUEUE_SIZE;
 947	int rc;
 948
 949	info = kzalloc(sizeof(*info), GFP_KERNEL);
 950	if (!info) {
 951		rc = -ENOMEM;
 952		goto error_kzalloc;
 953	}
 954	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
 955		 pdev->id, name);
 956
 957	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
 958				    ctx, vu_notify, callback, info->name);
 959	if (!vq) {
 960		rc = -ENOMEM;
 961		goto error_create;
 962	}
 963	vq->priv = info;
 964	vq->num_max = num;
 965	num = virtqueue_get_vring_size(vq);
 966
 967	if (vu_dev->protocol_features &
 968			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
 969		info->kick_fd = -1;
 970	} else {
 971		rc = os_eventfd(0, 0);
 972		if (rc < 0)
 973			goto error_kick;
 974		info->kick_fd = rc;
 975	}
 976
 977	rc = vu_setup_vq_call_fd(vu_dev, vq);
 978	if (rc)
 979		goto error_call;
 980
 981	rc = vhost_user_set_vring_num(vu_dev, index, num);
 982	if (rc)
 983		goto error_setup;
 984
 985	rc = vhost_user_set_vring_base(vu_dev, index, 0);
 986	if (rc)
 987		goto error_setup;
 988
 989	rc = vhost_user_set_vring_addr(vu_dev, index,
 990				       virtqueue_get_desc_addr(vq),
 991				       virtqueue_get_used_addr(vq),
 992				       virtqueue_get_avail_addr(vq),
 993				       (u64) -1);
 994	if (rc)
 995		goto error_setup;
 996
 997	return vq;
 998
 999error_setup:
1000	if (info->call_fd >= 0) {
1001		um_free_irq(vu_dev->irq, vq);
1002		os_close_file(info->call_fd);
1003	}
1004error_call:
1005	if (info->kick_fd >= 0)
1006		os_close_file(info->kick_fd);
1007error_kick:
1008	vring_del_virtqueue(vq);
1009error_create:
1010	kfree(info);
1011error_kzalloc:
1012	return ERR_PTR(rc);
1013}
1014
1015static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
1016		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
1017		       const char * const names[], const bool *ctx,
1018		       struct irq_affinity *desc)
1019{
1020	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1021	int i, queue_idx = 0, rc;
1022	struct virtqueue *vq;
1023
1024	/* not supported for now */
1025	if (WARN_ON(nvqs > 64))
1026		return -EINVAL;
1027
1028	rc = vhost_user_set_mem_table(vu_dev);
1029	if (rc)
1030		return rc;
1031
1032	for (i = 0; i < nvqs; ++i) {
1033		if (!names[i]) {
1034			vqs[i] = NULL;
1035			continue;
1036		}
1037
1038		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
1039				     ctx ? ctx[i] : false);
1040		if (IS_ERR(vqs[i])) {
1041			rc = PTR_ERR(vqs[i]);
1042			goto error_setup;
1043		}
1044	}
1045
1046	list_for_each_entry(vq, &vdev->vqs, list) {
1047		struct virtio_uml_vq_info *info = vq->priv;
1048
1049		if (info->kick_fd >= 0) {
1050			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
1051						       info->kick_fd);
1052			if (rc)
1053				goto error_setup;
1054		}
1055
1056		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
1057		if (rc)
1058			goto error_setup;
1059	}
1060
1061	return 0;
1062
1063error_setup:
1064	vu_del_vqs(vdev);
1065	return rc;
1066}
1067
1068static u64 vu_get_features(struct virtio_device *vdev)
1069{
1070	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1071
1072	return vu_dev->features;
1073}
1074
1075static int vu_finalize_features(struct virtio_device *vdev)
1076{
1077	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1078	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
1079
1080	vring_transport_features(vdev);
1081	vu_dev->features = vdev->features | supported;
1082
1083	return vhost_user_set_features(vu_dev, vu_dev->features);
1084}
1085
1086static const char *vu_bus_name(struct virtio_device *vdev)
1087{
1088	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1089
1090	return vu_dev->pdev->name;
1091}
1092
1093static const struct virtio_config_ops virtio_uml_config_ops = {
1094	.get = vu_get,
1095	.set = vu_set,
1096	.get_status = vu_get_status,
1097	.set_status = vu_set_status,
1098	.reset = vu_reset,
1099	.find_vqs = vu_find_vqs,
1100	.del_vqs = vu_del_vqs,
1101	.get_features = vu_get_features,
1102	.finalize_features = vu_finalize_features,
1103	.bus_name = vu_bus_name,
1104};
1105
1106static void virtio_uml_release_dev(struct device *d)
1107{
1108	struct virtio_device *vdev =
1109			container_of(d, struct virtio_device, dev);
1110	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1111
1112	time_travel_propagate_time();
1113
1114	/* might not have been opened due to not negotiating the feature */
1115	if (vu_dev->req_fd >= 0) {
1116		um_free_irq(vu_dev->irq, vu_dev);
1117		os_close_file(vu_dev->req_fd);
1118	}
1119
1120	os_close_file(vu_dev->sock);
1121	kfree(vu_dev);
1122}
1123
1124void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
1125				  bool no_vq_suspend)
1126{
1127	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1128
1129	if (WARN_ON(vdev->config != &virtio_uml_config_ops))
1130		return;
1131
1132	vu_dev->no_vq_suspend = no_vq_suspend;
1133	dev_info(&vdev->dev, "%sabled VQ suspend\n",
1134		 no_vq_suspend ? "dis" : "en");
1135}
1136
/*
 * Connection-broken work handler for devices created from the devicetree.
 */
static void vu_of_conn_broken(struct work_struct *wk)
{
	/*
	 * A devicetree-backed device cannot be removed from here, so all
	 * that is left to do is to emit a warning.
	 */
	WARN_ON(1);
}
1145
1146/* Platform device */
1147
1148static struct virtio_uml_platform_data *
1149virtio_uml_create_pdata(struct platform_device *pdev)
1150{
1151	struct device_node *np = pdev->dev.of_node;
1152	struct virtio_uml_platform_data *pdata;
1153	int ret;
1154
1155	if (!np)
1156		return ERR_PTR(-EINVAL);
1157
1158	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
1159	if (!pdata)
1160		return ERR_PTR(-ENOMEM);
1161
1162	INIT_WORK(&pdata->conn_broken_wk, vu_of_conn_broken);
1163	pdata->pdev = pdev;
1164
1165	ret = of_property_read_string(np, "socket-path", &pdata->socket_path);
1166	if (ret)
1167		return ERR_PTR(ret);
1168
1169	ret = of_property_read_u32(np, "virtio-device-id",
1170				   &pdata->virtio_device_id);
1171	if (ret)
1172		return ERR_PTR(ret);
1173
1174	return pdata;
1175}
1176
1177static int virtio_uml_probe(struct platform_device *pdev)
1178{
1179	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1180	struct virtio_uml_device *vu_dev;
1181	int rc;
1182
1183	if (!pdata) {
1184		pdata = virtio_uml_create_pdata(pdev);
1185		if (IS_ERR(pdata))
1186			return PTR_ERR(pdata);
1187	}
1188
1189	vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
1190	if (!vu_dev)
1191		return -ENOMEM;
1192
1193	vu_dev->pdata = pdata;
1194	vu_dev->vdev.dev.parent = &pdev->dev;
1195	vu_dev->vdev.dev.release = virtio_uml_release_dev;
1196	vu_dev->vdev.config = &virtio_uml_config_ops;
1197	vu_dev->vdev.id.device = pdata->virtio_device_id;
1198	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
1199	vu_dev->pdev = pdev;
1200	vu_dev->req_fd = -1;
1201
1202	time_travel_propagate_time();
1203
1204	do {
1205		rc = os_connect_socket(pdata->socket_path);
1206	} while (rc == -EINTR);
1207	if (rc < 0)
1208		goto error_free;
1209	vu_dev->sock = rc;
1210
1211	spin_lock_init(&vu_dev->sock_lock);
1212
1213	rc = vhost_user_init(vu_dev);
1214	if (rc)
1215		goto error_init;
1216
1217	platform_set_drvdata(pdev, vu_dev);
1218
1219	device_set_wakeup_capable(&vu_dev->vdev.dev, true);
1220
1221	rc = register_virtio_device(&vu_dev->vdev);
1222	if (rc)
1223		put_device(&vu_dev->vdev.dev);
1224	vu_dev->registered = 1;
1225	return rc;
1226
1227error_init:
1228	os_close_file(vu_dev->sock);
1229error_free:
1230	kfree(vu_dev);
1231	return rc;
1232}
1233
1234static int virtio_uml_remove(struct platform_device *pdev)
1235{
1236	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1237
1238	unregister_virtio_device(&vu_dev->vdev);
1239	return 0;
1240}
1241
1242/* Command line device list */
1243
/*
 * Release callback of the command line parent device; intentionally does
 * nothing.
 */
static void vu_cmdline_release_dev(struct device *d)
{
}
1247
1248static struct device vu_cmdline_parent = {
1249	.init_name = "virtio-uml-cmdline",
1250	.release = vu_cmdline_release_dev,
1251};
1252
1253static bool vu_cmdline_parent_registered;
1254static int vu_cmdline_id;
1255
1256static int vu_unregister_cmdline_device(struct device *dev, void *data)
1257{
1258	struct platform_device *pdev = to_platform_device(dev);
1259	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1260
1261	kfree(pdata->socket_path);
1262	platform_device_unregister(pdev);
1263	return 0;
1264}
1265
1266static void vu_conn_broken(struct work_struct *wk)
1267{
1268	struct virtio_uml_platform_data *pdata;
1269
1270	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
1271	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
1272}
1273
1274static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1275{
1276	const char *ids = strchr(device, ':');
1277	unsigned int virtio_device_id;
1278	int processed, consumed, err;
1279	char *socket_path;
1280	struct virtio_uml_platform_data pdata, *ppdata;
1281	struct platform_device *pdev;
1282
1283	if (!ids || ids == device)
1284		return -EINVAL;
1285
1286	processed = sscanf(ids, ":%u%n:%d%n",
1287			   &virtio_device_id, &consumed,
1288			   &vu_cmdline_id, &consumed);
1289
1290	if (processed < 1 || ids[consumed])
1291		return -EINVAL;
1292
1293	if (!vu_cmdline_parent_registered) {
1294		err = device_register(&vu_cmdline_parent);
1295		if (err) {
1296			pr_err("Failed to register parent device!\n");
1297			put_device(&vu_cmdline_parent);
1298			return err;
1299		}
1300		vu_cmdline_parent_registered = true;
1301	}
1302
1303	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1304	if (!socket_path)
1305		return -ENOMEM;
1306
1307	pdata.virtio_device_id = (u32) virtio_device_id;
1308	pdata.socket_path = socket_path;
1309
1310	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1311		vu_cmdline_id, virtio_device_id, socket_path);
1312
1313	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1314					     vu_cmdline_id++, &pdata,
1315					     sizeof(pdata));
1316	err = PTR_ERR_OR_ZERO(pdev);
1317	if (err)
1318		goto free;
1319
1320	ppdata = pdev->dev.platform_data;
1321	ppdata->pdev = pdev;
1322	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
1323
1324	return 0;
1325
1326free:
1327	kfree(socket_path);
1328	return err;
1329}
1330
1331static int vu_cmdline_get_device(struct device *dev, void *data)
1332{
1333	struct platform_device *pdev = to_platform_device(dev);
1334	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1335	char *buffer = data;
1336	unsigned int len = strlen(buffer);
1337
1338	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1339		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
1340	return 0;
1341}
1342
1343static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1344{
1345	buffer[0] = '\0';
1346	if (vu_cmdline_parent_registered)
1347		device_for_each_child(&vu_cmdline_parent, buffer,
1348				      vu_cmdline_get_device);
1349	return strlen(buffer) + 1;
1350}
1351
1352static const struct kernel_param_ops vu_cmdline_param_ops = {
1353	.set = vu_cmdline_set,
1354	.get = vu_cmdline_get,
1355};
1356
1357device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
1358__uml_help(vu_cmdline_param_ops,
1359"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
1360"    Configure a virtio device over a vhost-user socket.\n"
1361"    See virtio_ids.h for a list of possible virtio device id values.\n"
1362"    Optionally use a specific platform_device id.\n\n"
1363);
1364
1365
 
 
 
 
 
 
 
 
 
 
1366static void vu_unregister_cmdline_devices(void)
1367{
1368	if (vu_cmdline_parent_registered) {
1369		device_for_each_child(&vu_cmdline_parent, NULL,
1370				      vu_unregister_cmdline_device);
1371		device_unregister(&vu_cmdline_parent);
1372		vu_cmdline_parent_registered = false;
1373	}
1374}
1375
1376/* Platform driver */
1377
1378static const struct of_device_id virtio_uml_match[] = {
1379	{ .compatible = "virtio,uml", },
1380	{ }
1381};
1382MODULE_DEVICE_TABLE(of, virtio_uml_match);
1383
1384static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
1385{
1386	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1387
1388	if (!vu_dev->no_vq_suspend) {
1389		struct virtqueue *vq;
1390
1391		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
1392			struct virtio_uml_vq_info *info = vq->priv;
1393
1394			info->suspended = true;
1395			vhost_user_set_vring_enable(vu_dev, vq->index, false);
1396		}
1397	}
1398
1399	if (!device_may_wakeup(&vu_dev->vdev.dev)) {
1400		vu_dev->suspended = true;
1401		return 0;
1402	}
1403
1404	return irq_set_irq_wake(vu_dev->irq, 1);
1405}
1406
1407static int virtio_uml_resume(struct platform_device *pdev)
1408{
1409	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1410
1411	if (!vu_dev->no_vq_suspend) {
1412		struct virtqueue *vq;
1413
1414		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
1415			struct virtio_uml_vq_info *info = vq->priv;
1416
1417			info->suspended = false;
1418			vhost_user_set_vring_enable(vu_dev, vq->index, true);
1419		}
1420	}
1421
1422	vu_dev->suspended = false;
1423
1424	if (!device_may_wakeup(&vu_dev->vdev.dev))
1425		return 0;
1426
1427	return irq_set_irq_wake(vu_dev->irq, 0);
1428}
1429
1430static struct platform_driver virtio_uml_driver = {
1431	.probe = virtio_uml_probe,
1432	.remove = virtio_uml_remove,
1433	.driver = {
1434		.name = "virtio-uml",
1435		.of_match_table = virtio_uml_match,
1436	},
1437	.suspend = virtio_uml_suspend,
1438	.resume = virtio_uml_resume,
1439};
1440
1441static int __init virtio_uml_init(void)
1442{
1443	return platform_driver_register(&virtio_uml_driver);
1444}
1445
1446static void __exit virtio_uml_exit(void)
1447{
1448	platform_driver_unregister(&virtio_uml_driver);
1449	vu_unregister_cmdline_devices();
1450}
1451
1452module_init(virtio_uml_init);
1453module_exit(virtio_uml_exit);
1454__uml_exitcall(virtio_uml_exit);
1455
1456MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1457MODULE_LICENSE("GPL");
v5.4
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Virtio vhost-user driver
   4 *
   5 * Copyright(c) 2019 Intel Corporation
   6 *
   7 * This module allows virtio devices to be used over a vhost-user socket.
   8 *
   9 * Guest devices can be instantiated by kernel module or command line
  10 * parameters. One device will be created for each parameter. Syntax:
  11 *
  12 *		[virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
  13 * where:
  14 *		<socket>	:= vhost-user socket path to connect
  15 *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
  16 *		<platform_id>	:= (optional) platform device id
  17 *
  18 * example:
  19 *		virtio_uml.device=/var/uml.socket:1
  20 *
  21 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
  22 */
  23#include <linux/module.h>
 
  24#include <linux/platform_device.h>
  25#include <linux/slab.h>
  26#include <linux/virtio.h>
  27#include <linux/virtio_config.h>
  28#include <linux/virtio_ring.h>
 
 
  29#include <shared/as-layout.h>
  30#include <irq_kern.h>
  31#include <init.h>
  32#include <os.h>
  33#include "vhost_user.h"
  34
  35/* Workaround due to a conflict between irq_user.h and irqreturn.h */
  36#ifdef IRQ_NONE
  37#undef IRQ_NONE
  38#endif
  39
  40#define MAX_SUPPORTED_QUEUE_SIZE	256
  41
  42#define to_virtio_uml_device(_vdev) \
  43	container_of(_vdev, struct virtio_uml_device, vdev)
  44
 
 
 
 
 
 
 
  45struct virtio_uml_device {
  46	struct virtio_device vdev;
  47	struct platform_device *pdev;
 
  48
  49	int sock, req_fd;
 
  50	u64 features;
  51	u64 protocol_features;
  52	u8 status;
 
 
 
 
 
 
 
  53};
  54
  55struct virtio_uml_vq_info {
  56	int kick_fd, call_fd;
  57	char name[32];
 
  58};
  59
  60extern unsigned long long physmem_size, highmem;
  61
  62#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, __VA_ARGS__)
  63
  64/* Vhost-user protocol */
  65
  66static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
  67			    const int *fds, unsigned int fds_num)
  68{
  69	int rc;
  70
  71	do {
  72		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
  73		if (rc > 0) {
  74			buf += rc;
  75			len -= rc;
  76			fds = NULL;
  77			fds_num = 0;
  78		}
  79	} while (len && (rc >= 0 || rc == -EINTR));
  80
  81	if (rc < 0)
  82		return rc;
  83	return 0;
  84}
  85
  86static int full_read(int fd, void *buf, int len)
  87{
  88	int rc;
  89
 
 
 
  90	do {
  91		rc = os_read_file(fd, buf, len);
  92		if (rc > 0) {
  93			buf += rc;
  94			len -= rc;
  95		}
  96	} while (len && (rc > 0 || rc == -EINTR));
  97
  98	if (rc < 0)
  99		return rc;
 100	if (rc == 0)
 101		return -ECONNRESET;
 102	return 0;
 103}
 104
 105static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
 106{
 107	return full_read(fd, msg, sizeof(msg->header));
 108}
 109
 110static int vhost_user_recv(int fd, struct vhost_user_msg *msg,
 111			   size_t max_payload_size)
 
 112{
 113	size_t size;
 114	int rc = vhost_user_recv_header(fd, msg);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 115
 116	if (rc)
 117		return rc;
 118	size = msg->header.size;
 119	if (size > max_payload_size)
 120		return -EPROTO;
 121	return full_read(fd, &msg->payload, size);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 122}
 123
 124static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
 125				struct vhost_user_msg *msg,
 126				size_t max_payload_size)
 127{
 128	int rc = vhost_user_recv(vu_dev->sock, msg, max_payload_size);
 
 129
 130	if (rc)
 
 131		return rc;
 
 132
 133	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
 134		return -EPROTO;
 135
 136	return 0;
 137}
 138
 139static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
 140			       u64 *value)
 141{
 142	struct vhost_user_msg msg;
 143	int rc = vhost_user_recv_resp(vu_dev, &msg,
 144				      sizeof(msg.payload.integer));
 145
 146	if (rc)
 147		return rc;
 148	if (msg.header.size != sizeof(msg.payload.integer))
 149		return -EPROTO;
 150	*value = msg.payload.integer;
 151	return 0;
 152}
 153
 154static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
 155			       struct vhost_user_msg *msg,
 156			       size_t max_payload_size)
 157{
 158	int rc = vhost_user_recv(vu_dev->req_fd, msg, max_payload_size);
 
 159
 160	if (rc)
 161		return rc;
 162
 163	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
 164			VHOST_USER_VERSION)
 165		return -EPROTO;
 166
 167	return 0;
 168}
 169
 170static int vhost_user_send(struct virtio_uml_device *vu_dev,
 171			   bool need_response, struct vhost_user_msg *msg,
 172			   int *fds, size_t num_fds)
 173{
 174	size_t size = sizeof(msg->header) + msg->header.size;
 
 175	bool request_ack;
 176	int rc;
 177
 178	msg->header.flags |= VHOST_USER_VERSION;
 179
 180	/*
 181	 * The need_response flag indicates that we already need a response,
 182	 * e.g. to read the features. In these cases, don't request an ACK as
 183	 * it is meaningless. Also request an ACK only if supported.
 184	 */
 185	request_ack = !need_response;
 186	if (!(vu_dev->protocol_features &
 187			BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
 188		request_ack = false;
 189
 190	if (request_ack)
 191		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
 192
 
 193	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
 194	if (rc < 0)
 195		return rc;
 196
 197	if (request_ack) {
 198		uint64_t status;
 199
 200		rc = vhost_user_recv_u64(vu_dev, &status);
 201		if (rc)
 202			return rc;
 203
 204		if (status) {
 205			vu_err(vu_dev, "slave reports error: %llu\n", status);
 206			return -EIO;
 
 207		}
 208	}
 209
 210	return 0;
 
 
 211}
 212
 213static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
 214				      bool need_response, u32 request)
 215{
 216	struct vhost_user_msg msg = {
 217		.header.request = request,
 218	};
 219
 220	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
 221}
 222
 223static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
 224					 u32 request, int fd)
 225{
 226	struct vhost_user_msg msg = {
 227		.header.request = request,
 228	};
 229
 230	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
 231}
 232
 233static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
 234			       u32 request, u64 value)
 235{
 236	struct vhost_user_msg msg = {
 237		.header.request = request,
 238		.header.size = sizeof(msg.payload.integer),
 239		.payload.integer = value,
 240	};
 241
 242	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 243}
 244
 245static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
 246{
 247	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
 248}
 249
 250static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
 251				   u64 *features)
 252{
 253	int rc = vhost_user_send_no_payload(vu_dev, true,
 254					    VHOST_USER_GET_FEATURES);
 255
 256	if (rc)
 257		return rc;
 258	return vhost_user_recv_u64(vu_dev, features);
 259}
 260
 261static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
 262				   u64 features)
 263{
 264	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
 265}
 266
 267static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
 268					    u64 *protocol_features)
 269{
 270	int rc = vhost_user_send_no_payload(vu_dev, true,
 271			VHOST_USER_GET_PROTOCOL_FEATURES);
 272
 273	if (rc)
 274		return rc;
 275	return vhost_user_recv_u64(vu_dev, protocol_features);
 276}
 277
 278static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
 279					    u64 protocol_features)
 280{
 281	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
 282				   protocol_features);
 283}
 284
 285static void vhost_user_reply(struct virtio_uml_device *vu_dev,
 286			     struct vhost_user_msg *msg, int response)
 287{
 288	struct vhost_user_msg reply = {
 289		.payload.integer = response,
 290	};
 291	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
 292	int rc;
 293
 294	reply.header = msg->header;
 295	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
 296	reply.header.flags |= VHOST_USER_FLAG_REPLY;
 297	reply.header.size = sizeof(reply.payload.integer);
 298
 299	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
 300
 301	if (rc)
 302		vu_err(vu_dev,
 303		       "sending reply to slave request failed: %d (size %zu)\n",
 304		       rc, size);
 305}
 306
 307static irqreturn_t vu_req_interrupt(int irq, void *data)
 
 308{
 309	struct virtio_uml_device *vu_dev = data;
 310	int response = 1;
 311	struct {
 312		struct vhost_user_msg msg;
 313		u8 extra_payload[512];
 314	} msg;
 315	int rc;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 316
 317	rc = vhost_user_recv_req(vu_dev, &msg.msg,
 318				 sizeof(msg.msg.payload) +
 319				 sizeof(msg.extra_payload));
 
 320
 321	if (rc)
 322		return IRQ_NONE;
 323
 324	switch (msg.msg.header.request) {
 325	case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
 
 
 
 
 
 
 
 
 
 326		virtio_config_changed(&vu_dev->vdev);
 327		response = 0;
 328		break;
 329	case VHOST_USER_SLAVE_IOTLB_MSG:
 330		/* not supported - VIRTIO_F_IOMMU_PLATFORM */
 331	case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
 332		/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
 333	default:
 334		vu_err(vu_dev, "unexpected slave request %d\n",
 335		       msg.msg.header.request);
 336	}
 337
 338	if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
 339		vhost_user_reply(vu_dev, &msg.msg, response);
 340
 341	return IRQ_HANDLED;
 
 
 
 342}
 343
 344static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
 345{
 346	int rc, req_fds[2];
 347
 348	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
 349	rc = os_pipe(req_fds, true, true);
 350	if (rc < 0)
 351		return rc;
 352	vu_dev->req_fd = req_fds[0];
 353
 354	rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ,
 355			    vu_req_interrupt, IRQF_SHARED,
 356			    vu_dev->pdev->name, vu_dev);
 357	if (rc)
 
 358		goto err_close;
 359
 
 
 360	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
 361					   req_fds[1]);
 362	if (rc)
 363		goto err_free_irq;
 364
 365	goto out;
 366
 367err_free_irq:
 368	um_free_irq(VIRTIO_IRQ, vu_dev);
 369err_close:
 370	os_close_file(req_fds[0]);
 371out:
 372	/* Close unused write end of request fds */
 373	os_close_file(req_fds[1]);
 374	return rc;
 375}
 376
 377static int vhost_user_init(struct virtio_uml_device *vu_dev)
 378{
 379	int rc = vhost_user_set_owner(vu_dev);
 380
 381	if (rc)
 382		return rc;
 383	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
 384	if (rc)
 385		return rc;
 386
 387	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
 388		rc = vhost_user_get_protocol_features(vu_dev,
 389				&vu_dev->protocol_features);
 390		if (rc)
 391			return rc;
 392		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
 393		rc = vhost_user_set_protocol_features(vu_dev,
 394				vu_dev->protocol_features);
 395		if (rc)
 396			return rc;
 397	}
 398
 399	if (vu_dev->protocol_features &
 400			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
 401		rc = vhost_user_init_slave_req(vu_dev);
 402		if (rc)
 403			return rc;
 404	}
 405
 406	return 0;
 407}
 408
 409static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
 410				  u32 offset, void *buf, u32 len)
 411{
 412	u32 cfg_size = offset + len;
 413	struct vhost_user_msg *msg;
 414	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
 415	size_t msg_size = sizeof(msg->header) + payload_size;
 416	int rc;
 417
 418	if (!(vu_dev->protocol_features &
 419	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
 420		return;
 421
 422	msg = kzalloc(msg_size, GFP_KERNEL);
 423	if (!msg)
 424		return;
 425	msg->header.request = VHOST_USER_GET_CONFIG;
 426	msg->header.size = payload_size;
 427	msg->payload.config.offset = 0;
 428	msg->payload.config.size = cfg_size;
 429
 430	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
 431	if (rc) {
 432		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
 433		       rc);
 434		goto free;
 435	}
 436
 437	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
 438	if (rc) {
 439		vu_err(vu_dev,
 440		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
 441		       rc);
 442		goto free;
 443	}
 444
 445	if (msg->header.size != payload_size ||
 446	    msg->payload.config.size != cfg_size) {
 447		rc = -EPROTO;
 448		vu_err(vu_dev,
 449		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
 450		       msg->header.size, payload_size,
 451		       msg->payload.config.size, cfg_size);
 452		goto free;
 453	}
 454	memcpy(buf, msg->payload.config.payload + offset, len);
 455
 456free:
 457	kfree(msg);
 458}
 459
 460static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
 461				  u32 offset, const void *buf, u32 len)
 462{
 463	struct vhost_user_msg *msg;
 464	size_t payload_size = sizeof(msg->payload.config) + len;
 465	size_t msg_size = sizeof(msg->header) + payload_size;
 466	int rc;
 467
 468	if (!(vu_dev->protocol_features &
 469	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
 470		return;
 471
 472	msg = kzalloc(msg_size, GFP_KERNEL);
 473	if (!msg)
 474		return;
 475	msg->header.request = VHOST_USER_SET_CONFIG;
 476	msg->header.size = payload_size;
 477	msg->payload.config.offset = offset;
 478	msg->payload.config.size = len;
 479	memcpy(msg->payload.config.payload, buf, len);
 480
 481	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
 482	if (rc)
 483		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
 484		       rc);
 485
 486	kfree(msg);
 487}
 488
 489static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
 490				      struct vhost_user_mem_region *region_out)
 491{
 492	unsigned long long mem_offset;
 493	int rc = phys_mapping(addr, &mem_offset);
 494
 495	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
 496		return -EFAULT;
 497	*fd_out = rc;
 498	region_out->guest_addr = addr;
 499	region_out->user_addr = addr;
 500	region_out->size = size;
 501	region_out->mmap_offset = mem_offset;
 502
 503	/* Ensure mapping is valid for the entire region */
 504	rc = phys_mapping(addr + size - 1, &mem_offset);
 505	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
 506		 addr + size - 1, rc, *fd_out))
 507		return -EFAULT;
 508	return 0;
 509}
 510
 511static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
 512{
 513	struct vhost_user_msg msg = {
 514		.header.request = VHOST_USER_SET_MEM_TABLE,
 515		.header.size = sizeof(msg.payload.mem_regions),
 516		.payload.mem_regions.num = 1,
 517	};
 518	unsigned long reserved = uml_reserved - uml_physmem;
 519	int fds[2];
 520	int rc;
 521
 522	/*
 523	 * This is a bit tricky, see also the comment with setup_physmem().
 524	 *
 525	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
 526	 * but the code and data we *already* have is omitted. To us, this
 527	 * is no difference, since they both become part of our address
 528	 * space and memory consumption. To somebody looking in from the
 529	 * outside, however, it is different because the part of our memory
 530	 * consumption that's already part of the binary (code/data) is not
 531	 * mapped from the file, so it's not visible to another mmap from
 532	 * the file descriptor.
 533	 *
 534	 * Thus, don't advertise this space to the vhost-user slave. This
 535	 * means that the slave will likely abort or similar when we give
 536	 * it an address from the hidden range, since it's not marked as
 537	 * a valid address, but at least that way we detect the issue and
 538	 * don't just have the slave read an all-zeroes buffer from the
 539	 * shared memory file, or write something there that we can never
 540	 * see (depending on the direction of the virtqueue traffic.)
 541	 *
 542	 * Since we usually don't want to use .text for virtio buffers,
 543	 * this effectively means that you cannot use
 544	 *  1) global variables, which are in the .bss and not in the shm
 545	 *     file-backed memory
 546	 *  2) the stack in some processes, depending on where they have
 547	 *     their stack (or maybe only no interrupt stack?)
 548	 *
 549	 * The stack is already not typically valid for DMA, so this isn't
 550	 * much of a restriction, but global variables might be encountered.
 551	 *
 552	 * It might be possible to fix it by copying around the data that's
 553	 * between bss_start and where we map the file now, but it's not
 554	 * something that you typically encounter with virtio drivers, so
 555	 * it didn't seem worthwhile.
 556	 */
 557	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
 558					&fds[0],
 559					&msg.payload.mem_regions.regions[0]);
 560
 561	if (rc < 0)
 562		return rc;
 563	if (highmem) {
 564		msg.payload.mem_regions.num++;
 565		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
 566				&fds[1], &msg.payload.mem_regions.regions[1]);
 567		if (rc < 0)
 568			return rc;
 569	}
 570
 571	return vhost_user_send(vu_dev, false, &msg, fds,
 572			       msg.payload.mem_regions.num);
 573}
 574
 575static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
 576				      u32 request, u32 index, u32 num)
 577{
 578	struct vhost_user_msg msg = {
 579		.header.request = request,
 580		.header.size = sizeof(msg.payload.vring_state),
 581		.payload.vring_state.index = index,
 582		.payload.vring_state.num = num,
 583	};
 584
 585	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 586}
 587
 588static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
 589				    u32 index, u32 num)
 590{
 591	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
 592					  index, num);
 593}
 594
 595static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
 596				     u32 index, u32 offset)
 597{
 598	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
 599					  index, offset);
 600}
 601
 602static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
 603				     u32 index, u64 desc, u64 used, u64 avail,
 604				     u64 log)
 605{
 606	struct vhost_user_msg msg = {
 607		.header.request = VHOST_USER_SET_VRING_ADDR,
 608		.header.size = sizeof(msg.payload.vring_addr),
 609		.payload.vring_addr.index = index,
 610		.payload.vring_addr.desc = desc,
 611		.payload.vring_addr.used = used,
 612		.payload.vring_addr.avail = avail,
 613		.payload.vring_addr.log = log,
 614	};
 615
 616	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 617}
 618
 619static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
 620				   u32 request, int index, int fd)
 621{
 622	struct vhost_user_msg msg = {
 623		.header.request = request,
 624		.header.size = sizeof(msg.payload.integer),
 625		.payload.integer = index,
 626	};
 627
 628	if (index & ~VHOST_USER_VRING_INDEX_MASK)
 629		return -EINVAL;
 630	if (fd < 0) {
 631		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
 632		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 633	}
 634	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
 635}
 636
 637static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
 638				     int index, int fd)
 639{
 640	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
 641				       index, fd);
 642}
 643
 644static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
 645				     int index, int fd)
 646{
 647	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
 648				       index, fd);
 649}
 650
 651static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
 652				       u32 index, bool enable)
 653{
 654	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
 655		return 0;
 656
 657	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
 658					  index, enable);
 659}
 660
 661
 662/* Virtio interface */
 663
 664static bool vu_notify(struct virtqueue *vq)
 665{
 666	struct virtio_uml_vq_info *info = vq->priv;
 667	const uint64_t n = 1;
 668	int rc;
 669
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 670	do {
 671		rc = os_write_file(info->kick_fd, &n, sizeof(n));
 672	} while (rc == -EINTR);
 673	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
 674}
 675
 676static irqreturn_t vu_interrupt(int irq, void *opaque)
 677{
 678	struct virtqueue *vq = opaque;
 679	struct virtio_uml_vq_info *info = vq->priv;
 680	uint64_t n;
 681	int rc;
 682	irqreturn_t ret = IRQ_NONE;
 683
 684	do {
 685		rc = os_read_file(info->call_fd, &n, sizeof(n));
 686		if (rc == sizeof(n))
 687			ret |= vring_interrupt(irq, vq);
 688	} while (rc == sizeof(n) || rc == -EINTR);
 689	WARN(rc != -EAGAIN, "read returned %d\n", rc);
 690	return ret;
 691}
 692
 693
 694static void vu_get(struct virtio_device *vdev, unsigned offset,
 695		   void *buf, unsigned len)
 696{
 697	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 698
 699	vhost_user_get_config(vu_dev, offset, buf, len);
 700}
 701
 702static void vu_set(struct virtio_device *vdev, unsigned offset,
 703		   const void *buf, unsigned len)
 704{
 705	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 706
 707	vhost_user_set_config(vu_dev, offset, buf, len);
 708}
 709
 710static u8 vu_get_status(struct virtio_device *vdev)
 711{
 712	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 713
 714	return vu_dev->status;
 715}
 716
 717static void vu_set_status(struct virtio_device *vdev, u8 status)
 718{
 719	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 720
 721	vu_dev->status = status;
 722}
 723
 724static void vu_reset(struct virtio_device *vdev)
 725{
 726	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 727
 728	vu_dev->status = 0;
 729}
 730
 731static void vu_del_vq(struct virtqueue *vq)
 732{
 733	struct virtio_uml_vq_info *info = vq->priv;
 734
 735	um_free_irq(VIRTIO_IRQ, vq);
 
 
 
 
 
 
 
 736
 737	os_close_file(info->call_fd);
 738	os_close_file(info->kick_fd);
 739
 740	vring_del_virtqueue(vq);
 741	kfree(info);
 742}
 743
 744static void vu_del_vqs(struct virtio_device *vdev)
 745{
 746	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 747	struct virtqueue *vq, *n;
 748	u64 features;
 749
 750	/* Note: reverse order as a workaround to a decoding bug in snabb */
 751	list_for_each_entry_reverse(vq, &vdev->vqs, list)
 752		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
 753
 754	/* Ensure previous messages have been processed */
 755	WARN_ON(vhost_user_get_features(vu_dev, &features));
 756
 757	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
 758		vu_del_vq(vq);
 759}
 760
 761static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
 762			       struct virtqueue *vq)
 763{
 764	struct virtio_uml_vq_info *info = vq->priv;
 765	int call_fds[2];
 766	int rc;
 767
 
 
 
 
 
 
 
 
 
 768	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
 769	rc = os_pipe(call_fds, true, true);
 770	if (rc < 0)
 771		return rc;
 772
 773	info->call_fd = call_fds[0];
 774	rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
 775			    vu_interrupt, IRQF_SHARED, info->name, vq);
 776	if (rc)
 777		goto close_both;
 778
 779	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
 780	if (rc)
 781		goto release_irq;
 782
 783	goto out;
 784
 785release_irq:
 786	um_free_irq(VIRTIO_IRQ, vq);
 787close_both:
 788	os_close_file(call_fds[0]);
 789out:
 790	/* Close (unused) write end of call fds */
 791	os_close_file(call_fds[1]);
 792
 793	return rc;
 794}
 795
 796static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
 797				     unsigned index, vq_callback_t *callback,
 798				     const char *name, bool ctx)
 799{
 800	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 801	struct platform_device *pdev = vu_dev->pdev;
 802	struct virtio_uml_vq_info *info;
 803	struct virtqueue *vq;
 804	int num = MAX_SUPPORTED_QUEUE_SIZE;
 805	int rc;
 806
 807	info = kzalloc(sizeof(*info), GFP_KERNEL);
 808	if (!info) {
 809		rc = -ENOMEM;
 810		goto error_kzalloc;
 811	}
 812	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
 813		 pdev->id, name);
 814
 815	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
 816				    ctx, vu_notify, callback, info->name);
 817	if (!vq) {
 818		rc = -ENOMEM;
 819		goto error_create;
 820	}
 821	vq->priv = info;
 
 822	num = virtqueue_get_vring_size(vq);
 823
 824	rc = os_eventfd(0, 0);
 825	if (rc < 0)
 826		goto error_kick;
 827	info->kick_fd = rc;
 
 
 
 
 
 828
 829	rc = vu_setup_vq_call_fd(vu_dev, vq);
 830	if (rc)
 831		goto error_call;
 832
 833	rc = vhost_user_set_vring_num(vu_dev, index, num);
 834	if (rc)
 835		goto error_setup;
 836
 837	rc = vhost_user_set_vring_base(vu_dev, index, 0);
 838	if (rc)
 839		goto error_setup;
 840
 841	rc = vhost_user_set_vring_addr(vu_dev, index,
 842				       virtqueue_get_desc_addr(vq),
 843				       virtqueue_get_used_addr(vq),
 844				       virtqueue_get_avail_addr(vq),
 845				       (u64) -1);
 846	if (rc)
 847		goto error_setup;
 848
 849	return vq;
 850
 851error_setup:
 852	um_free_irq(VIRTIO_IRQ, vq);
 853	os_close_file(info->call_fd);
 
 
 854error_call:
 855	os_close_file(info->kick_fd);
 
 856error_kick:
 857	vring_del_virtqueue(vq);
 858error_create:
 859	kfree(info);
 860error_kzalloc:
 861	return ERR_PTR(rc);
 862}
 863
 864static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 865		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
 866		       const char * const names[], const bool *ctx,
 867		       struct irq_affinity *desc)
 868{
 869	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 870	int i, queue_idx = 0, rc;
 871	struct virtqueue *vq;
 872
 
 
 
 
 873	rc = vhost_user_set_mem_table(vu_dev);
 874	if (rc)
 875		return rc;
 876
 877	for (i = 0; i < nvqs; ++i) {
 878		if (!names[i]) {
 879			vqs[i] = NULL;
 880			continue;
 881		}
 882
 883		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
 884				     ctx ? ctx[i] : false);
 885		if (IS_ERR(vqs[i])) {
 886			rc = PTR_ERR(vqs[i]);
 887			goto error_setup;
 888		}
 889	}
 890
 891	list_for_each_entry(vq, &vdev->vqs, list) {
 892		struct virtio_uml_vq_info *info = vq->priv;
 893
 894		rc = vhost_user_set_vring_kick(vu_dev, vq->index,
 895					       info->kick_fd);
 896		if (rc)
 897			goto error_setup;
 
 
 898
 899		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
 900		if (rc)
 901			goto error_setup;
 902	}
 903
 904	return 0;
 905
 906error_setup:
 907	vu_del_vqs(vdev);
 908	return rc;
 909}
 910
 911static u64 vu_get_features(struct virtio_device *vdev)
 912{
 913	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 914
 915	return vu_dev->features;
 916}
 917
 918static int vu_finalize_features(struct virtio_device *vdev)
 919{
 920	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 921	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
 922
 923	vring_transport_features(vdev);
 924	vu_dev->features = vdev->features | supported;
 925
 926	return vhost_user_set_features(vu_dev, vu_dev->features);
 927}
 928
 929static const char *vu_bus_name(struct virtio_device *vdev)
 930{
 931	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 932
 933	return vu_dev->pdev->name;
 934}
 935
 936static const struct virtio_config_ops virtio_uml_config_ops = {
 937	.get = vu_get,
 938	.set = vu_set,
 939	.get_status = vu_get_status,
 940	.set_status = vu_set_status,
 941	.reset = vu_reset,
 942	.find_vqs = vu_find_vqs,
 943	.del_vqs = vu_del_vqs,
 944	.get_features = vu_get_features,
 945	.finalize_features = vu_finalize_features,
 946	.bus_name = vu_bus_name,
 947};
 948
 949static void virtio_uml_release_dev(struct device *d)
 950{
 951	struct virtio_device *vdev =
 952			container_of(d, struct virtio_device, dev);
 953	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 954
 
 
 955	/* might not have been opened due to not negotiating the feature */
 956	if (vu_dev->req_fd >= 0) {
 957		um_free_irq(VIRTIO_IRQ, vu_dev);
 958		os_close_file(vu_dev->req_fd);
 959	}
 960
 961	os_close_file(vu_dev->sock);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 962}
 963
 964/* Platform device */
 965
 966struct virtio_uml_platform_data {
 967	u32 virtio_device_id;
 968	const char *socket_path;
 969};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 970
 971static int virtio_uml_probe(struct platform_device *pdev)
 972{
 973	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
 974	struct virtio_uml_device *vu_dev;
 975	int rc;
 976
 977	if (!pdata)
 978		return -EINVAL;
 
 
 
 979
 980	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
 981	if (!vu_dev)
 982		return -ENOMEM;
 983
 
 984	vu_dev->vdev.dev.parent = &pdev->dev;
 985	vu_dev->vdev.dev.release = virtio_uml_release_dev;
 986	vu_dev->vdev.config = &virtio_uml_config_ops;
 987	vu_dev->vdev.id.device = pdata->virtio_device_id;
 988	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
 989	vu_dev->pdev = pdev;
 990	vu_dev->req_fd = -1;
 991
 
 
 992	do {
 993		rc = os_connect_socket(pdata->socket_path);
 994	} while (rc == -EINTR);
 995	if (rc < 0)
 996		return rc;
 997	vu_dev->sock = rc;
 998
 
 
 999	rc = vhost_user_init(vu_dev);
1000	if (rc)
1001		goto error_init;
1002
1003	platform_set_drvdata(pdev, vu_dev);
1004
 
 
1005	rc = register_virtio_device(&vu_dev->vdev);
1006	if (rc)
1007		put_device(&vu_dev->vdev.dev);
 
1008	return rc;
1009
1010error_init:
1011	os_close_file(vu_dev->sock);
 
 
1012	return rc;
1013}
1014
1015static int virtio_uml_remove(struct platform_device *pdev)
1016{
1017	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1018
1019	unregister_virtio_device(&vu_dev->vdev);
1020	return 0;
1021}
1022
1023/* Command line device list */
1024
/*
 * Release callback for vu_cmdline_parent. Intentionally empty: that device
 * is a static object in this file, so there is nothing to free.
 */
static void vu_cmdline_release_dev(struct device *d)
{
	/* nothing to do */
}
1028
1029static struct device vu_cmdline_parent = {
1030	.init_name = "virtio-uml-cmdline",
1031	.release = vu_cmdline_release_dev,
1032};
1033
1034static bool vu_cmdline_parent_registered;
1035static int vu_cmdline_id;
1036
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1037static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1038{
1039	const char *ids = strchr(device, ':');
1040	unsigned int virtio_device_id;
1041	int processed, consumed, err;
1042	char *socket_path;
1043	struct virtio_uml_platform_data pdata;
1044	struct platform_device *pdev;
1045
1046	if (!ids || ids == device)
1047		return -EINVAL;
1048
1049	processed = sscanf(ids, ":%u%n:%d%n",
1050			   &virtio_device_id, &consumed,
1051			   &vu_cmdline_id, &consumed);
1052
1053	if (processed < 1 || ids[consumed])
1054		return -EINVAL;
1055
1056	if (!vu_cmdline_parent_registered) {
1057		err = device_register(&vu_cmdline_parent);
1058		if (err) {
1059			pr_err("Failed to register parent device!\n");
1060			put_device(&vu_cmdline_parent);
1061			return err;
1062		}
1063		vu_cmdline_parent_registered = true;
1064	}
1065
1066	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1067	if (!socket_path)
1068		return -ENOMEM;
1069
1070	pdata.virtio_device_id = (u32) virtio_device_id;
1071	pdata.socket_path = socket_path;
1072
1073	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1074		vu_cmdline_id, virtio_device_id, socket_path);
1075
1076	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1077					     vu_cmdline_id++, &pdata,
1078					     sizeof(pdata));
1079	err = PTR_ERR_OR_ZERO(pdev);
1080	if (err)
1081		goto free;
 
 
 
 
 
1082	return 0;
1083
1084free:
1085	kfree(socket_path);
1086	return err;
1087}
1088
1089static int vu_cmdline_get_device(struct device *dev, void *data)
1090{
1091	struct platform_device *pdev = to_platform_device(dev);
1092	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1093	char *buffer = data;
1094	unsigned int len = strlen(buffer);
1095
1096	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1097		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
1098	return 0;
1099}
1100
1101static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1102{
1103	buffer[0] = '\0';
1104	if (vu_cmdline_parent_registered)
1105		device_for_each_child(&vu_cmdline_parent, buffer,
1106				      vu_cmdline_get_device);
1107	return strlen(buffer) + 1;
1108}
1109
1110static const struct kernel_param_ops vu_cmdline_param_ops = {
1111	.set = vu_cmdline_set,
1112	.get = vu_cmdline_get,
1113};
1114
1115device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
1116__uml_help(vu_cmdline_param_ops,
1117"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
1118"    Configure a virtio device over a vhost-user socket.\n"
1119"    See virtio_ids.h for a list of possible virtio device id values.\n"
1120"    Optionally use a specific platform_device id.\n\n"
1121);
1122
1123
1124static int vu_unregister_cmdline_device(struct device *dev, void *data)
1125{
1126	struct platform_device *pdev = to_platform_device(dev);
1127	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1128
1129	kfree(pdata->socket_path);
1130	platform_device_unregister(pdev);
1131	return 0;
1132}
1133
1134static void vu_unregister_cmdline_devices(void)
1135{
1136	if (vu_cmdline_parent_registered) {
1137		device_for_each_child(&vu_cmdline_parent, NULL,
1138				      vu_unregister_cmdline_device);
1139		device_unregister(&vu_cmdline_parent);
1140		vu_cmdline_parent_registered = false;
1141	}
1142}
1143
1144/* Platform driver */
1145
1146static const struct of_device_id virtio_uml_match[] = {
1147	{ .compatible = "virtio,uml", },
1148	{ }
1149};
1150MODULE_DEVICE_TABLE(of, virtio_uml_match);
1151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1152static struct platform_driver virtio_uml_driver = {
1153	.probe = virtio_uml_probe,
1154	.remove = virtio_uml_remove,
1155	.driver = {
1156		.name = "virtio-uml",
1157		.of_match_table = virtio_uml_match,
1158	},
 
 
1159};
1160
1161static int __init virtio_uml_init(void)
1162{
1163	return platform_driver_register(&virtio_uml_driver);
1164}
1165
1166static void __exit virtio_uml_exit(void)
1167{
1168	platform_driver_unregister(&virtio_uml_driver);
1169	vu_unregister_cmdline_devices();
1170}
1171
1172module_init(virtio_uml_init);
1173module_exit(virtio_uml_exit);
1174__uml_exitcall(virtio_uml_exit);
1175
1176MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1177MODULE_LICENSE("GPL");