Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.5.6.
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Virtio vhost-user driver
   4 *
   5 * Copyright(c) 2019 Intel Corporation
   6 *
   7 * This driver allows virtio devices to be used over a vhost-user socket.
   8 *
   9 * Guest devices can be instantiated by kernel module or command line
  10 * parameters. One device will be created for each parameter. Syntax:
  11 *
  12 *		virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
  13 * where:
  14 *		<socket>	:= vhost-user socket path to connect
  15 *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
  16 *		<platform_id>	:= (optional) platform device id
  17 *
  18 * example:
  19 *		virtio_uml.device=/var/uml.socket:1
  20 *
  21 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
  22 */
  23#include <linux/module.h>
  24#include <linux/platform_device.h>
  25#include <linux/slab.h>
  26#include <linux/virtio.h>
  27#include <linux/virtio_config.h>
  28#include <linux/virtio_ring.h>
  29#include <linux/time-internal.h>
  30#include <shared/as-layout.h>
  31#include <irq_kern.h>
  32#include <init.h>
  33#include <os.h>
  34#include "vhost_user.h"
  35
  36/* Workaround due to a conflict between irq_user.h and irqreturn.h */
  37#ifdef IRQ_NONE
  38#undef IRQ_NONE
  39#endif
  40
/* Ring size requested from vring_create_virtqueue() when setting up a queue. */
#define MAX_SUPPORTED_QUEUE_SIZE	256

/* Map an embedded struct virtio_device back to its struct virtio_uml_device. */
#define to_virtio_uml_device(_vdev) \
	container_of(_vdev, struct virtio_uml_device, vdev)
  45
/* Platform data attached to the platform device that probe operates on. */
struct virtio_uml_platform_data {
	/* virtio device ID; probe copies it into vdev.id.device */
	u32 virtio_device_id;
	/* path of the vhost-user socket that probe connects to */
	const char *socket_path;
	/* scheduled when reading a message header fails with -ECONNRESET */
	struct work_struct conn_broken_wk;
	/* NOTE(review): presumably the owning platform device - confirm */
	struct platform_device *pdev;
};
  52
/* State of one virtio device that is backed by a vhost-user connection. */
struct virtio_uml_device {
	struct virtio_device vdev;
	/* platform device this instance was probed from */
	struct platform_device *pdev;

	/* held while sending on 'sock' and while waiting for the optional ACK */
	spinlock_t sock_lock;
	/*
	 * sock: connected vhost-user socket
	 * req_fd: read end of the slave request pipe, -1 while not set up
	 */
	int sock, req_fd;
	/* feature bits read from the device, updated by vu_finalize_features() */
	u64 features;
	/* protocol features, masked with VHOST_USER_SUPPORTED_PROTOCOL_F */
	u64 protocol_features;
	/* status byte, only stored locally by vu_set_status()/vu_reset() */
	u8 status;
	/* set by probe after register_virtio_device() */
	u8 registered:1;
};
  64
/* Per-virtqueue private data, stored in vq->priv. */
struct virtio_uml_vq_info {
	/* a negative fd means the corresponding notification has no fd */
	int kick_fd, call_fd;
	/* "<pdev name>.<pdev id>-<queue name>", used as vring and IRQ name */
	char name[32];
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	/* back pointer to the queue, used by the deferred IRQ handler */
	struct virtqueue *vq;
	/* original callback, run from the time-travel event handler */
	vq_callback_t *callback;
	/* event used to defer the callback in TT_MODE_EXTERNAL */
	struct time_travel_event defer;
#endif
};
  74
/* Defined elsewhere; used when building the vhost-user memory table. */
extern unsigned long long physmem_size, highmem;

/* dev_err() on the platform device behind a struct virtio_uml_device. */
#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)
  78
  79/* Vhost-user protocol */
  80
  81static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
  82			    const int *fds, unsigned int fds_num)
  83{
  84	int rc;
  85
  86	do {
  87		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
  88		if (rc > 0) {
  89			buf += rc;
  90			len -= rc;
  91			fds = NULL;
  92			fds_num = 0;
  93		}
  94	} while (len && (rc >= 0 || rc == -EINTR));
  95
  96	if (rc < 0)
  97		return rc;
  98	return 0;
  99}
 100
 101static int full_read(int fd, void *buf, int len, bool abortable)
 102{
 103	int rc;
 104
 105	do {
 106		rc = os_read_file(fd, buf, len);
 107		if (rc > 0) {
 108			buf += rc;
 109			len -= rc;
 110		}
 111	} while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));
 112
 113	if (rc < 0)
 114		return rc;
 115	if (rc == 0)
 116		return -ECONNRESET;
 117	return 0;
 118}
 119
 120static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
 121{
 122	return full_read(fd, msg, sizeof(msg->header), true);
 123}
 124
 125static int vhost_user_recv(struct virtio_uml_device *vu_dev,
 126			   int fd, struct vhost_user_msg *msg,
 127			   size_t max_payload_size, bool wait)
 128{
 129	size_t size;
 130	int rc;
 131
 132	/*
 133	 * In virtio time-travel mode, we're handling all the vhost-user
 134	 * FDs by polling them whenever appropriate. However, we may get
 135	 * into a situation where we're sending out an interrupt message
 136	 * to a device (e.g. a net device) and need to handle a simulation
 137	 * time message while doing so, e.g. one that tells us to update
 138	 * our idea of how long we can run without scheduling.
 139	 *
 140	 * Thus, we need to not just read() from the given fd, but need
 141	 * to also handle messages for the simulation time - this function
 142	 * does that for us while waiting for the given fd to be readable.
 143	 */
 144	if (wait)
 145		time_travel_wait_readable(fd);
 146
 147	rc = vhost_user_recv_header(fd, msg);
 148
 149	if (rc == -ECONNRESET && vu_dev->registered) {
 150		struct virtio_uml_platform_data *pdata;
 151
 152		pdata = vu_dev->pdev->dev.platform_data;
 153
 154		virtio_break_device(&vu_dev->vdev);
 155		schedule_work(&pdata->conn_broken_wk);
 156	}
 157	if (rc)
 158		return rc;
 159	size = msg->header.size;
 160	if (size > max_payload_size)
 161		return -EPROTO;
 162	return full_read(fd, &msg->payload, size, false);
 163}
 164
 165static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
 166				struct vhost_user_msg *msg,
 167				size_t max_payload_size)
 168{
 169	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
 170				 max_payload_size, true);
 171
 172	if (rc)
 173		return rc;
 174
 175	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
 176		return -EPROTO;
 177
 178	return 0;
 179}
 180
 181static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
 182			       u64 *value)
 183{
 184	struct vhost_user_msg msg;
 185	int rc = vhost_user_recv_resp(vu_dev, &msg,
 186				      sizeof(msg.payload.integer));
 187
 188	if (rc)
 189		return rc;
 190	if (msg.header.size != sizeof(msg.payload.integer))
 191		return -EPROTO;
 192	*value = msg.payload.integer;
 193	return 0;
 194}
 195
 196static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
 197			       struct vhost_user_msg *msg,
 198			       size_t max_payload_size)
 199{
 200	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
 201				 max_payload_size, false);
 202
 203	if (rc)
 204		return rc;
 205
 206	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
 207			VHOST_USER_VERSION)
 208		return -EPROTO;
 209
 210	return 0;
 211}
 212
 213static int vhost_user_send(struct virtio_uml_device *vu_dev,
 214			   bool need_response, struct vhost_user_msg *msg,
 215			   int *fds, size_t num_fds)
 216{
 217	size_t size = sizeof(msg->header) + msg->header.size;
 218	unsigned long flags;
 219	bool request_ack;
 220	int rc;
 221
 222	msg->header.flags |= VHOST_USER_VERSION;
 223
 224	/*
 225	 * The need_response flag indicates that we already need a response,
 226	 * e.g. to read the features. In these cases, don't request an ACK as
 227	 * it is meaningless. Also request an ACK only if supported.
 228	 */
 229	request_ack = !need_response;
 230	if (!(vu_dev->protocol_features &
 231			BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
 232		request_ack = false;
 233
 234	if (request_ack)
 235		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
 236
 237	spin_lock_irqsave(&vu_dev->sock_lock, flags);
 238	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
 239	if (rc < 0)
 240		goto out;
 241
 242	if (request_ack) {
 243		uint64_t status;
 244
 245		rc = vhost_user_recv_u64(vu_dev, &status);
 246		if (rc)
 247			goto out;
 248
 249		if (status) {
 250			vu_err(vu_dev, "slave reports error: %llu\n", status);
 251			rc = -EIO;
 252			goto out;
 253		}
 254	}
 255
 256out:
 257	spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
 258	return rc;
 259}
 260
 261static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
 262				      bool need_response, u32 request)
 263{
 264	struct vhost_user_msg msg = {
 265		.header.request = request,
 266	};
 267
 268	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
 269}
 270
 271static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
 272					 u32 request, int fd)
 273{
 274	struct vhost_user_msg msg = {
 275		.header.request = request,
 276	};
 277
 278	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
 279}
 280
 281static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
 282			       u32 request, u64 value)
 283{
 284	struct vhost_user_msg msg = {
 285		.header.request = request,
 286		.header.size = sizeof(msg.payload.integer),
 287		.payload.integer = value,
 288	};
 289
 290	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 291}
 292
 293static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
 294{
 295	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
 296}
 297
 298static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
 299				   u64 *features)
 300{
 301	int rc = vhost_user_send_no_payload(vu_dev, true,
 302					    VHOST_USER_GET_FEATURES);
 303
 304	if (rc)
 305		return rc;
 306	return vhost_user_recv_u64(vu_dev, features);
 307}
 308
 309static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
 310				   u64 features)
 311{
 312	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
 313}
 314
 315static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
 316					    u64 *protocol_features)
 317{
 318	int rc = vhost_user_send_no_payload(vu_dev, true,
 319			VHOST_USER_GET_PROTOCOL_FEATURES);
 320
 321	if (rc)
 322		return rc;
 323	return vhost_user_recv_u64(vu_dev, protocol_features);
 324}
 325
 326static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
 327					    u64 protocol_features)
 328{
 329	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
 330				   protocol_features);
 331}
 332
 333static void vhost_user_reply(struct virtio_uml_device *vu_dev,
 334			     struct vhost_user_msg *msg, int response)
 335{
 336	struct vhost_user_msg reply = {
 337		.payload.integer = response,
 338	};
 339	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
 340	int rc;
 341
 342	reply.header = msg->header;
 343	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
 344	reply.header.flags |= VHOST_USER_FLAG_REPLY;
 345	reply.header.size = sizeof(reply.payload.integer);
 346
 347	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
 348
 349	if (rc)
 350		vu_err(vu_dev,
 351		       "sending reply to slave request failed: %d (size %zu)\n",
 352		       rc, size);
 353}
 354
 355static irqreturn_t vu_req_interrupt(int irq, void *data)
 356{
 357	struct virtio_uml_device *vu_dev = data;
 358	struct virtqueue *vq;
 359	int response = 1;
 360	struct {
 361		struct vhost_user_msg msg;
 362		u8 extra_payload[512];
 363	} msg;
 364	int rc;
 365
 366	rc = vhost_user_recv_req(vu_dev, &msg.msg,
 367				 sizeof(msg.msg.payload) +
 368				 sizeof(msg.extra_payload));
 369
 370	if (rc)
 371		return IRQ_NONE;
 372
 373	switch (msg.msg.header.request) {
 374	case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
 375		virtio_config_changed(&vu_dev->vdev);
 376		response = 0;
 377		break;
 378	case VHOST_USER_SLAVE_VRING_CALL:
 379		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
 380			if (vq->index == msg.msg.payload.vring_state.index) {
 381				response = 0;
 382				vring_interrupt(0 /* ignored */, vq);
 383				break;
 384			}
 385		}
 386		break;
 387	case VHOST_USER_SLAVE_IOTLB_MSG:
 388		/* not supported - VIRTIO_F_ACCESS_PLATFORM */
 389	case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
 390		/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
 391	default:
 392		vu_err(vu_dev, "unexpected slave request %d\n",
 393		       msg.msg.header.request);
 394	}
 395
 396	if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
 397		vhost_user_reply(vu_dev, &msg.msg, response);
 398
 399	return IRQ_HANDLED;
 400}
 401
 402static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
 403{
 404	int rc, req_fds[2];
 405
 406	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
 407	rc = os_pipe(req_fds, true, true);
 408	if (rc < 0)
 409		return rc;
 410	vu_dev->req_fd = req_fds[0];
 411
 412	rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ,
 413			    vu_req_interrupt, IRQF_SHARED,
 414			    vu_dev->pdev->name, vu_dev);
 415	if (rc)
 416		goto err_close;
 417
 418	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
 419					   req_fds[1]);
 420	if (rc)
 421		goto err_free_irq;
 422
 423	goto out;
 424
 425err_free_irq:
 426	um_free_irq(VIRTIO_IRQ, vu_dev);
 427err_close:
 428	os_close_file(req_fds[0]);
 429out:
 430	/* Close unused write end of request fds */
 431	os_close_file(req_fds[1]);
 432	return rc;
 433}
 434
 435static int vhost_user_init(struct virtio_uml_device *vu_dev)
 436{
 437	int rc = vhost_user_set_owner(vu_dev);
 438
 439	if (rc)
 440		return rc;
 441	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
 442	if (rc)
 443		return rc;
 444
 445	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
 446		rc = vhost_user_get_protocol_features(vu_dev,
 447				&vu_dev->protocol_features);
 448		if (rc)
 449			return rc;
 450		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
 451		rc = vhost_user_set_protocol_features(vu_dev,
 452				vu_dev->protocol_features);
 453		if (rc)
 454			return rc;
 455	}
 456
 457	if (vu_dev->protocol_features &
 458			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
 459		rc = vhost_user_init_slave_req(vu_dev);
 460		if (rc)
 461			return rc;
 462	}
 463
 464	return 0;
 465}
 466
 467static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
 468				  u32 offset, void *buf, u32 len)
 469{
 470	u32 cfg_size = offset + len;
 471	struct vhost_user_msg *msg;
 472	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
 473	size_t msg_size = sizeof(msg->header) + payload_size;
 474	int rc;
 475
 476	if (!(vu_dev->protocol_features &
 477	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
 478		return;
 479
 480	msg = kzalloc(msg_size, GFP_KERNEL);
 481	if (!msg)
 482		return;
 483	msg->header.request = VHOST_USER_GET_CONFIG;
 484	msg->header.size = payload_size;
 485	msg->payload.config.offset = 0;
 486	msg->payload.config.size = cfg_size;
 487
 488	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
 489	if (rc) {
 490		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
 491		       rc);
 492		goto free;
 493	}
 494
 495	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
 496	if (rc) {
 497		vu_err(vu_dev,
 498		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
 499		       rc);
 500		goto free;
 501	}
 502
 503	if (msg->header.size != payload_size ||
 504	    msg->payload.config.size != cfg_size) {
 505		rc = -EPROTO;
 506		vu_err(vu_dev,
 507		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
 508		       msg->header.size, payload_size,
 509		       msg->payload.config.size, cfg_size);
 510		goto free;
 511	}
 512	memcpy(buf, msg->payload.config.payload + offset, len);
 513
 514free:
 515	kfree(msg);
 516}
 517
 518static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
 519				  u32 offset, const void *buf, u32 len)
 520{
 521	struct vhost_user_msg *msg;
 522	size_t payload_size = sizeof(msg->payload.config) + len;
 523	size_t msg_size = sizeof(msg->header) + payload_size;
 524	int rc;
 525
 526	if (!(vu_dev->protocol_features &
 527	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
 528		return;
 529
 530	msg = kzalloc(msg_size, GFP_KERNEL);
 531	if (!msg)
 532		return;
 533	msg->header.request = VHOST_USER_SET_CONFIG;
 534	msg->header.size = payload_size;
 535	msg->payload.config.offset = offset;
 536	msg->payload.config.size = len;
 537	memcpy(msg->payload.config.payload, buf, len);
 538
 539	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
 540	if (rc)
 541		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
 542		       rc);
 543
 544	kfree(msg);
 545}
 546
 547static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
 548				      struct vhost_user_mem_region *region_out)
 549{
 550	unsigned long long mem_offset;
 551	int rc = phys_mapping(addr, &mem_offset);
 552
 553	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
 554		return -EFAULT;
 555	*fd_out = rc;
 556	region_out->guest_addr = addr;
 557	region_out->user_addr = addr;
 558	region_out->size = size;
 559	region_out->mmap_offset = mem_offset;
 560
 561	/* Ensure mapping is valid for the entire region */
 562	rc = phys_mapping(addr + size - 1, &mem_offset);
 563	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
 564		 addr + size - 1, rc, *fd_out))
 565		return -EFAULT;
 566	return 0;
 567}
 568
 569static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
 570{
 571	struct vhost_user_msg msg = {
 572		.header.request = VHOST_USER_SET_MEM_TABLE,
 573		.header.size = sizeof(msg.payload.mem_regions),
 574		.payload.mem_regions.num = 1,
 575	};
 576	unsigned long reserved = uml_reserved - uml_physmem;
 577	int fds[2];
 578	int rc;
 579
 580	/*
 581	 * This is a bit tricky, see also the comment with setup_physmem().
 582	 *
 583	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
 584	 * but the code and data we *already* have is omitted. To us, this
 585	 * is no difference, since they both become part of our address
 586	 * space and memory consumption. To somebody looking in from the
 587	 * outside, however, it is different because the part of our memory
 588	 * consumption that's already part of the binary (code/data) is not
 589	 * mapped from the file, so it's not visible to another mmap from
 590	 * the file descriptor.
 591	 *
 592	 * Thus, don't advertise this space to the vhost-user slave. This
 593	 * means that the slave will likely abort or similar when we give
 594	 * it an address from the hidden range, since it's not marked as
 595	 * a valid address, but at least that way we detect the issue and
 596	 * don't just have the slave read an all-zeroes buffer from the
 597	 * shared memory file, or write something there that we can never
 598	 * see (depending on the direction of the virtqueue traffic.)
 599	 *
 600	 * Since we usually don't want to use .text for virtio buffers,
 601	 * this effectively means that you cannot use
 602	 *  1) global variables, which are in the .bss and not in the shm
 603	 *     file-backed memory
 604	 *  2) the stack in some processes, depending on where they have
 605	 *     their stack (or maybe only no interrupt stack?)
 606	 *
 607	 * The stack is already not typically valid for DMA, so this isn't
 608	 * much of a restriction, but global variables might be encountered.
 609	 *
 610	 * It might be possible to fix it by copying around the data that's
 611	 * between bss_start and where we map the file now, but it's not
 612	 * something that you typically encounter with virtio drivers, so
 613	 * it didn't seem worthwhile.
 614	 */
 615	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
 616					&fds[0],
 617					&msg.payload.mem_regions.regions[0]);
 618
 619	if (rc < 0)
 620		return rc;
 621	if (highmem) {
 622		msg.payload.mem_regions.num++;
 623		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
 624				&fds[1], &msg.payload.mem_regions.regions[1]);
 625		if (rc < 0)
 626			return rc;
 627	}
 628
 629	return vhost_user_send(vu_dev, false, &msg, fds,
 630			       msg.payload.mem_regions.num);
 631}
 632
 633static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
 634				      u32 request, u32 index, u32 num)
 635{
 636	struct vhost_user_msg msg = {
 637		.header.request = request,
 638		.header.size = sizeof(msg.payload.vring_state),
 639		.payload.vring_state.index = index,
 640		.payload.vring_state.num = num,
 641	};
 642
 643	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 644}
 645
 646static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
 647				    u32 index, u32 num)
 648{
 649	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
 650					  index, num);
 651}
 652
 653static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
 654				     u32 index, u32 offset)
 655{
 656	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
 657					  index, offset);
 658}
 659
 660static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
 661				     u32 index, u64 desc, u64 used, u64 avail,
 662				     u64 log)
 663{
 664	struct vhost_user_msg msg = {
 665		.header.request = VHOST_USER_SET_VRING_ADDR,
 666		.header.size = sizeof(msg.payload.vring_addr),
 667		.payload.vring_addr.index = index,
 668		.payload.vring_addr.desc = desc,
 669		.payload.vring_addr.used = used,
 670		.payload.vring_addr.avail = avail,
 671		.payload.vring_addr.log = log,
 672	};
 673
 674	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 675}
 676
 677static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
 678				   u32 request, int index, int fd)
 679{
 680	struct vhost_user_msg msg = {
 681		.header.request = request,
 682		.header.size = sizeof(msg.payload.integer),
 683		.payload.integer = index,
 684	};
 685
 686	if (index & ~VHOST_USER_VRING_INDEX_MASK)
 687		return -EINVAL;
 688	if (fd < 0) {
 689		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
 690		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
 691	}
 692	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
 693}
 694
 695static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
 696				     int index, int fd)
 697{
 698	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
 699				       index, fd);
 700}
 701
 702static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
 703				     int index, int fd)
 704{
 705	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
 706				       index, fd);
 707}
 708
 709static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
 710				       u32 index, bool enable)
 711{
 712	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
 713		return 0;
 714
 715	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
 716					  index, enable);
 717}
 718
 719
 720/* Virtio interface */
 721
 722static bool vu_notify(struct virtqueue *vq)
 723{
 724	struct virtio_uml_vq_info *info = vq->priv;
 725	const uint64_t n = 1;
 726	int rc;
 727
 728	time_travel_propagate_time();
 729
 730	if (info->kick_fd < 0) {
 731		struct virtio_uml_device *vu_dev;
 732
 733		vu_dev = to_virtio_uml_device(vq->vdev);
 734
 735		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
 736						  vq->index, 0) == 0;
 737	}
 738
 739	do {
 740		rc = os_write_file(info->kick_fd, &n, sizeof(n));
 741	} while (rc == -EINTR);
 742	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
 743}
 744
 745static irqreturn_t vu_interrupt(int irq, void *opaque)
 746{
 747	struct virtqueue *vq = opaque;
 748	struct virtio_uml_vq_info *info = vq->priv;
 749	uint64_t n;
 750	int rc;
 751	irqreturn_t ret = IRQ_NONE;
 752
 753	do {
 754		rc = os_read_file(info->call_fd, &n, sizeof(n));
 755		if (rc == sizeof(n))
 756			ret |= vring_interrupt(irq, vq);
 757	} while (rc == sizeof(n) || rc == -EINTR);
 758	WARN(rc != -EAGAIN, "read returned %d\n", rc);
 759	return ret;
 760}
 761
 762
 763static void vu_get(struct virtio_device *vdev, unsigned offset,
 764		   void *buf, unsigned len)
 765{
 766	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 767
 768	vhost_user_get_config(vu_dev, offset, buf, len);
 769}
 770
 771static void vu_set(struct virtio_device *vdev, unsigned offset,
 772		   const void *buf, unsigned len)
 773{
 774	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 775
 776	vhost_user_set_config(vu_dev, offset, buf, len);
 777}
 778
 779static u8 vu_get_status(struct virtio_device *vdev)
 780{
 781	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 782
 783	return vu_dev->status;
 784}
 785
 786static void vu_set_status(struct virtio_device *vdev, u8 status)
 787{
 788	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 789
 790	vu_dev->status = status;
 791}
 792
 793static void vu_reset(struct virtio_device *vdev)
 794{
 795	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 796
 797	vu_dev->status = 0;
 798}
 799
 800static void vu_del_vq(struct virtqueue *vq)
 801{
 802	struct virtio_uml_vq_info *info = vq->priv;
 803
 804	if (info->call_fd >= 0) {
 805		um_free_irq(VIRTIO_IRQ, vq);
 806		os_close_file(info->call_fd);
 807	}
 808
 809	if (info->kick_fd >= 0)
 810		os_close_file(info->kick_fd);
 811
 812	vring_del_virtqueue(vq);
 813	kfree(info);
 814}
 815
 816static void vu_del_vqs(struct virtio_device *vdev)
 817{
 818	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 819	struct virtqueue *vq, *n;
 820	u64 features;
 821
 822	/* Note: reverse order as a workaround to a decoding bug in snabb */
 823	list_for_each_entry_reverse(vq, &vdev->vqs, list)
 824		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
 825
 826	/* Ensure previous messages have been processed */
 827	WARN_ON(vhost_user_get_features(vu_dev, &features));
 828
 829	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
 830		vu_del_vq(vq);
 831}
 832
 833static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
 834			       struct virtqueue *vq)
 835{
 836	struct virtio_uml_vq_info *info = vq->priv;
 837	int call_fds[2];
 838	int rc;
 839
 840	/* no call FD needed/desired in this case */
 841	if (vu_dev->protocol_features &
 842			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
 843	    vu_dev->protocol_features &
 844			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
 845		info->call_fd = -1;
 846		return 0;
 847	}
 848
 849	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
 850	rc = os_pipe(call_fds, true, true);
 851	if (rc < 0)
 852		return rc;
 853
 854	info->call_fd = call_fds[0];
 855	rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
 856			    vu_interrupt, IRQF_SHARED, info->name, vq);
 857	if (rc)
 858		goto close_both;
 859
 860	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
 861	if (rc)
 862		goto release_irq;
 863
 864	goto out;
 865
 866release_irq:
 867	um_free_irq(VIRTIO_IRQ, vq);
 868close_both:
 869	os_close_file(call_fds[0]);
 870out:
 871	/* Close (unused) write end of call fds */
 872	os_close_file(call_fds[1]);
 873
 874	return rc;
 875}
 876
 877#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
 878static void vu_defer_irq_handle(struct time_travel_event *d)
 879{
 880	struct virtio_uml_vq_info *info;
 881
 882	info = container_of(d, struct virtio_uml_vq_info, defer);
 883	info->callback(info->vq);
 884}
 885
 886static void vu_defer_irq_callback(struct virtqueue *vq)
 887{
 888	struct virtio_uml_vq_info *info = vq->priv;
 889
 890	time_travel_add_irq_event(&info->defer);
 891}
 892#endif
 893
 894static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
 895				     unsigned index, vq_callback_t *callback,
 896				     const char *name, bool ctx)
 897{
 898	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 899	struct platform_device *pdev = vu_dev->pdev;
 900	struct virtio_uml_vq_info *info;
 901	struct virtqueue *vq;
 902	int num = MAX_SUPPORTED_QUEUE_SIZE;
 903	int rc;
 904
 905	info = kzalloc(sizeof(*info), GFP_KERNEL);
 906	if (!info) {
 907		rc = -ENOMEM;
 908		goto error_kzalloc;
 909	}
 910	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
 911		 pdev->id, name);
 912
 913#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
 914	/*
 915	 * When we get an interrupt, we must bounce it through the simulation
 916	 * calendar (the simtime device), except for the simtime device itself
 917	 * since that's part of the simulation control.
 918	 */
 919	if (time_travel_mode == TT_MODE_EXTERNAL && callback) {
 920		info->callback = callback;
 921		callback = vu_defer_irq_callback;
 922		time_travel_set_event_fn(&info->defer, vu_defer_irq_handle);
 923	}
 924#endif
 925
 926	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
 927				    ctx, vu_notify, callback, info->name);
 928	if (!vq) {
 929		rc = -ENOMEM;
 930		goto error_create;
 931	}
 932	vq->priv = info;
 933	num = virtqueue_get_vring_size(vq);
 934#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
 935	info->vq = vq;
 936#endif
 937
 938	if (vu_dev->protocol_features &
 939			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
 940		info->kick_fd = -1;
 941	} else {
 942		rc = os_eventfd(0, 0);
 943		if (rc < 0)
 944			goto error_kick;
 945		info->kick_fd = rc;
 946	}
 947
 948	rc = vu_setup_vq_call_fd(vu_dev, vq);
 949	if (rc)
 950		goto error_call;
 951
 952	rc = vhost_user_set_vring_num(vu_dev, index, num);
 953	if (rc)
 954		goto error_setup;
 955
 956	rc = vhost_user_set_vring_base(vu_dev, index, 0);
 957	if (rc)
 958		goto error_setup;
 959
 960	rc = vhost_user_set_vring_addr(vu_dev, index,
 961				       virtqueue_get_desc_addr(vq),
 962				       virtqueue_get_used_addr(vq),
 963				       virtqueue_get_avail_addr(vq),
 964				       (u64) -1);
 965	if (rc)
 966		goto error_setup;
 967
 968	return vq;
 969
 970error_setup:
 971	if (info->call_fd >= 0) {
 972		um_free_irq(VIRTIO_IRQ, vq);
 973		os_close_file(info->call_fd);
 974	}
 975error_call:
 976	if (info->kick_fd >= 0)
 977		os_close_file(info->kick_fd);
 978error_kick:
 979	vring_del_virtqueue(vq);
 980error_create:
 981	kfree(info);
 982error_kzalloc:
 983	return ERR_PTR(rc);
 984}
 985
 986static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 987		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
 988		       const char * const names[], const bool *ctx,
 989		       struct irq_affinity *desc)
 990{
 991	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
 992	int i, queue_idx = 0, rc;
 993	struct virtqueue *vq;
 994
 995	rc = vhost_user_set_mem_table(vu_dev);
 996	if (rc)
 997		return rc;
 998
 999	for (i = 0; i < nvqs; ++i) {
1000		if (!names[i]) {
1001			vqs[i] = NULL;
1002			continue;
1003		}
1004
1005		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
1006				     ctx ? ctx[i] : false);
1007		if (IS_ERR(vqs[i])) {
1008			rc = PTR_ERR(vqs[i]);
1009			goto error_setup;
1010		}
1011	}
1012
1013	list_for_each_entry(vq, &vdev->vqs, list) {
1014		struct virtio_uml_vq_info *info = vq->priv;
1015
1016		if (info->kick_fd >= 0) {
1017			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
1018						       info->kick_fd);
1019			if (rc)
1020				goto error_setup;
1021		}
1022
1023		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
1024		if (rc)
1025			goto error_setup;
1026	}
1027
1028	return 0;
1029
1030error_setup:
1031	vu_del_vqs(vdev);
1032	return rc;
1033}
1034
1035static u64 vu_get_features(struct virtio_device *vdev)
1036{
1037	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1038
1039	return vu_dev->features;
1040}
1041
1042static int vu_finalize_features(struct virtio_device *vdev)
1043{
1044	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1045	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
1046
1047	vring_transport_features(vdev);
1048	vu_dev->features = vdev->features | supported;
1049
1050	return vhost_user_set_features(vu_dev, vu_dev->features);
1051}
1052
1053static const char *vu_bus_name(struct virtio_device *vdev)
1054{
1055	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1056
1057	return vu_dev->pdev->name;
1058}
1059
/* Config operations installed as vdev.config by virtio_uml_probe(). */
static const struct virtio_config_ops virtio_uml_config_ops = {
	.get = vu_get,
	.set = vu_set,
	.get_status = vu_get_status,
	.set_status = vu_set_status,
	.reset = vu_reset,
	.find_vqs = vu_find_vqs,
	.del_vqs = vu_del_vqs,
	.get_features = vu_get_features,
	.finalize_features = vu_finalize_features,
	.bus_name = vu_bus_name,
};
1072
1073static void virtio_uml_release_dev(struct device *d)
1074{
1075	struct virtio_device *vdev =
1076			container_of(d, struct virtio_device, dev);
1077	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1078
1079	/* might not have been opened due to not negotiating the feature */
1080	if (vu_dev->req_fd >= 0) {
1081		um_free_irq(VIRTIO_IRQ, vu_dev);
1082		os_close_file(vu_dev->req_fd);
1083	}
1084
1085	os_close_file(vu_dev->sock);
1086}
1087
1088/* Platform device */
1089
1090static int virtio_uml_probe(struct platform_device *pdev)
1091{
1092	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1093	struct virtio_uml_device *vu_dev;
1094	int rc;
1095
1096	if (!pdata)
1097		return -EINVAL;
1098
1099	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
1100	if (!vu_dev)
1101		return -ENOMEM;
1102
1103	vu_dev->vdev.dev.parent = &pdev->dev;
1104	vu_dev->vdev.dev.release = virtio_uml_release_dev;
1105	vu_dev->vdev.config = &virtio_uml_config_ops;
1106	vu_dev->vdev.id.device = pdata->virtio_device_id;
1107	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
1108	vu_dev->pdev = pdev;
1109	vu_dev->req_fd = -1;
1110
1111	do {
1112		rc = os_connect_socket(pdata->socket_path);
1113	} while (rc == -EINTR);
1114	if (rc < 0)
1115		return rc;
1116	vu_dev->sock = rc;
1117
1118	spin_lock_init(&vu_dev->sock_lock);
1119
1120	rc = vhost_user_init(vu_dev);
1121	if (rc)
1122		goto error_init;
1123
1124	platform_set_drvdata(pdev, vu_dev);
1125
1126	rc = register_virtio_device(&vu_dev->vdev);
1127	if (rc)
1128		put_device(&vu_dev->vdev.dev);
1129	vu_dev->registered = 1;
1130	return rc;
1131
1132error_init:
1133	os_close_file(vu_dev->sock);
1134	return rc;
1135}
1136
1137static int virtio_uml_remove(struct platform_device *pdev)
1138{
1139	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1140
1141	unregister_virtio_device(&vu_dev->vdev);
1142	return 0;
1143}
1144
1145/* Command line device list */
1146
/*
 * Release callback for vu_cmdline_parent. Intentionally empty: the parent
 * is a statically defined struct device, so there is nothing to free here.
 */
static void vu_cmdline_release_dev(struct device *d)
{
	/* nothing to do */
}
1150
1151static struct device vu_cmdline_parent = {
1152	.init_name = "virtio-uml-cmdline",
1153	.release = vu_cmdline_release_dev,
1154};
1155
1156static bool vu_cmdline_parent_registered;
1157static int vu_cmdline_id;
1158
1159static int vu_unregister_cmdline_device(struct device *dev, void *data)
1160{
1161	struct platform_device *pdev = to_platform_device(dev);
1162	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1163
1164	kfree(pdata->socket_path);
1165	platform_device_unregister(pdev);
1166	return 0;
1167}
1168
1169static void vu_conn_broken(struct work_struct *wk)
1170{
1171	struct virtio_uml_platform_data *pdata;
1172
1173	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
1174	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
1175}
1176
1177static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1178{
1179	const char *ids = strchr(device, ':');
1180	unsigned int virtio_device_id;
1181	int processed, consumed, err;
1182	char *socket_path;
1183	struct virtio_uml_platform_data pdata, *ppdata;
1184	struct platform_device *pdev;
1185
1186	if (!ids || ids == device)
1187		return -EINVAL;
1188
1189	processed = sscanf(ids, ":%u%n:%d%n",
1190			   &virtio_device_id, &consumed,
1191			   &vu_cmdline_id, &consumed);
1192
1193	if (processed < 1 || ids[consumed])
1194		return -EINVAL;
1195
1196	if (!vu_cmdline_parent_registered) {
1197		err = device_register(&vu_cmdline_parent);
1198		if (err) {
1199			pr_err("Failed to register parent device!\n");
1200			put_device(&vu_cmdline_parent);
1201			return err;
1202		}
1203		vu_cmdline_parent_registered = true;
1204	}
1205
1206	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1207	if (!socket_path)
1208		return -ENOMEM;
1209
1210	pdata.virtio_device_id = (u32) virtio_device_id;
1211	pdata.socket_path = socket_path;
1212
1213	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1214		vu_cmdline_id, virtio_device_id, socket_path);
1215
1216	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1217					     vu_cmdline_id++, &pdata,
1218					     sizeof(pdata));
1219	err = PTR_ERR_OR_ZERO(pdev);
1220	if (err)
1221		goto free;
1222
1223	ppdata = pdev->dev.platform_data;
1224	ppdata->pdev = pdev;
1225	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
1226
1227	return 0;
1228
1229free:
1230	kfree(socket_path);
1231	return err;
1232}
1233
1234static int vu_cmdline_get_device(struct device *dev, void *data)
1235{
1236	struct platform_device *pdev = to_platform_device(dev);
1237	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1238	char *buffer = data;
1239	unsigned int len = strlen(buffer);
1240
1241	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1242		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
1243	return 0;
1244}
1245
1246static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1247{
1248	buffer[0] = '\0';
1249	if (vu_cmdline_parent_registered)
1250		device_for_each_child(&vu_cmdline_parent, buffer,
1251				      vu_cmdline_get_device);
1252	return strlen(buffer) + 1;
1253}
1254
1255static const struct kernel_param_ops vu_cmdline_param_ops = {
1256	.set = vu_cmdline_set,
1257	.get = vu_cmdline_get,
1258};
1259
1260device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
1261__uml_help(vu_cmdline_param_ops,
1262"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
1263"    Configure a virtio device over a vhost-user socket.\n"
1264"    See virtio_ids.h for a list of possible virtio device id values.\n"
1265"    Optionally use a specific platform_device id.\n\n"
1266);
1267
1268
1269static void vu_unregister_cmdline_devices(void)
1270{
1271	if (vu_cmdline_parent_registered) {
1272		device_for_each_child(&vu_cmdline_parent, NULL,
1273				      vu_unregister_cmdline_device);
1274		device_unregister(&vu_cmdline_parent);
1275		vu_cmdline_parent_registered = false;
1276	}
1277}
1278
1279/* Platform driver */
1280
1281static const struct of_device_id virtio_uml_match[] = {
1282	{ .compatible = "virtio,uml", },
1283	{ }
1284};
1285MODULE_DEVICE_TABLE(of, virtio_uml_match);
1286
1287static struct platform_driver virtio_uml_driver = {
1288	.probe = virtio_uml_probe,
1289	.remove = virtio_uml_remove,
1290	.driver = {
1291		.name = "virtio-uml",
1292		.of_match_table = virtio_uml_match,
1293	},
1294};
1295
1296static int __init virtio_uml_init(void)
1297{
1298	return platform_driver_register(&virtio_uml_driver);
1299}
1300
1301static void __exit virtio_uml_exit(void)
1302{
1303	platform_driver_unregister(&virtio_uml_driver);
1304	vu_unregister_cmdline_devices();
1305}
1306
1307module_init(virtio_uml_init);
1308module_exit(virtio_uml_exit);
1309__uml_exitcall(virtio_uml_exit);
1310
1311MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1312MODULE_LICENSE("GPL");