// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtio vhost-user driver
 *
 * Copyright(c) 2019 Intel Corporation
 *
 * This driver allows virtio devices to be used over a vhost-user socket.
 *
 * Guest devices can be instantiated by kernel module or command line
 * parameters. One device will be created for each parameter. Syntax:
 *
 *	virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
 * where:
 *	<socket>	:= vhost-user socket path to connect
 *	<virtio_id>	:= virtio device id (as in virtio_ids.h)
 *	<platform_id>	:= (optional) platform device id
 *
 * example:
 *	virtio_uml.device=/var/uml.socket:1
 *
 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
 */
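/*
 * As a further illustration (hypothetical socket path), a virtio-rng
 * device (id 4 in virtio_ids.h) bound to platform device id 1 could be
 * requested with:
 *
 *	virtio_uml.device=/var/uml-rng.socket:4:1
 */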
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/time-internal.h>
#include <linux/virtio-uml.h>
#include <shared/as-layout.h>
#include <irq_kern.h>
#include <init.h>
#include <os.h>
#include "vhost_user.h"

#define MAX_SUPPORTED_QUEUE_SIZE	256

#define to_virtio_uml_device(_vdev) \
	container_of(_vdev, struct virtio_uml_device, vdev)

struct virtio_uml_platform_data {
	u32 virtio_device_id;
	const char *socket_path;
	struct work_struct conn_broken_wk;
	struct platform_device *pdev;
};

struct virtio_uml_device {
	struct virtio_device vdev;
	struct platform_device *pdev;
	struct virtio_uml_platform_data *pdata;

	spinlock_t sock_lock;
	int sock, req_fd, irq;
	u64 features;
	u64 protocol_features;
	u8 status;
	u8 registered:1;
	u8 suspended:1;
	u8 no_vq_suspend:1;

	u8 config_changed_irq:1;
	uint64_t vq_irq_vq_map;
	int recv_rc;
};

struct virtio_uml_vq_info {
	int kick_fd, call_fd;
	char name[32];
	bool suspended;
};

extern unsigned long long physmem_size, highmem;

#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)

/* Vhost-user protocol */

static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
			    const int *fds, unsigned int fds_num)
{
	int rc;

	do {
		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
		if (rc > 0) {
			buf += rc;
			len -= rc;
			fds = NULL;
			fds_num = 0;
		}
	} while (len && (rc >= 0 || rc == -EINTR));

	if (rc < 0)
		return rc;
	return 0;
}

static int full_read(int fd, void *buf, int len, bool abortable)
{
	int rc;

	if (!len)
		return 0;

	do {
		rc = os_read_file(fd, buf, len);
		if (rc > 0) {
			buf += rc;
			len -= rc;
		}
	} while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));

	if (rc < 0)
		return rc;
	if (rc == 0)
		return -ECONNRESET;
	return 0;
}

static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
{
	return full_read(fd, msg, sizeof(msg->header), true);
}

static int vhost_user_recv(struct virtio_uml_device *vu_dev,
			   int fd, struct vhost_user_msg *msg,
			   size_t max_payload_size, bool wait)
{
	size_t size;
	int rc;

	/*
	 * In virtio time-travel mode, we're handling all the vhost-user
	 * FDs by polling them whenever appropriate. However, we may get
	 * into a situation where we're sending out an interrupt message
	 * to a device (e.g. a net device) and need to handle a simulation
	 * time message while doing so, e.g. one that tells us to update
	 * our idea of how long we can run without scheduling.
	 *
	 * Thus, we need to not just read() from the given fd, but need
	 * to also handle messages for the simulation time - this function
	 * does that for us while waiting for the given fd to be readable.
	 */
	if (wait)
		time_travel_wait_readable(fd);

	rc = vhost_user_recv_header(fd, msg);

	if (rc)
		return rc;
	size = msg->header.size;
	if (size > max_payload_size)
		return -EPROTO;
	return full_read(fd, &msg->payload, size, false);
}

static void vhost_user_check_reset(struct virtio_uml_device *vu_dev,
				   int rc)
{
	struct virtio_uml_platform_data *pdata = vu_dev->pdata;

	if (rc != -ECONNRESET)
		return;

	if (!vu_dev->registered)
		return;

	virtio_break_device(&vu_dev->vdev);
	schedule_work(&pdata->conn_broken_wk);
}

static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
				struct vhost_user_msg *msg,
				size_t max_payload_size)
{
	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
				 max_payload_size, true);

	if (rc) {
		vhost_user_check_reset(vu_dev, rc);
		return rc;
	}

	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
		return -EPROTO;

	return 0;
}

static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
			       u64 *value)
{
	struct vhost_user_msg msg;
	int rc = vhost_user_recv_resp(vu_dev, &msg,
				      sizeof(msg.payload.integer));

	if (rc)
		return rc;
	if (msg.header.size != sizeof(msg.payload.integer))
		return -EPROTO;
	*value = msg.payload.integer;
	return 0;
}

static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
			       struct vhost_user_msg *msg,
			       size_t max_payload_size)
{
	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
				 max_payload_size, false);

	if (rc)
		return rc;

	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
			VHOST_USER_VERSION)
		return -EPROTO;

	return 0;
}

static int vhost_user_send(struct virtio_uml_device *vu_dev,
			   bool need_response, struct vhost_user_msg *msg,
			   int *fds, size_t num_fds)
{
	size_t size = sizeof(msg->header) + msg->header.size;
	unsigned long flags;
	bool request_ack;
	int rc;

	msg->header.flags |= VHOST_USER_VERSION;

	/*
	 * The need_response flag indicates that we already need a response,
	 * e.g. to read the features. In these cases, don't request an ACK as
	 * it is meaningless. Also request an ACK only if supported.
	 */
	request_ack = !need_response;
	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
		request_ack = false;

	if (request_ack)
		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;

	spin_lock_irqsave(&vu_dev->sock_lock, flags);
	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
	if (rc < 0)
		goto out;

	if (request_ack) {
		uint64_t status;

		rc = vhost_user_recv_u64(vu_dev, &status);
		if (rc)
			goto out;

		if (status) {
			vu_err(vu_dev, "slave reports error: %llu\n", status);
			rc = -EIO;
			goto out;
		}
	}

out:
	spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
	return rc;
}

static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
				      bool need_response, u32 request)
{
	struct vhost_user_msg msg = {
		.header.request = request,
	};

	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
}

static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
					 u32 request, int fd)
{
	struct vhost_user_msg msg = {
		.header.request = request,
	};

	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
}

static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
			       u32 request, u64 value)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.integer),
		.payload.integer = value,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}

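/*
 * Taken together, the helpers above wrap the standard vhost-user wire
 * format (see vhost_user.h and the vhost-user protocol spec): a header
 * carrying request, flags and payload size, followed by msg.header.size
 * bytes of payload. As a sketch, advertising VIRTIO_F_VERSION_1 back to
 * the slave amounts to:
 *
 *	vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES,
 *			    BIT_ULL(VIRTIO_F_VERSION_1));
 *
 * i.e., per the spec, a 12-byte header with size == 8, then the 8-byte
 * little-endian value.
 */
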
static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
{
	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
}

static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
				   u64 *features)
{
	int rc = vhost_user_send_no_payload(vu_dev, true,
					    VHOST_USER_GET_FEATURES);

	if (rc)
		return rc;
	return vhost_user_recv_u64(vu_dev, features);
}

static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
				   u64 features)
{
	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
}

static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 *protocol_features)
{
	int rc = vhost_user_send_no_payload(vu_dev, true,
					    VHOST_USER_GET_PROTOCOL_FEATURES);

	if (rc)
		return rc;
	return vhost_user_recv_u64(vu_dev, protocol_features);
}

static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 protocol_features)
{
	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
				   protocol_features);
}

static void vhost_user_reply(struct virtio_uml_device *vu_dev,
			     struct vhost_user_msg *msg, int response)
{
	struct vhost_user_msg reply = {
		.payload.integer = response,
	};
	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
	int rc;

	reply.header = msg->header;
	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
	reply.header.flags |= VHOST_USER_FLAG_REPLY;
	reply.header.size = sizeof(reply.payload.integer);

	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);

	if (rc)
		vu_err(vu_dev,
		       "sending reply to slave request failed: %d (size %zu)\n",
		       rc, size);
}

static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
				       struct time_travel_event *ev)
{
	struct virtqueue *vq;
	int response = 1;
	struct {
		struct vhost_user_msg msg;
		u8 extra_payload[512];
	} msg;
	int rc;
	irqreturn_t irq_rc = IRQ_NONE;

	while (1) {
		rc = vhost_user_recv_req(vu_dev, &msg.msg,
					 sizeof(msg.msg.payload) +
					 sizeof(msg.extra_payload));
		if (rc)
			break;

		switch (msg.msg.header.request) {
		case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
			vu_dev->config_changed_irq = true;
			response = 0;
			break;
		case VHOST_USER_SLAVE_VRING_CALL:
			virtio_device_for_each_vq((&vu_dev->vdev), vq) {
				if (vq->index == msg.msg.payload.vring_state.index) {
					response = 0;
					vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index);
					break;
				}
			}
			break;
		case VHOST_USER_SLAVE_IOTLB_MSG:
			/* not supported - VIRTIO_F_ACCESS_PLATFORM */
		case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
			/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
		default:
			vu_err(vu_dev, "unexpected slave request %d\n",
			       msg.msg.header.request);
		}

		if (ev && !vu_dev->suspended)
			time_travel_add_irq_event(ev);

		if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
			vhost_user_reply(vu_dev, &msg.msg, response);
		irq_rc = IRQ_HANDLED;
	}
	/* mask EAGAIN as we try non-blocking read until socket is empty */
	vu_dev->recv_rc = (rc == -EAGAIN) ? 0 : rc;
	return irq_rc;
}

static irqreturn_t vu_req_interrupt(int irq, void *data)
{
	struct virtio_uml_device *vu_dev = data;
	irqreturn_t ret = IRQ_HANDLED;

	if (!um_irq_timetravel_handler_used())
		ret = vu_req_read_message(vu_dev, NULL);

	if (vu_dev->recv_rc) {
		vhost_user_check_reset(vu_dev, vu_dev->recv_rc);
	} else if (vu_dev->vq_irq_vq_map) {
		struct virtqueue *vq;

		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
			if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index))
				vring_interrupt(0 /* ignored */, vq);
		}
		vu_dev->vq_irq_vq_map = 0;
	} else if (vu_dev->config_changed_irq) {
		virtio_config_changed(&vu_dev->vdev);
		vu_dev->config_changed_irq = false;
	}

	return ret;
}

static void vu_req_interrupt_comm_handler(int irq, int fd, void *data,
					  struct time_travel_event *ev)
{
	vu_req_read_message(data, ev);
}

static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
{
	int rc, req_fds[2];

	/* Use a pipe for the slave req fd, since SIGIO is not supported for eventfd */
	rc = os_pipe(req_fds, true, true);
	if (rc < 0)
		return rc;
	vu_dev->req_fd = req_fds[0];

	rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
			       vu_req_interrupt, IRQF_SHARED,
			       vu_dev->pdev->name, vu_dev,
			       vu_req_interrupt_comm_handler);
	if (rc < 0)
		goto err_close;

	vu_dev->irq = rc;

	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
					   req_fds[1]);
	if (rc)
		goto err_free_irq;

	goto out;

err_free_irq:
	um_free_irq(vu_dev->irq, vu_dev);
err_close:
	os_close_file(req_fds[0]);
out:
	/* Close unused write end of request fds */
	os_close_file(req_fds[1]);
	return rc;
}

static int vhost_user_init(struct virtio_uml_device *vu_dev)
{
	int rc = vhost_user_set_owner(vu_dev);

	if (rc)
		return rc;
	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
	if (rc)
		return rc;

	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
		rc = vhost_user_get_protocol_features(vu_dev,
						      &vu_dev->protocol_features);
		if (rc)
			return rc;
		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
		rc = vhost_user_set_protocol_features(vu_dev,
						      vu_dev->protocol_features);
		if (rc)
			return rc;
	}

	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
		rc = vhost_user_init_slave_req(vu_dev);
		if (rc)
			return rc;
	}

	return 0;
}

static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
				  u32 offset, void *buf, u32 len)
{
	u32 cfg_size = offset + len;
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
	size_t msg_size = sizeof(msg->header) + payload_size;
	int rc;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	msg = kzalloc(msg_size, GFP_KERNEL);
	if (!msg)
		return;
	msg->header.request = VHOST_USER_GET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = 0;
	msg->payload.config.size = cfg_size;

	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
	if (rc) {
		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
		       rc);
		goto free;
	}

	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
	if (rc) {
		vu_err(vu_dev,
		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
		       rc);
		goto free;
	}

	if (msg->header.size != payload_size ||
	    msg->payload.config.size != cfg_size) {
		rc = -EPROTO;
		vu_err(vu_dev,
		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
		       msg->header.size, payload_size,
		       msg->payload.config.size, cfg_size);
		goto free;
	}
	memcpy(buf, msg->payload.config.payload + offset, len);

free:
	kfree(msg);
}

static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
				  u32 offset, const void *buf, u32 len)
{
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + len;
	size_t msg_size = sizeof(msg->header) + payload_size;
	int rc;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	msg = kzalloc(msg_size, GFP_KERNEL);
	if (!msg)
		return;
	msg->header.request = VHOST_USER_SET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = offset;
	msg->payload.config.size = len;
	memcpy(msg->payload.config.payload, buf, len);

	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
	if (rc)
		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
		       rc);

	kfree(msg);
}

static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
				      struct vhost_user_mem_region *region_out)
{
	unsigned long long mem_offset;
	int rc = phys_mapping(addr, &mem_offset);

	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
		return -EFAULT;
	*fd_out = rc;
	region_out->guest_addr = addr;
	region_out->user_addr = addr;
	region_out->size = size;
	region_out->mmap_offset = mem_offset;

	/* Ensure mapping is valid for the entire region */
	rc = phys_mapping(addr + size - 1, &mem_offset);
	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
		 addr + size - 1, rc, *fd_out))
		return -EFAULT;
	return 0;
}

static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
{
	struct vhost_user_msg msg = {
		.header.request = VHOST_USER_SET_MEM_TABLE,
		.header.size = sizeof(msg.payload.mem_regions),
		.payload.mem_regions.num = 1,
	};
	unsigned long reserved = uml_reserved - uml_physmem;
	int fds[2];
	int rc;

	/*
	 * This is a bit tricky, see also the comment with setup_physmem().
	 *
	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
	 * but the code and data we *already* have is omitted. To us, this
	 * is no difference, since they both become part of our address
	 * space and memory consumption. To somebody looking in from the
	 * outside, however, it is different because the part of our memory
	 * consumption that's already part of the binary (code/data) is not
	 * mapped from the file, so it's not visible to another mmap from
	 * the file descriptor.
	 *
	 * Thus, don't advertise this space to the vhost-user slave. This
	 * means that the slave will likely abort or similar when we give
	 * it an address from the hidden range, since it's not marked as
	 * a valid address, but at least that way we detect the issue and
	 * don't just have the slave read an all-zeroes buffer from the
	 * shared memory file, or write something there that we can never
	 * see (depending on the direction of the virtqueue traffic.)
	 *
	 * Since we usually don't want to use .text for virtio buffers,
	 * this effectively means that you cannot use
	 *  1) global variables, which are in the .bss and not in the shm
	 *     file-backed memory
	 *  2) the stack in some processes, depending on where they have
	 *     their stack (or maybe only no interrupt stack?)
	 *
	 * The stack is already not typically valid for DMA, so this isn't
	 * much of a restriction, but global variables might be encountered.
	 *
	 * It might be possible to fix it by copying around the data that's
	 * between bss_start and where we map the file now, but it's not
	 * something that you typically encounter with virtio drivers, so
	 * it didn't seem worthwhile.
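	 *
	 * As a concrete sketch of restriction 1) (hypothetical driver
	 * code), something like
	 *
	 *	static u8 cmd_buf[64];	// .bss, i.e. in the hidden range
	 *	sg_init_one(&sg, cmd_buf, sizeof(cmd_buf));
	 *
	 * would hand the slave an address outside the advertised regions;
	 * buffers must instead live in the file-backed physmem, e.g. come
	 * from kmalloc().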
	 */
	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
					&fds[0],
					&msg.payload.mem_regions.regions[0]);

	if (rc < 0)
		return rc;
	if (highmem) {
		msg.payload.mem_regions.num++;
		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
				&fds[1], &msg.payload.mem_regions.regions[1]);
		if (rc < 0)
			return rc;
	}

	return vhost_user_send(vu_dev, false, &msg, fds,
			       msg.payload.mem_regions.num);
}

static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
				      u32 request, u32 index, u32 num)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.vring_state),
		.payload.vring_state.index = index,
		.payload.vring_state.num = num,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}

static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
				    u32 index, u32 num)
{
	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
					  index, num);
}

static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
				     u32 index, u32 offset)
{
	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
					  index, offset);
}

static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
				     u32 index, u64 desc, u64 used, u64 avail,
				     u64 log)
{
	struct vhost_user_msg msg = {
		.header.request = VHOST_USER_SET_VRING_ADDR,
		.header.size = sizeof(msg.payload.vring_addr),
		.payload.vring_addr.index = index,
		.payload.vring_addr.desc = desc,
		.payload.vring_addr.used = used,
		.payload.vring_addr.avail = avail,
		.payload.vring_addr.log = log,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}

static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
				   u32 request, int index, int fd)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.integer),
		.payload.integer = index,
	};

	if (index & ~VHOST_USER_VRING_INDEX_MASK)
		return -EINVAL;
	if (fd < 0) {
		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
	}
	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
}

static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
				     int index, int fd)
{
	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
				       index, fd);
}

static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
				     int index, int fd)
{
	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
				       index, fd);
}

static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
				       u32 index, bool enable)
{
	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
		return 0;

	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
					  index, enable);
}

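/*
 * A rough sketch of the full bring-up sequence built from the helpers
 * above, in the order vhost_user_init() and vu_find_vqs()/vu_setup_vq()
 * below issue the requests (error handling and protocol feature
 * negotiation omitted):
 *
 *	vhost_user_set_owner(vu_dev);
 *	vhost_user_get_features(vu_dev, &vu_dev->features);
 *	vhost_user_set_mem_table(vu_dev);
 *	// per virtqueue:
 *	vhost_user_set_vring_call(vu_dev, index, call_fd);
 *	vhost_user_set_vring_num(vu_dev, index, num);
 *	vhost_user_set_vring_base(vu_dev, index, 0);
 *	vhost_user_set_vring_addr(vu_dev, index, desc, used, avail, -1);
 *	// then, once all virtqueues exist:
 *	vhost_user_set_vring_kick(vu_dev, index, kick_fd);
 *	vhost_user_set_vring_enable(vu_dev, index, true);
 */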

/* Virtio interface */

static bool vu_notify(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	const uint64_t n = 1;
	int rc;

	if (info->suspended)
		return true;

	time_travel_propagate_time();

	if (info->kick_fd < 0) {
		struct virtio_uml_device *vu_dev;

		vu_dev = to_virtio_uml_device(vq->vdev);

		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
						  vq->index, 0) == 0;
	}

	do {
		rc = os_write_file(info->kick_fd, &n, sizeof(n));
	} while (rc == -EINTR);
	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
}

static irqreturn_t vu_interrupt(int irq, void *opaque)
{
	struct virtqueue *vq = opaque;
	struct virtio_uml_vq_info *info = vq->priv;
	uint64_t n;
	int rc;
	irqreturn_t ret = IRQ_NONE;

	do {
		rc = os_read_file(info->call_fd, &n, sizeof(n));
		if (rc == sizeof(n))
			ret |= vring_interrupt(irq, vq);
	} while (rc == sizeof(n) || rc == -EINTR);
	WARN(rc != -EAGAIN, "read returned %d\n", rc);
	return ret;
}


static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vhost_user_get_config(vu_dev, offset, buf, len);
}

static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vhost_user_set_config(vu_dev, offset, buf, len);
}

static u8 vu_get_status(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->status;
}

static void vu_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vu_dev->status = status;
}

static void vu_reset(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vu_dev->status = 0;
}

static void vu_del_vq(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;

	if (info->call_fd >= 0) {
		struct virtio_uml_device *vu_dev;

		vu_dev = to_virtio_uml_device(vq->vdev);

		um_free_irq(vu_dev->irq, vq);
		os_close_file(info->call_fd);
	}

	if (info->kick_fd >= 0)
		os_close_file(info->kick_fd);

	vring_del_virtqueue(vq);
	kfree(info);
}

static void vu_del_vqs(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct virtqueue *vq, *n;
	u64 features;

	/* Note: reverse order as a workaround to a decoding bug in snabb */
	list_for_each_entry_reverse(vq, &vdev->vqs, list)
		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));

	/* Ensure previous messages have been processed */
	WARN_ON(vhost_user_get_features(vu_dev, &features));

	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
		vu_del_vq(vq);
}

static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
			       struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	int call_fds[2];
	int rc;

	/* no call FD needed/desired in this case */
	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
	    vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
		info->call_fd = -1;
		return 0;
	}

	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
	rc = os_pipe(call_fds, true, true);
	if (rc < 0)
		return rc;

	info->call_fd = call_fds[0];
	rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
			    vu_interrupt, IRQF_SHARED, info->name, vq);
	if (rc < 0)
		goto close_both;

	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
	if (rc)
		goto release_irq;

	goto out;

release_irq:
	um_free_irq(vu_dev->irq, vq);
close_both:
	os_close_file(call_fds[0]);
out:
	/* Close (unused) write end of call fds */
	os_close_file(call_fds[1]);

	return rc;
}

static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
				     unsigned index, vq_callback_t *callback,
				     const char *name, bool ctx)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct platform_device *pdev = vu_dev->pdev;
	struct virtio_uml_vq_info *info;
	struct virtqueue *vq;
	int num = MAX_SUPPORTED_QUEUE_SIZE;
	int rc;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		rc = -ENOMEM;
		goto error_kzalloc;
	}
	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
		 pdev->id, name);

	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
				    ctx, vu_notify, callback, info->name);
	if (!vq) {
		rc = -ENOMEM;
		goto error_create;
	}
	vq->priv = info;
	vq->num_max = num;
	num = virtqueue_get_vring_size(vq);

	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
		info->kick_fd = -1;
	} else {
		rc = os_eventfd(0, 0);
		if (rc < 0)
			goto error_kick;
		info->kick_fd = rc;
	}

	rc = vu_setup_vq_call_fd(vu_dev, vq);
	if (rc)
		goto error_call;

	rc = vhost_user_set_vring_num(vu_dev, index, num);
	if (rc)
		goto error_setup;

	rc = vhost_user_set_vring_base(vu_dev, index, 0);
	if (rc)
		goto error_setup;

	rc = vhost_user_set_vring_addr(vu_dev, index,
				       virtqueue_get_desc_addr(vq),
				       virtqueue_get_used_addr(vq),
				       virtqueue_get_avail_addr(vq),
				       (u64) -1);
	if (rc)
		goto error_setup;

	return vq;

error_setup:
	if (info->call_fd >= 0) {
		um_free_irq(vu_dev->irq, vq);
		os_close_file(info->call_fd);
	}
error_call:
	if (info->kick_fd >= 0)
		os_close_file(info->kick_fd);
error_kick:
	vring_del_virtqueue(vq);
error_create:
	kfree(info);
error_kzalloc:
	return ERR_PTR(rc);
}

static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
		       const char * const names[], const bool *ctx,
		       struct irq_affinity *desc)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	int i, queue_idx = 0, rc;
	struct virtqueue *vq;

	/* not supported for now */
	if (WARN_ON(nvqs > 64))
		return -EINVAL;

	rc = vhost_user_set_mem_table(vu_dev);
	if (rc)
		return rc;

	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}

		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
				     ctx ? ctx[i] : false);
		if (IS_ERR(vqs[i])) {
			rc = PTR_ERR(vqs[i]);
			goto error_setup;
		}
	}

	list_for_each_entry(vq, &vdev->vqs, list) {
		struct virtio_uml_vq_info *info = vq->priv;

		if (info->kick_fd >= 0) {
			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
						       info->kick_fd);
			if (rc)
				goto error_setup;
		}

		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
		if (rc)
			goto error_setup;
	}

	return 0;

error_setup:
	vu_del_vqs(vdev);
	return rc;
}

static u64 vu_get_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->features;
}

static int vu_finalize_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;

	vring_transport_features(vdev);
	vu_dev->features = vdev->features | supported;

	return vhost_user_set_features(vu_dev, vu_dev->features);
}

static const char *vu_bus_name(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->pdev->name;
}

static const struct virtio_config_ops virtio_uml_config_ops = {
	.get = vu_get,
	.set = vu_set,
	.get_status = vu_get_status,
	.set_status = vu_set_status,
	.reset = vu_reset,
	.find_vqs = vu_find_vqs,
	.del_vqs = vu_del_vqs,
	.get_features = vu_get_features,
	.finalize_features = vu_finalize_features,
	.bus_name = vu_bus_name,
};

static void virtio_uml_release_dev(struct device *d)
{
	struct virtio_device *vdev =
			container_of(d, struct virtio_device, dev);
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	time_travel_propagate_time();

	/* might not have been opened due to not negotiating the feature */
	if (vu_dev->req_fd >= 0) {
		um_free_irq(vu_dev->irq, vu_dev);
		os_close_file(vu_dev->req_fd);
	}

	os_close_file(vu_dev->sock);
	kfree(vu_dev);
}

void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
				  bool no_vq_suspend)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	if (WARN_ON(vdev->config != &virtio_uml_config_ops))
		return;

	vu_dev->no_vq_suspend = no_vq_suspend;
	dev_info(&vdev->dev, "%sabled VQ suspend\n",
		 no_vq_suspend ? "dis" : "en");
}
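
/*
 * A minimal sketch of how a (hypothetical) virtio driver on this bus
 * might use the helper above, e.g. because its queues must stay live
 * across a suspend cycle:
 *
 *	static int my_probe(struct virtio_device *vdev)
 *	{
 *		virtio_uml_set_no_vq_suspend(vdev, true);
 *		return 0;
 *	}
 */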

static void vu_of_conn_broken(struct work_struct *wk)
{
	/*
	 * We can't remove the device from the devicetree so the only thing we
	 * can do is warn.
	 */
	WARN_ON(1);
}

/* Platform device */

static struct virtio_uml_platform_data *
virtio_uml_create_pdata(struct platform_device *pdev)
{
	struct device_node *np = pdev->dev.of_node;
	struct virtio_uml_platform_data *pdata;
	int ret;

	if (!np)
		return ERR_PTR(-EINVAL);

	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
	if (!pdata)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&pdata->conn_broken_wk, vu_of_conn_broken);
	pdata->pdev = pdev;

	ret = of_property_read_string(np, "socket-path", &pdata->socket_path);
	if (ret)
		return ERR_PTR(ret);

	ret = of_property_read_u32(np, "virtio-device-id",
				   &pdata->virtio_device_id);
	if (ret)
		return ERR_PTR(ret);

	return pdata;
}

static int virtio_uml_probe(struct platform_device *pdev)
{
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	struct virtio_uml_device *vu_dev;
	int rc;

	if (!pdata) {
		pdata = virtio_uml_create_pdata(pdev);
		if (IS_ERR(pdata))
			return PTR_ERR(pdata);
	}

	vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
	if (!vu_dev)
		return -ENOMEM;

	vu_dev->pdata = pdata;
	vu_dev->vdev.dev.parent = &pdev->dev;
	vu_dev->vdev.dev.release = virtio_uml_release_dev;
	vu_dev->vdev.config = &virtio_uml_config_ops;
	vu_dev->vdev.id.device = pdata->virtio_device_id;
	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
	vu_dev->pdev = pdev;
	vu_dev->req_fd = -1;

	time_travel_propagate_time();

	do {
		rc = os_connect_socket(pdata->socket_path);
	} while (rc == -EINTR);
	if (rc < 0)
		goto error_free;
	vu_dev->sock = rc;

	spin_lock_init(&vu_dev->sock_lock);

	rc = vhost_user_init(vu_dev);
	if (rc)
		goto error_init;

	platform_set_drvdata(pdev, vu_dev);

	device_set_wakeup_capable(&vu_dev->vdev.dev, true);

	rc = register_virtio_device(&vu_dev->vdev);
	if (rc) {
		put_device(&vu_dev->vdev.dev);
		return rc;
	}
	vu_dev->registered = 1;
	return 0;

error_init:
	os_close_file(vu_dev->sock);
error_free:
	kfree(vu_dev);
	return rc;
}

static int virtio_uml_remove(struct platform_device *pdev)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	unregister_virtio_device(&vu_dev->vdev);
	return 0;
}

/* Command line device list */

static void vu_cmdline_release_dev(struct device *d)
{
}

static struct device vu_cmdline_parent = {
	.init_name = "virtio-uml-cmdline",
	.release = vu_cmdline_release_dev,
};

static bool vu_cmdline_parent_registered;
static int vu_cmdline_id;

static int vu_unregister_cmdline_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;

	kfree(pdata->socket_path);
	platform_device_unregister(pdev);
	return 0;
}

static void vu_conn_broken(struct work_struct *wk)
{
	struct virtio_uml_platform_data *pdata;

	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
}

static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
{
	const char *ids = strchr(device, ':');
	unsigned int virtio_device_id;
	int processed, consumed, err;
	char *socket_path;
	struct virtio_uml_platform_data pdata, *ppdata;
	struct platform_device *pdev;

	if (!ids || ids == device)
		return -EINVAL;

	processed = sscanf(ids, ":%u%n:%d%n",
			   &virtio_device_id, &consumed,
			   &vu_cmdline_id, &consumed);

	if (processed < 1 || ids[consumed])
		return -EINVAL;

	if (!vu_cmdline_parent_registered) {
		err = device_register(&vu_cmdline_parent);
		if (err) {
			pr_err("Failed to register parent device!\n");
			put_device(&vu_cmdline_parent);
			return err;
		}
		vu_cmdline_parent_registered = true;
	}

	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
	if (!socket_path)
		return -ENOMEM;

	pdata.virtio_device_id = (u32) virtio_device_id;
	pdata.socket_path = socket_path;

	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
		vu_cmdline_id, virtio_device_id, socket_path);

	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
					     vu_cmdline_id++, &pdata,
					     sizeof(pdata));
	err = PTR_ERR_OR_ZERO(pdev);
	if (err)
		goto free;

	ppdata = pdev->dev.platform_data;
	ppdata->pdev = pdev;
	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);

	return 0;

free:
	kfree(socket_path);
	return err;
}

static int vu_cmdline_get_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	char *buffer = data;
	unsigned int len = strlen(buffer);

	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
	return 0;
}

static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
{
	buffer[0] = '\0';
	if (vu_cmdline_parent_registered)
		device_for_each_child(&vu_cmdline_parent, buffer,
				      vu_cmdline_get_device);
	return strlen(buffer) + 1;
}

static const struct kernel_param_ops vu_cmdline_param_ops = {
	.set = vu_cmdline_set,
	.get = vu_cmdline_get,
};

device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
__uml_help(vu_cmdline_param_ops,
"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
"    Configure a virtio device over a vhost-user socket.\n"
"    See virtio_ids.h for a list of possible virtio device id values.\n"
"    Optionally use a specific platform_device id.\n\n"
);
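
/*
 * Since the parameter is readable (S_IRUSR), the configured devices can
 * be listed back at runtime; vu_cmdline_get() above formats one
 * "<socket>:<virtio_id>:<platform_id>" line per device. Sample output,
 * assuming the hypothetical example from the header comment:
 *
 *	# cat /sys/module/virtio_uml/parameters/device
 *	/var/uml.socket:1:0
 */
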
static void vu_unregister_cmdline_devices(void)
{
	if (vu_cmdline_parent_registered) {
		device_for_each_child(&vu_cmdline_parent, NULL,
				      vu_unregister_cmdline_device);
		device_unregister(&vu_cmdline_parent);
		vu_cmdline_parent_registered = false;
	}
}

/* Platform driver */

static const struct of_device_id virtio_uml_match[] = {
	{ .compatible = "virtio,uml", },
	{ }
};
MODULE_DEVICE_TABLE(of, virtio_uml_match);

static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	if (!vu_dev->no_vq_suspend) {
		struct virtqueue *vq;

		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
			struct virtio_uml_vq_info *info = vq->priv;

			info->suspended = true;
			vhost_user_set_vring_enable(vu_dev, vq->index, false);
		}
	}

	if (!device_may_wakeup(&vu_dev->vdev.dev)) {
		vu_dev->suspended = true;
		return 0;
	}

	return irq_set_irq_wake(vu_dev->irq, 1);
}

static int virtio_uml_resume(struct platform_device *pdev)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	if (!vu_dev->no_vq_suspend) {
		struct virtqueue *vq;

		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
			struct virtio_uml_vq_info *info = vq->priv;

			info->suspended = false;
			vhost_user_set_vring_enable(vu_dev, vq->index, true);
		}
	}

	vu_dev->suspended = false;

	if (!device_may_wakeup(&vu_dev->vdev.dev))
		return 0;

	return irq_set_irq_wake(vu_dev->irq, 0);
}

static struct platform_driver virtio_uml_driver = {
	.probe = virtio_uml_probe,
	.remove = virtio_uml_remove,
	.driver = {
		.name = "virtio-uml",
		.of_match_table = virtio_uml_match,
	},
	.suspend = virtio_uml_suspend,
	.resume = virtio_uml_resume,
};

static int __init virtio_uml_init(void)
{
	return platform_driver_register(&virtio_uml_driver);
}

static void __exit virtio_uml_exit(void)
{
	platform_driver_unregister(&virtio_uml_driver);
	vu_unregister_cmdline_devices();
}

module_init(virtio_uml_init);
module_exit(virtio_uml_exit);
__uml_exitcall(virtio_uml_exit);

MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
MODULE_LICENSE("GPL");
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Virtio vhost-user driver
4 *
5 * Copyright(c) 2019 Intel Corporation
6 *
7 * This driver allows virtio devices to be used over a vhost-user socket.
8 *
9 * Guest devices can be instantiated by kernel module or command line
10 * parameters. One device will be created for each parameter. Syntax:
11 *
12 * virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
13 * where:
14 * <socket> := vhost-user socket path to connect
15 * <virtio_id> := virtio device id (as in virtio_ids.h)
16 * <platform_id> := (optional) platform device id
17 *
18 * example:
19 * virtio_uml.device=/var/uml.socket:1
20 *
21 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
22 */
23#include <linux/module.h>
24#include <linux/platform_device.h>
25#include <linux/slab.h>
26#include <linux/virtio.h>
27#include <linux/virtio_config.h>
28#include <linux/virtio_ring.h>
29#include <linux/time-internal.h>
30#include <shared/as-layout.h>
31#include <irq_kern.h>
32#include <init.h>
33#include <os.h>
34#include "vhost_user.h"
35
36#define MAX_SUPPORTED_QUEUE_SIZE 256
37
38#define to_virtio_uml_device(_vdev) \
39 container_of(_vdev, struct virtio_uml_device, vdev)
40
41struct virtio_uml_platform_data {
42 u32 virtio_device_id;
43 const char *socket_path;
44 struct work_struct conn_broken_wk;
45 struct platform_device *pdev;
46};
47
48struct virtio_uml_device {
49 struct virtio_device vdev;
50 struct platform_device *pdev;
51
52 spinlock_t sock_lock;
53 int sock, req_fd, irq;
54 u64 features;
55 u64 protocol_features;
56 u8 status;
57 u8 registered:1;
58 u8 suspended:1;
59 u8 no_vq_suspend:1;
60
61 u8 config_changed_irq:1;
62 uint64_t vq_irq_vq_map;
63};
64
65struct virtio_uml_vq_info {
66 int kick_fd, call_fd;
67 char name[32];
68 bool suspended;
69};
70
71extern unsigned long long physmem_size, highmem;
72
73#define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)
74
75/* Vhost-user protocol */
76
77static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
78 const int *fds, unsigned int fds_num)
79{
80 int rc;
81
82 do {
83 rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
84 if (rc > 0) {
85 buf += rc;
86 len -= rc;
87 fds = NULL;
88 fds_num = 0;
89 }
90 } while (len && (rc >= 0 || rc == -EINTR));
91
92 if (rc < 0)
93 return rc;
94 return 0;
95}
96
97static int full_read(int fd, void *buf, int len, bool abortable)
98{
99 int rc;
100
101 if (!len)
102 return 0;
103
104 do {
105 rc = os_read_file(fd, buf, len);
106 if (rc > 0) {
107 buf += rc;
108 len -= rc;
109 }
110 } while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));
111
112 if (rc < 0)
113 return rc;
114 if (rc == 0)
115 return -ECONNRESET;
116 return 0;
117}
118
119static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
120{
121 return full_read(fd, msg, sizeof(msg->header), true);
122}
123
124static int vhost_user_recv(struct virtio_uml_device *vu_dev,
125 int fd, struct vhost_user_msg *msg,
126 size_t max_payload_size, bool wait)
127{
128 size_t size;
129 int rc;
130
131 /*
132 * In virtio time-travel mode, we're handling all the vhost-user
133 * FDs by polling them whenever appropriate. However, we may get
134 * into a situation where we're sending out an interrupt message
135 * to a device (e.g. a net device) and need to handle a simulation
136 * time message while doing so, e.g. one that tells us to update
137 * our idea of how long we can run without scheduling.
138 *
139 * Thus, we need to not just read() from the given fd, but need
140 * to also handle messages for the simulation time - this function
141 * does that for us while waiting for the given fd to be readable.
142 */
143 if (wait)
144 time_travel_wait_readable(fd);
145
146 rc = vhost_user_recv_header(fd, msg);
147
148 if (rc == -ECONNRESET && vu_dev->registered) {
149 struct virtio_uml_platform_data *pdata;
150
151 pdata = vu_dev->pdev->dev.platform_data;
152
153 virtio_break_device(&vu_dev->vdev);
154 schedule_work(&pdata->conn_broken_wk);
155 }
156 if (rc)
157 return rc;
158 size = msg->header.size;
159 if (size > max_payload_size)
160 return -EPROTO;
161 return full_read(fd, &msg->payload, size, false);
162}
163
164static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
165 struct vhost_user_msg *msg,
166 size_t max_payload_size)
167{
168 int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
169 max_payload_size, true);
170
171 if (rc)
172 return rc;
173
174 if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
175 return -EPROTO;
176
177 return 0;
178}
179
180static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
181 u64 *value)
182{
183 struct vhost_user_msg msg;
184 int rc = vhost_user_recv_resp(vu_dev, &msg,
185 sizeof(msg.payload.integer));
186
187 if (rc)
188 return rc;
189 if (msg.header.size != sizeof(msg.payload.integer))
190 return -EPROTO;
191 *value = msg.payload.integer;
192 return 0;
193}
194
195static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
196 struct vhost_user_msg *msg,
197 size_t max_payload_size)
198{
199 int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
200 max_payload_size, false);
201
202 if (rc)
203 return rc;
204
205 if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
206 VHOST_USER_VERSION)
207 return -EPROTO;
208
209 return 0;
210}
211
212static int vhost_user_send(struct virtio_uml_device *vu_dev,
213 bool need_response, struct vhost_user_msg *msg,
214 int *fds, size_t num_fds)
215{
216 size_t size = sizeof(msg->header) + msg->header.size;
217 unsigned long flags;
218 bool request_ack;
219 int rc;
220
221 msg->header.flags |= VHOST_USER_VERSION;
222
223 /*
224 * The need_response flag indicates that we already need a response,
225 * e.g. to read the features. In these cases, don't request an ACK as
226 * it is meaningless. Also request an ACK only if supported.
227 */
228 request_ack = !need_response;
229 if (!(vu_dev->protocol_features &
230 BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
231 request_ack = false;
232
233 if (request_ack)
234 msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
235
236 spin_lock_irqsave(&vu_dev->sock_lock, flags);
237 rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
238 if (rc < 0)
239 goto out;
240
241 if (request_ack) {
242 uint64_t status;
243
244 rc = vhost_user_recv_u64(vu_dev, &status);
245 if (rc)
246 goto out;
247
248 if (status) {
249 vu_err(vu_dev, "slave reports error: %llu\n", status);
250 rc = -EIO;
251 goto out;
252 }
253 }
254
255out:
256 spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
257 return rc;
258}
259
260static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
261 bool need_response, u32 request)
262{
263 struct vhost_user_msg msg = {
264 .header.request = request,
265 };
266
267 return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
268}
269
270static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
271 u32 request, int fd)
272{
273 struct vhost_user_msg msg = {
274 .header.request = request,
275 };
276
277 return vhost_user_send(vu_dev, false, &msg, &fd, 1);
278}
279
280static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
281 u32 request, u64 value)
282{
283 struct vhost_user_msg msg = {
284 .header.request = request,
285 .header.size = sizeof(msg.payload.integer),
286 .payload.integer = value,
287 };
288
289 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
290}
291
292static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
293{
294 return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
295}
296
297static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
298 u64 *features)
299{
300 int rc = vhost_user_send_no_payload(vu_dev, true,
301 VHOST_USER_GET_FEATURES);
302
303 if (rc)
304 return rc;
305 return vhost_user_recv_u64(vu_dev, features);
306}
307
308static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
309 u64 features)
310{
311 return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
312}
313
314static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
315 u64 *protocol_features)
316{
317 int rc = vhost_user_send_no_payload(vu_dev, true,
318 VHOST_USER_GET_PROTOCOL_FEATURES);
319
320 if (rc)
321 return rc;
322 return vhost_user_recv_u64(vu_dev, protocol_features);
323}
324
325static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
326 u64 protocol_features)
327{
328 return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
329 protocol_features);
330}
331
332static void vhost_user_reply(struct virtio_uml_device *vu_dev,
333 struct vhost_user_msg *msg, int response)
334{
335 struct vhost_user_msg reply = {
336 .payload.integer = response,
337 };
338 size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
339 int rc;
340
341 reply.header = msg->header;
342 reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
343 reply.header.flags |= VHOST_USER_FLAG_REPLY;
344 reply.header.size = sizeof(reply.payload.integer);
345
346 rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
347
348 if (rc)
349 vu_err(vu_dev,
350 "sending reply to slave request failed: %d (size %zu)\n",
351 rc, size);
352}
353
354static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
355 struct time_travel_event *ev)
356{
357 struct virtqueue *vq;
358 int response = 1;
359 struct {
360 struct vhost_user_msg msg;
361 u8 extra_payload[512];
362 } msg;
363 int rc;
364
365 rc = vhost_user_recv_req(vu_dev, &msg.msg,
366 sizeof(msg.msg.payload) +
367 sizeof(msg.extra_payload));
368
369 if (rc)
370 return IRQ_NONE;
371
372 switch (msg.msg.header.request) {
373 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
374 vu_dev->config_changed_irq = true;
375 response = 0;
376 break;
377 case VHOST_USER_SLAVE_VRING_CALL:
378 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
379 if (vq->index == msg.msg.payload.vring_state.index) {
380 response = 0;
381 vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index);
382 break;
383 }
384 }
385 break;
386 case VHOST_USER_SLAVE_IOTLB_MSG:
387 /* not supported - VIRTIO_F_ACCESS_PLATFORM */
388 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
389 /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
390 default:
391 vu_err(vu_dev, "unexpected slave request %d\n",
392 msg.msg.header.request);
393 }
394
395 if (ev && !vu_dev->suspended)
396 time_travel_add_irq_event(ev);
397
398 if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
399 vhost_user_reply(vu_dev, &msg.msg, response);
400
401 return IRQ_HANDLED;
402}
403
404static irqreturn_t vu_req_interrupt(int irq, void *data)
405{
406 struct virtio_uml_device *vu_dev = data;
407 irqreturn_t ret = IRQ_HANDLED;
408
409 if (!um_irq_timetravel_handler_used())
410 ret = vu_req_read_message(vu_dev, NULL);
411
412 if (vu_dev->vq_irq_vq_map) {
413 struct virtqueue *vq;
414
415 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
416 if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index))
417 vring_interrupt(0 /* ignored */, vq);
418 }
419 vu_dev->vq_irq_vq_map = 0;
420 } else if (vu_dev->config_changed_irq) {
421 virtio_config_changed(&vu_dev->vdev);
422 vu_dev->config_changed_irq = false;
423 }
424
425 return ret;
426}
427
428static void vu_req_interrupt_comm_handler(int irq, int fd, void *data,
429 struct time_travel_event *ev)
430{
431 vu_req_read_message(data, ev);
432}
433
434static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
435{
436 int rc, req_fds[2];
437
438 /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
439 rc = os_pipe(req_fds, true, true);
440 if (rc < 0)
441 return rc;
442 vu_dev->req_fd = req_fds[0];
443
444 rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
445 vu_req_interrupt, IRQF_SHARED,
446 vu_dev->pdev->name, vu_dev,
447 vu_req_interrupt_comm_handler);
448 if (rc < 0)
449 goto err_close;
450
451 vu_dev->irq = rc;
452
453 rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
454 req_fds[1]);
455 if (rc)
456 goto err_free_irq;
457
458 goto out;
459
460err_free_irq:
461 um_free_irq(vu_dev->irq, vu_dev);
462err_close:
463 os_close_file(req_fds[0]);
464out:
465 /* Close unused write end of request fds */
466 os_close_file(req_fds[1]);
467 return rc;
468}
469
470static int vhost_user_init(struct virtio_uml_device *vu_dev)
471{
472 int rc = vhost_user_set_owner(vu_dev);
473
474 if (rc)
475 return rc;
476 rc = vhost_user_get_features(vu_dev, &vu_dev->features);
477 if (rc)
478 return rc;
479
480 if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
481 rc = vhost_user_get_protocol_features(vu_dev,
482 &vu_dev->protocol_features);
483 if (rc)
484 return rc;
485 vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
486 rc = vhost_user_set_protocol_features(vu_dev,
487 vu_dev->protocol_features);
488 if (rc)
489 return rc;
490 }
491
492 if (vu_dev->protocol_features &
493 BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
494 rc = vhost_user_init_slave_req(vu_dev);
495 if (rc)
496 return rc;
497 }
498
499 return 0;
500}
501
502static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
503 u32 offset, void *buf, u32 len)
504{
505 u32 cfg_size = offset + len;
506 struct vhost_user_msg *msg;
507 size_t payload_size = sizeof(msg->payload.config) + cfg_size;
508 size_t msg_size = sizeof(msg->header) + payload_size;
509 int rc;
510
511 if (!(vu_dev->protocol_features &
512 BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
513 return;
514
515 msg = kzalloc(msg_size, GFP_KERNEL);
516 if (!msg)
517 return;
518 msg->header.request = VHOST_USER_GET_CONFIG;
519 msg->header.size = payload_size;
520 msg->payload.config.offset = 0;
521 msg->payload.config.size = cfg_size;
522
523 rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
524 if (rc) {
525 vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
526 rc);
527 goto free;
528 }
529
530 rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
531 if (rc) {
532 vu_err(vu_dev,
533 "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
534 rc);
535 goto free;
536 }
537
538 if (msg->header.size != payload_size ||
539 msg->payload.config.size != cfg_size) {
540 rc = -EPROTO;
541 vu_err(vu_dev,
542 "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
543 msg->header.size, payload_size,
544 msg->payload.config.size, cfg_size);
545 goto free;
546 }
547 memcpy(buf, msg->payload.config.payload + offset, len);
548
549free:
550 kfree(msg);
551}
552
553static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
554 u32 offset, const void *buf, u32 len)
555{
556 struct vhost_user_msg *msg;
557 size_t payload_size = sizeof(msg->payload.config) + len;
558 size_t msg_size = sizeof(msg->header) + payload_size;
559 int rc;
560
561 if (!(vu_dev->protocol_features &
562 BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
563 return;
564
565 msg = kzalloc(msg_size, GFP_KERNEL);
566 if (!msg)
567 return;
568 msg->header.request = VHOST_USER_SET_CONFIG;
569 msg->header.size = payload_size;
570 msg->payload.config.offset = offset;
571 msg->payload.config.size = len;
572 memcpy(msg->payload.config.payload, buf, len);
573
574 rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
575 if (rc)
576 vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
577 rc);
578
579 kfree(msg);
580}
581
582static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
583 struct vhost_user_mem_region *region_out)
584{
585 unsigned long long mem_offset;
586 int rc = phys_mapping(addr, &mem_offset);
587
588 if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
589 return -EFAULT;
590 *fd_out = rc;
591 region_out->guest_addr = addr;
592 region_out->user_addr = addr;
593 region_out->size = size;
594 region_out->mmap_offset = mem_offset;
595
596 /* Ensure mapping is valid for the entire region */
597 rc = phys_mapping(addr + size - 1, &mem_offset);
598 if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
599 addr + size - 1, rc, *fd_out))
600 return -EFAULT;
601 return 0;
602}
603
static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
{
	struct vhost_user_msg msg = {
		.header.request = VHOST_USER_SET_MEM_TABLE,
		.header.size = sizeof(msg.payload.mem_regions),
		.payload.mem_regions.num = 1,
	};
	unsigned long reserved = uml_reserved - uml_physmem;
	int fds[2];
	int rc;

	/*
	 * This is a bit tricky, see also the comment with setup_physmem().
	 *
	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
	 * but the code and data we *already* have are omitted from it. To
	 * us this makes no difference, since both become part of our
	 * address space and memory consumption. To somebody looking in
	 * from the outside, however, it is different because the part of
	 * our memory consumption that's already part of the binary
	 * (code/data) is not mapped from the file, so it's not visible to
	 * another mmap from the file descriptor.
	 *
	 * Thus, don't advertise this space to the vhost-user slave. This
	 * means that the slave will likely abort or similar when we give
	 * it an address from the hidden range, since it's not marked as
	 * a valid address, but at least that way we detect the issue and
	 * don't just have the slave read an all-zeroes buffer from the
	 * shared memory file, or write something there that we can never
	 * see (depending on the direction of the virtqueue traffic).
	 *
	 * Since we usually don't want to use .text for virtio buffers,
	 * this effectively means that you cannot use
	 *  1) global variables, which are in the .bss and not in the shm
	 *     file-backed memory
	 *  2) the stack in some processes, depending on where they have
	 *     their stack (or maybe only no interrupt stack?)
	 *
	 * The stack is already not typically valid for DMA, so this isn't
	 * much of a restriction, but global variables might be encountered.
	 *
	 * It might be possible to fix it by copying around the data that's
	 * between bss_start and where we map the file now, but it's not
	 * something that you typically encounter with virtio drivers, so
	 * it didn't seem worthwhile.
	 */
	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
					&fds[0],
					&msg.payload.mem_regions.regions[0]);
	if (rc < 0)
		return rc;
	if (highmem) {
		msg.payload.mem_regions.num++;
		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
						&fds[1],
						&msg.payload.mem_regions.regions[1]);
		if (rc < 0)
			return rc;
	}

	return vhost_user_send(vu_dev, false, &msg, fds,
			       msg.payload.mem_regions.num);
}

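/*
 * Common helper for all messages that carry an (index, num) vring_state
 * payload: SET_VRING_NUM, SET_VRING_BASE, SET_VRING_ENABLE and the
 * in-band VHOST_USER_VRING_KICK used by vu_notify() below.
 */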
static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
				      u32 request, u32 index, u32 num)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.vring_state),
		.payload.vring_state.index = index,
		.payload.vring_state.num = num,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}

static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
				    u32 index, u32 num)
{
	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
					  index, num);
}

static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
				     u32 index, u32 offset)
{
	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
					  index, offset);
}

static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
				     u32 index, u64 desc, u64 used, u64 avail,
				     u64 log)
{
	struct vhost_user_msg msg = {
		.header.request = VHOST_USER_SET_VRING_ADDR,
		.header.size = sizeof(msg.payload.vring_addr),
		.payload.vring_addr.index = index,
		.payload.vring_addr.desc = desc,
		.payload.vring_addr.used = used,
		.payload.vring_addr.avail = avail,
		.payload.vring_addr.log = log,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}

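/*
 * Hand a kick or call eventfd to the back-end. The vring index travels
 * in the low bits of payload.integer; when no usable fd is available
 * (fd < 0), the VHOST_USER_VRING_POLL_MASK bit is set instead of
 * attaching an fd, telling the back-end to fall back to polling/in-band
 * signalling. Otherwise the fd is passed as SCM_RIGHTS ancillary data.
 */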
static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
				   u32 request, int index, int fd)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.integer),
		.payload.integer = index,
	};

	if (index & ~VHOST_USER_VRING_INDEX_MASK)
		return -EINVAL;
	if (fd < 0) {
		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
	}
	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
}

static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
				     int index, int fd)
{
	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
				       index, fd);
}

static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
				     int index, int fd)
{
	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
				       index, fd);
}

static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
					u32 index, bool enable)
{
	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
		return 0;

	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
					  index, enable);
}


/* Virtio interface */

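/*
 * Kick the back-end for virtqueue @vq. With in-band notifications
 * negotiated there is no kick eventfd, so an explicit
 * VHOST_USER_VRING_KICK message is sent instead; otherwise a counter
 * increment is written to the eventfd. Suspended queues report success
 * without kicking, so a suppressed kick isn't treated as an error.
 */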
static bool vu_notify(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	const uint64_t n = 1;
	int rc;

	if (info->suspended)
		return true;

	time_travel_propagate_time();

	if (info->kick_fd < 0) {
		struct virtio_uml_device *vu_dev;

		vu_dev = to_virtio_uml_device(vq->vdev);

		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
						  vq->index, 0) == 0;
	}

	do {
		rc = os_write_file(info->kick_fd, &n, sizeof(n));
	} while (rc == -EINTR);
	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
}

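/*
 * Interrupt handler for the call fd: drain all pending events from the
 * pipe and run the vring interrupt handler once per event, until the
 * non-blocking read returns -EAGAIN.
 */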
static irqreturn_t vu_interrupt(int irq, void *opaque)
{
	struct virtqueue *vq = opaque;
	struct virtio_uml_vq_info *info = vq->priv;
	uint64_t n;
	int rc;
	irqreturn_t ret = IRQ_NONE;

	do {
		rc = os_read_file(info->call_fd, &n, sizeof(n));
		if (rc == sizeof(n))
			ret |= vring_interrupt(irq, vq);
	} while (rc == sizeof(n) || rc == -EINTR);
	WARN(rc != -EAGAIN, "read returned %d\n", rc);
	return ret;
}


static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vhost_user_get_config(vu_dev, offset, buf, len);
}

static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vhost_user_set_config(vu_dev, offset, buf, len);
}

static u8 vu_get_status(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->status;
}

static void vu_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vu_dev->status = status;
}

static void vu_reset(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vu_dev->status = 0;
}

static void vu_del_vq(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;

	if (info->call_fd >= 0) {
		struct virtio_uml_device *vu_dev;

		vu_dev = to_virtio_uml_device(vq->vdev);

		um_free_irq(vu_dev->irq, vq);
		os_close_file(info->call_fd);
	}

	if (info->kick_fd >= 0)
		os_close_file(info->kick_fd);

	vring_del_virtqueue(vq);
	kfree(info);
}

static void vu_del_vqs(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct virtqueue *vq, *n;
	u64 features;

	/* Note: reverse order as a workaround to a decoding bug in snabb */
	list_for_each_entry_reverse(vq, &vdev->vqs, list)
		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));

	/* Ensure previous messages have been processed */
	WARN_ON(vhost_user_get_features(vu_dev, &features));

	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
		vu_del_vq(vq);
}

static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
			       struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	int call_fds[2];
	int rc;

	/* no call FD needed/desired in this case */
	if (vu_dev->protocol_features &
	    BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
	    vu_dev->protocol_features &
	    BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
		info->call_fd = -1;
		return 0;
	}

	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
	rc = os_pipe(call_fds, true, true);
	if (rc < 0)
		return rc;

	info->call_fd = call_fds[0];
	rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
			    vu_interrupt, IRQF_SHARED, info->name, vq);
	if (rc < 0)
		goto close_both;

	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
	if (rc)
		goto release_irq;

	goto out;

release_irq:
	um_free_irq(vu_dev->irq, vq);
close_both:
	os_close_file(call_fds[0]);
out:
	/* Close (unused) write end of call fds */
	os_close_file(call_fds[1]);

	return rc;
}

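/*
 * Create and wire up one virtqueue: allocate the vring, create a kick
 * eventfd (unless in-band notifications are negotiated), set up the call
 * fd/IRQ, then tell the back-end about the ring size, base index and the
 * descriptor/used/avail addresses. SET_VRING_KICK is sent later, from
 * vu_find_vqs(), once all queues exist.
 */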
static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
				     unsigned index, vq_callback_t *callback,
				     const char *name, bool ctx)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct platform_device *pdev = vu_dev->pdev;
	struct virtio_uml_vq_info *info;
	struct virtqueue *vq;
	int num = MAX_SUPPORTED_QUEUE_SIZE;
	int rc;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		rc = -ENOMEM;
		goto error_kzalloc;
	}
	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
		 pdev->id, name);

	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
				    ctx, vu_notify, callback, info->name);
	if (!vq) {
		rc = -ENOMEM;
		goto error_create;
	}
	vq->priv = info;
	num = virtqueue_get_vring_size(vq);

	if (vu_dev->protocol_features &
	    BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
		info->kick_fd = -1;
	} else {
		rc = os_eventfd(0, 0);
		if (rc < 0)
			goto error_kick;
		info->kick_fd = rc;
	}

	rc = vu_setup_vq_call_fd(vu_dev, vq);
	if (rc)
		goto error_call;

	rc = vhost_user_set_vring_num(vu_dev, index, num);
	if (rc)
		goto error_setup;

	rc = vhost_user_set_vring_base(vu_dev, index, 0);
	if (rc)
		goto error_setup;

	rc = vhost_user_set_vring_addr(vu_dev, index,
				       virtqueue_get_desc_addr(vq),
				       virtqueue_get_used_addr(vq),
				       virtqueue_get_avail_addr(vq),
				       (u64) -1);
	if (rc)
		goto error_setup;

	return vq;

error_setup:
	if (info->call_fd >= 0) {
		um_free_irq(vu_dev->irq, vq);
		os_close_file(info->call_fd);
	}
error_call:
	if (info->kick_fd >= 0)
		os_close_file(info->kick_fd);
error_kick:
	vring_del_virtqueue(vq);
error_create:
	kfree(info);
error_kzalloc:
	return ERR_PTR(rc);
}

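/*
 * find_vqs: send the memory table once, create all requested queues, and
 * only then, in a second pass over the list, hand out the kick fds and
 * enable the rings, so kick fds are only installed once every queue is
 * fully configured.
 */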
static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
		       const char * const names[], const bool *ctx,
		       struct irq_affinity *desc)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	int i, queue_idx = 0, rc;
	struct virtqueue *vq;

	/* not supported for now */
	if (WARN_ON(nvqs > 64))
		return -EINVAL;

	rc = vhost_user_set_mem_table(vu_dev);
	if (rc)
		return rc;

	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}

		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
				     ctx ? ctx[i] : false);
		if (IS_ERR(vqs[i])) {
			rc = PTR_ERR(vqs[i]);
			goto error_setup;
		}
	}

	list_for_each_entry(vq, &vdev->vqs, list) {
		struct virtio_uml_vq_info *info = vq->priv;

		if (info->kick_fd >= 0) {
			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
						       info->kick_fd);
			if (rc)
				goto error_setup;
		}

		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
		if (rc)
			goto error_setup;
	}

	return 0;

error_setup:
	vu_del_vqs(vdev);
	return rc;
}

static u64 vu_get_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->features;
}

static int vu_finalize_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;

	vring_transport_features(vdev);
	vu_dev->features = vdev->features | supported;

	return vhost_user_set_features(vu_dev, vu_dev->features);
}

static const char *vu_bus_name(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->pdev->name;
}

static const struct virtio_config_ops virtio_uml_config_ops = {
	.get = vu_get,
	.set = vu_set,
	.get_status = vu_get_status,
	.set_status = vu_set_status,
	.reset = vu_reset,
	.find_vqs = vu_find_vqs,
	.del_vqs = vu_del_vqs,
	.get_features = vu_get_features,
	.finalize_features = vu_finalize_features,
	.bus_name = vu_bus_name,
};

static void virtio_uml_release_dev(struct device *d)
{
	struct virtio_device *vdev =
		container_of(d, struct virtio_device, dev);
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	/* might not have been opened due to not negotiating the feature */
	if (vu_dev->req_fd >= 0) {
		um_free_irq(vu_dev->irq, vu_dev);
		os_close_file(vu_dev->req_fd);
	}

	os_close_file(vu_dev->sock);
	kfree(vu_dev);
}

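/*
 * Allow a virtio driver to keep its virtqueues enabled across
 * suspend/resume (see virtio_uml_suspend() below). A minimal usage
 * sketch, with a hypothetical caller in a sub-driver's probe():
 *
 *	// keep rings live during suspend; device quiesces itself
 *	virtio_uml_set_no_vq_suspend(vdev, true);
 */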
void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
				  bool no_vq_suspend)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	if (WARN_ON(vdev->config != &virtio_uml_config_ops))
		return;

	vu_dev->no_vq_suspend = no_vq_suspend;
	dev_info(&vdev->dev, "%sabled VQ suspend\n",
		 no_vq_suspend ? "dis" : "en");
}

/* Platform device */

static int virtio_uml_probe(struct platform_device *pdev)
{
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	struct virtio_uml_device *vu_dev;
	int rc;

	if (!pdata)
		return -EINVAL;

	vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
	if (!vu_dev)
		return -ENOMEM;

	vu_dev->pdata = pdata;
	vu_dev->vdev.dev.parent = &pdev->dev;
	vu_dev->vdev.dev.release = virtio_uml_release_dev;
	vu_dev->vdev.config = &virtio_uml_config_ops;
	vu_dev->vdev.id.device = pdata->virtio_device_id;
	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
	vu_dev->pdev = pdev;
	vu_dev->req_fd = -1;

	do {
		rc = os_connect_socket(pdata->socket_path);
	} while (rc == -EINTR);
	if (rc < 0)
		goto error_free;
	vu_dev->sock = rc;

	spin_lock_init(&vu_dev->sock_lock);

	rc = vhost_user_init(vu_dev);
	if (rc)
		goto error_init;

	platform_set_drvdata(pdev, vu_dev);

	device_set_wakeup_capable(&vu_dev->vdev.dev, true);

	rc = register_virtio_device(&vu_dev->vdev);
	if (rc) {
		/*
		 * The release callback owns the cleanup (and frees vu_dev),
		 * so don't touch vu_dev after dropping the reference.
		 */
		put_device(&vu_dev->vdev.dev);
		return rc;
	}
	vu_dev->registered = 1;
	return 0;

error_init:
	os_close_file(vu_dev->sock);
error_free:
	kfree(vu_dev);
	return rc;
}

static int virtio_uml_remove(struct platform_device *pdev)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	unregister_virtio_device(&vu_dev->vdev);
	return 0;
}

/* Command line device list */

static void vu_cmdline_release_dev(struct device *d)
{
}

static struct device vu_cmdline_parent = {
	.init_name = "virtio-uml-cmdline",
	.release = vu_cmdline_release_dev,
};

static bool vu_cmdline_parent_registered;
static int vu_cmdline_id;

static int vu_unregister_cmdline_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;

	kfree(pdata->socket_path);
	platform_device_unregister(pdev);
	return 0;
}

static void vu_conn_broken(struct work_struct *wk)
{
	struct virtio_uml_platform_data *pdata;

	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
}

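/*
 * Parse one virtio_uml.device= parameter (format documented in the help
 * text below) and register a matching platform device. As an example
 * (hypothetical values): for "/tmp/vhost.sock:9:2", the sscanf() below
 * yields virtio_device_id 9 and platform id 2; the platform id is
 * optional, and the next sequential id is used when it is omitted.
 */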
static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
{
	const char *ids = strchr(device, ':');
	unsigned int virtio_device_id;
	int processed, consumed, err;
	char *socket_path;
	struct virtio_uml_platform_data pdata, *ppdata;
	struct platform_device *pdev;

	if (!ids || ids == device)
		return -EINVAL;

	processed = sscanf(ids, ":%u%n:%d%n",
			   &virtio_device_id, &consumed,
			   &vu_cmdline_id, &consumed);

	if (processed < 1 || ids[consumed])
		return -EINVAL;

	if (!vu_cmdline_parent_registered) {
		err = device_register(&vu_cmdline_parent);
		if (err) {
			pr_err("Failed to register parent device!\n");
			put_device(&vu_cmdline_parent);
			return err;
		}
		vu_cmdline_parent_registered = true;
	}

	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
	if (!socket_path)
		return -ENOMEM;

	pdata.virtio_device_id = (u32) virtio_device_id;
	pdata.socket_path = socket_path;

	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
		vu_cmdline_id, virtio_device_id, socket_path);

	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
					     vu_cmdline_id++, &pdata,
					     sizeof(pdata));
	err = PTR_ERR_OR_ZERO(pdev);
	if (err)
		goto free;

	ppdata = pdev->dev.platform_data;
	ppdata->pdev = pdev;
	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);

	return 0;

free:
	kfree(socket_path);
	return err;
}

static int vu_cmdline_get_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	char *buffer = data;
	unsigned int len = strlen(buffer);

	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
	return 0;
}

static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
{
	buffer[0] = '\0';
	if (vu_cmdline_parent_registered)
		device_for_each_child(&vu_cmdline_parent, buffer,
				      vu_cmdline_get_device);
	return strlen(buffer) + 1;
}

static const struct kernel_param_ops vu_cmdline_param_ops = {
	.set = vu_cmdline_set,
	.get = vu_cmdline_get,
};

device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
__uml_help(vu_cmdline_param_ops,
"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
"    Configure a virtio device over a vhost-user socket.\n"
"    See virtio_ids.h for a list of possible virtio device id values.\n"
"    Optionally use a specific platform_device id.\n\n"
);


static void vu_unregister_cmdline_devices(void)
{
	if (vu_cmdline_parent_registered) {
		device_for_each_child(&vu_cmdline_parent, NULL,
				      vu_unregister_cmdline_device);
		device_unregister(&vu_cmdline_parent);
		vu_cmdline_parent_registered = false;
	}
}

/* Platform driver */

static const struct of_device_id virtio_uml_match[] = {
	{ .compatible = "virtio,uml", },
	{ }
};
MODULE_DEVICE_TABLE(of, virtio_uml_match);

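/*
 * Suspend: unless a driver opted out via virtio_uml_set_no_vq_suspend(),
 * mark every queue suspended and disable its vring at the back-end. If
 * the device may wake the system, arm the vhost-user IRQ as a wake
 * source instead of marking the whole device suspended.
 */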
static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	if (!vu_dev->no_vq_suspend) {
		struct virtqueue *vq;

		virtio_device_for_each_vq(&vu_dev->vdev, vq) {
			struct virtio_uml_vq_info *info = vq->priv;

			info->suspended = true;
			vhost_user_set_vring_enable(vu_dev, vq->index, false);
		}
	}

	if (!device_may_wakeup(&vu_dev->vdev.dev)) {
		vu_dev->suspended = true;
		return 0;
	}

	return irq_set_irq_wake(vu_dev->irq, 1);
}

static int virtio_uml_resume(struct platform_device *pdev)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	if (!vu_dev->no_vq_suspend) {
		struct virtqueue *vq;

		virtio_device_for_each_vq(&vu_dev->vdev, vq) {
			struct virtio_uml_vq_info *info = vq->priv;

			info->suspended = false;
			vhost_user_set_vring_enable(vu_dev, vq->index, true);
		}
	}

	vu_dev->suspended = false;

	if (!device_may_wakeup(&vu_dev->vdev.dev))
		return 0;

	return irq_set_irq_wake(vu_dev->irq, 0);
}

static struct platform_driver virtio_uml_driver = {
	.probe = virtio_uml_probe,
	.remove = virtio_uml_remove,
	.driver = {
		.name = "virtio-uml",
		.of_match_table = virtio_uml_match,
	},
	.suspend = virtio_uml_suspend,
	.resume = virtio_uml_resume,
};

static int __init virtio_uml_init(void)
{
	return platform_driver_register(&virtio_uml_driver);
}

static void __exit virtio_uml_exit(void)
{
	platform_driver_unregister(&virtio_uml_driver);
	vu_unregister_cmdline_devices();
}

module_init(virtio_uml_init);
module_exit(virtio_uml_exit);
__uml_exitcall(virtio_uml_exit);

MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
MODULE_LICENSE("GPL");