Loading...
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Virtio vhost-user driver
4 *
5 * Copyright(c) 2019 Intel Corporation
6 *
7 * This driver allows virtio devices to be used over a vhost-user socket.
8 *
9 * Guest devices can be instantiated by kernel module or command line
10 * parameters. One device will be created for each parameter. Syntax:
11 *
12 * virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
13 * where:
14 * <socket> := vhost-user socket path to connect
15 * <virtio_id> := virtio device id (as in virtio_ids.h)
16 * <platform_id> := (optional) platform device id
17 *
18 * example:
19 * virtio_uml.device=/var/uml.socket:1
20 *
21 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
22 */
23#include <linux/module.h>
24#include <linux/of.h>
25#include <linux/platform_device.h>
26#include <linux/slab.h>
27#include <linux/virtio.h>
28#include <linux/virtio_config.h>
29#include <linux/virtio_ring.h>
30#include <linux/time-internal.h>
31#include <linux/virtio-uml.h>
32#include <shared/as-layout.h>
33#include <irq_kern.h>
34#include <init.h>
35#include <os.h>
36#include "vhost_user.h"
37
/* Ring size requested from vring_create_virtqueue() for every virtqueue. */
#define MAX_SUPPORTED_QUEUE_SIZE	256

/* Recover the enclosing virtio_uml_device from its embedded virtio_device. */
#define to_virtio_uml_device(_vdev) \
	container_of(_vdev, struct virtio_uml_device, vdev)
42
43struct virtio_uml_platform_data {
44 u32 virtio_device_id;
45 const char *socket_path;
46 struct work_struct conn_broken_wk;
47 struct platform_device *pdev;
48};
49
50struct virtio_uml_device {
51 struct virtio_device vdev;
52 struct platform_device *pdev;
53 struct virtio_uml_platform_data *pdata;
54
55 spinlock_t sock_lock;
56 int sock, req_fd, irq;
57 u64 features;
58 u64 protocol_features;
59 u64 max_vqs;
60 u8 status;
61 u8 registered:1;
62 u8 suspended:1;
63 u8 no_vq_suspend:1;
64
65 u8 config_changed_irq:1;
66 uint64_t vq_irq_vq_map;
67 int recv_rc;
68};
69
70struct virtio_uml_vq_info {
71 int kick_fd, call_fd;
72 char name[32];
73 bool suspended;
74};
75
/* dev_err() against the platform device backing @vu_dev. */
#define vu_err(vu_dev, ...) \
	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)
77
78/* Vhost-user protocol */
79
/*
 * Send all @len bytes of @buf on @fd, attaching @fds to the first chunk
 * that is actually transmitted.
 *
 * Short writes and -EINTR are retried. Returns 0 once everything has been
 * sent, or the negative error reported by os_sendmsg_fds().
 */
static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
			    const int *fds, unsigned int fds_num)
{
	int sent;

	for (;;) {
		sent = os_sendmsg_fds(fd, buf, len, fds, fds_num);
		if (sent > 0) {
			buf += sent;
			len -= sent;
			/* the descriptors went out with this chunk */
			fds = NULL;
			fds_num = 0;
		}

		if (!len)
			break;
		if (sent < 0 && sent != -EINTR)
			break;
	}

	return sent < 0 ? sent : 0;
}
99
/*
 * Read exactly @len bytes from @fd into @buf.
 *
 * -EINTR is always retried. -EAGAIN is retried only when @abortable is
 * false; otherwise it is returned to the caller. Returns 0 on success,
 * -ECONNRESET if the read reports end of file, or the negative error from
 * os_read_file().
 */
static int full_read(int fd, void *buf, int len, bool abortable)
{
	int got;

	if (len == 0)
		return 0;

	for (;;) {
		got = os_read_file(fd, buf, len);
		if (got > 0) {
			buf += got;
			len -= got;
			if (len == 0)
				break;
			continue;
		}

		if (got == -EINTR)
			continue;
		if (got == -EAGAIN && !abortable)
			continue;
		break;
	}

	if (got < 0)
		return got;
	if (got == 0)
		return -ECONNRESET;
	return 0;
}
121
122static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
123{
124 return full_read(fd, msg, sizeof(msg->header), true);
125}
126
127static int vhost_user_recv(struct virtio_uml_device *vu_dev,
128 int fd, struct vhost_user_msg *msg,
129 size_t max_payload_size, bool wait)
130{
131 size_t size;
132 int rc;
133
134 /*
135 * In virtio time-travel mode, we're handling all the vhost-user
136 * FDs by polling them whenever appropriate. However, we may get
137 * into a situation where we're sending out an interrupt message
138 * to a device (e.g. a net device) and need to handle a simulation
139 * time message while doing so, e.g. one that tells us to update
140 * our idea of how long we can run without scheduling.
141 *
142 * Thus, we need to not just read() from the given fd, but need
143 * to also handle messages for the simulation time - this function
144 * does that for us while waiting for the given fd to be readable.
145 */
146 if (wait)
147 time_travel_wait_readable(fd);
148
149 rc = vhost_user_recv_header(fd, msg);
150
151 if (rc)
152 return rc;
153 size = msg->header.size;
154 if (size > max_payload_size)
155 return -EPROTO;
156 return full_read(fd, &msg->payload, size, false);
157}
158
159static void vhost_user_check_reset(struct virtio_uml_device *vu_dev,
160 int rc)
161{
162 struct virtio_uml_platform_data *pdata = vu_dev->pdata;
163
164 if (rc != -ECONNRESET)
165 return;
166
167 if (!vu_dev->registered)
168 return;
169
170 vu_dev->registered = 0;
171
172 schedule_work(&pdata->conn_broken_wk);
173}
174
175static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
176 struct vhost_user_msg *msg,
177 size_t max_payload_size)
178{
179 int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
180 max_payload_size, true);
181
182 if (rc) {
183 vhost_user_check_reset(vu_dev, rc);
184 return rc;
185 }
186
187 if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
188 return -EPROTO;
189
190 return 0;
191}
192
193static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
194 u64 *value)
195{
196 struct vhost_user_msg msg;
197 int rc = vhost_user_recv_resp(vu_dev, &msg,
198 sizeof(msg.payload.integer));
199
200 if (rc)
201 return rc;
202 if (msg.header.size != sizeof(msg.payload.integer))
203 return -EPROTO;
204 *value = msg.payload.integer;
205 return 0;
206}
207
208static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
209 struct vhost_user_msg *msg,
210 size_t max_payload_size)
211{
212 int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
213 max_payload_size, false);
214
215 if (rc)
216 return rc;
217
218 if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
219 VHOST_USER_VERSION)
220 return -EPROTO;
221
222 return 0;
223}
224
225static int vhost_user_send(struct virtio_uml_device *vu_dev,
226 bool need_response, struct vhost_user_msg *msg,
227 int *fds, size_t num_fds)
228{
229 size_t size = sizeof(msg->header) + msg->header.size;
230 unsigned long flags;
231 bool request_ack;
232 int rc;
233
234 msg->header.flags |= VHOST_USER_VERSION;
235
236 /*
237 * The need_response flag indicates that we already need a response,
238 * e.g. to read the features. In these cases, don't request an ACK as
239 * it is meaningless. Also request an ACK only if supported.
240 */
241 request_ack = !need_response;
242 if (!(vu_dev->protocol_features &
243 BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
244 request_ack = false;
245
246 if (request_ack)
247 msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
248
249 spin_lock_irqsave(&vu_dev->sock_lock, flags);
250 rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
251 if (rc < 0)
252 goto out;
253
254 if (request_ack) {
255 uint64_t status;
256
257 rc = vhost_user_recv_u64(vu_dev, &status);
258 if (rc)
259 goto out;
260
261 if (status) {
262 vu_err(vu_dev, "slave reports error: %llu\n", status);
263 rc = -EIO;
264 goto out;
265 }
266 }
267
268out:
269 spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
270 return rc;
271}
272
273static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
274 bool need_response, u32 request)
275{
276 struct vhost_user_msg msg = {
277 .header.request = request,
278 };
279
280 return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
281}
282
283static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
284 u32 request, int fd)
285{
286 struct vhost_user_msg msg = {
287 .header.request = request,
288 };
289
290 return vhost_user_send(vu_dev, false, &msg, &fd, 1);
291}
292
293static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
294 u32 request, u64 value)
295{
296 struct vhost_user_msg msg = {
297 .header.request = request,
298 .header.size = sizeof(msg.payload.integer),
299 .payload.integer = value,
300 };
301
302 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
303}
304
305static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
306{
307 return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
308}
309
310static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
311 u64 *features)
312{
313 int rc = vhost_user_send_no_payload(vu_dev, true,
314 VHOST_USER_GET_FEATURES);
315
316 if (rc)
317 return rc;
318 return vhost_user_recv_u64(vu_dev, features);
319}
320
321static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
322 u64 features)
323{
324 return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
325}
326
327static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
328 u64 *protocol_features)
329{
330 int rc = vhost_user_send_no_payload(vu_dev, true,
331 VHOST_USER_GET_PROTOCOL_FEATURES);
332
333 if (rc)
334 return rc;
335 return vhost_user_recv_u64(vu_dev, protocol_features);
336}
337
338static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
339 u64 protocol_features)
340{
341 return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
342 protocol_features);
343}
344
345static int vhost_user_get_queue_num(struct virtio_uml_device *vu_dev,
346 u64 *queue_num)
347{
348 int rc = vhost_user_send_no_payload(vu_dev, true,
349 VHOST_USER_GET_QUEUE_NUM);
350
351 if (rc)
352 return rc;
353 return vhost_user_recv_u64(vu_dev, queue_num);
354}
355
356static void vhost_user_reply(struct virtio_uml_device *vu_dev,
357 struct vhost_user_msg *msg, int response)
358{
359 struct vhost_user_msg reply = {
360 .payload.integer = response,
361 };
362 size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
363 int rc;
364
365 reply.header = msg->header;
366 reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
367 reply.header.flags |= VHOST_USER_FLAG_REPLY;
368 reply.header.size = sizeof(reply.payload.integer);
369
370 rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
371
372 if (rc)
373 vu_err(vu_dev,
374 "sending reply to slave request failed: %d (size %zu)\n",
375 rc, size);
376}
377
378static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
379 struct time_travel_event *ev)
380{
381 struct virtqueue *vq;
382 int response = 1;
383 struct {
384 struct vhost_user_msg msg;
385 u8 extra_payload[512];
386 } msg;
387 int rc;
388 irqreturn_t irq_rc = IRQ_NONE;
389
390 while (1) {
391 rc = vhost_user_recv_req(vu_dev, &msg.msg,
392 sizeof(msg.msg.payload) +
393 sizeof(msg.extra_payload));
394 if (rc)
395 break;
396
397 switch (msg.msg.header.request) {
398 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
399 vu_dev->config_changed_irq = true;
400 response = 0;
401 break;
402 case VHOST_USER_SLAVE_VRING_CALL:
403 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
404 if (vq->index == msg.msg.payload.vring_state.index) {
405 response = 0;
406 vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index);
407 break;
408 }
409 }
410 break;
411 case VHOST_USER_SLAVE_IOTLB_MSG:
412 /* not supported - VIRTIO_F_ACCESS_PLATFORM */
413 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
414 /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
415 default:
416 vu_err(vu_dev, "unexpected slave request %d\n",
417 msg.msg.header.request);
418 }
419
420 if (ev && !vu_dev->suspended)
421 time_travel_add_irq_event(ev);
422
423 if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
424 vhost_user_reply(vu_dev, &msg.msg, response);
425 irq_rc = IRQ_HANDLED;
426 }
427 /* mask EAGAIN as we try non-blocking read until socket is empty */
428 vu_dev->recv_rc = (rc == -EAGAIN) ? 0 : rc;
429 return irq_rc;
430}
431
432static irqreturn_t vu_req_interrupt(int irq, void *data)
433{
434 struct virtio_uml_device *vu_dev = data;
435 irqreturn_t ret = IRQ_HANDLED;
436
437 if (!um_irq_timetravel_handler_used())
438 ret = vu_req_read_message(vu_dev, NULL);
439
440 if (vu_dev->recv_rc) {
441 vhost_user_check_reset(vu_dev, vu_dev->recv_rc);
442 } else if (vu_dev->vq_irq_vq_map) {
443 struct virtqueue *vq;
444
445 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
446 if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index))
447 vring_interrupt(0 /* ignored */, vq);
448 }
449 vu_dev->vq_irq_vq_map = 0;
450 } else if (vu_dev->config_changed_irq) {
451 virtio_config_changed(&vu_dev->vdev);
452 vu_dev->config_changed_irq = false;
453 }
454
455 return ret;
456}
457
/*
 * Time-travel handler registered with um_request_irq_tt(): reads the
 * pending slave requests, passing @ev on so it can be queued.
 */
static void vu_req_interrupt_comm_handler(int irq, int fd, void *data,
					  struct time_travel_event *ev)
{
	struct virtio_uml_device *vu_dev = data;

	vu_req_read_message(vu_dev, ev);
}
463
464static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
465{
466 int rc, req_fds[2];
467
468 /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
469 rc = os_pipe(req_fds, true, true);
470 if (rc < 0)
471 return rc;
472 vu_dev->req_fd = req_fds[0];
473
474 rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
475 vu_req_interrupt, IRQF_SHARED,
476 vu_dev->pdev->name, vu_dev,
477 vu_req_interrupt_comm_handler);
478 if (rc < 0)
479 goto err_close;
480
481 vu_dev->irq = rc;
482
483 rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
484 req_fds[1]);
485 if (rc)
486 goto err_free_irq;
487
488 goto out;
489
490err_free_irq:
491 um_free_irq(vu_dev->irq, vu_dev);
492err_close:
493 os_close_file(req_fds[0]);
494out:
495 /* Close unused write end of request fds */
496 os_close_file(req_fds[1]);
497 return rc;
498}
499
500static int vhost_user_init(struct virtio_uml_device *vu_dev)
501{
502 int rc = vhost_user_set_owner(vu_dev);
503
504 if (rc)
505 return rc;
506 rc = vhost_user_get_features(vu_dev, &vu_dev->features);
507 if (rc)
508 return rc;
509
510 if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
511 rc = vhost_user_get_protocol_features(vu_dev,
512 &vu_dev->protocol_features);
513 if (rc)
514 return rc;
515 vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
516 rc = vhost_user_set_protocol_features(vu_dev,
517 vu_dev->protocol_features);
518 if (rc)
519 return rc;
520 }
521
522 if (vu_dev->protocol_features &
523 BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
524 rc = vhost_user_init_slave_req(vu_dev);
525 if (rc)
526 return rc;
527 }
528
529 if (vu_dev->protocol_features &
530 BIT_ULL(VHOST_USER_PROTOCOL_F_MQ)) {
531 rc = vhost_user_get_queue_num(vu_dev, &vu_dev->max_vqs);
532 if (rc)
533 return rc;
534 } else {
535 vu_dev->max_vqs = U64_MAX;
536 }
537
538 return 0;
539}
540
541static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
542 u32 offset, void *buf, u32 len)
543{
544 u32 cfg_size = offset + len;
545 struct vhost_user_msg *msg;
546 size_t payload_size = sizeof(msg->payload.config) + cfg_size;
547 size_t msg_size = sizeof(msg->header) + payload_size;
548 int rc;
549
550 if (!(vu_dev->protocol_features &
551 BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
552 return;
553
554 msg = kzalloc(msg_size, GFP_KERNEL);
555 if (!msg)
556 return;
557 msg->header.request = VHOST_USER_GET_CONFIG;
558 msg->header.size = payload_size;
559 msg->payload.config.offset = 0;
560 msg->payload.config.size = cfg_size;
561
562 rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
563 if (rc) {
564 vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
565 rc);
566 goto free;
567 }
568
569 rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
570 if (rc) {
571 vu_err(vu_dev,
572 "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
573 rc);
574 goto free;
575 }
576
577 if (msg->header.size != payload_size ||
578 msg->payload.config.size != cfg_size) {
579 rc = -EPROTO;
580 vu_err(vu_dev,
581 "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
582 msg->header.size, payload_size,
583 msg->payload.config.size, cfg_size);
584 goto free;
585 }
586 memcpy(buf, msg->payload.config.payload + offset, len);
587
588free:
589 kfree(msg);
590}
591
592static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
593 u32 offset, const void *buf, u32 len)
594{
595 struct vhost_user_msg *msg;
596 size_t payload_size = sizeof(msg->payload.config) + len;
597 size_t msg_size = sizeof(msg->header) + payload_size;
598 int rc;
599
600 if (!(vu_dev->protocol_features &
601 BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
602 return;
603
604 msg = kzalloc(msg_size, GFP_KERNEL);
605 if (!msg)
606 return;
607 msg->header.request = VHOST_USER_SET_CONFIG;
608 msg->header.size = payload_size;
609 msg->payload.config.offset = offset;
610 msg->payload.config.size = len;
611 memcpy(msg->payload.config.payload, buf, len);
612
613 rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
614 if (rc)
615 vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
616 rc);
617
618 kfree(msg);
619}
620
621static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
622 struct vhost_user_mem_region *region_out)
623{
624 unsigned long long mem_offset;
625 int rc = phys_mapping(addr, &mem_offset);
626
627 if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
628 return -EFAULT;
629 *fd_out = rc;
630 region_out->guest_addr = addr;
631 region_out->user_addr = addr;
632 region_out->size = size;
633 region_out->mmap_offset = mem_offset;
634
635 /* Ensure mapping is valid for the entire region */
636 rc = phys_mapping(addr + size - 1, &mem_offset);
637 if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
638 addr + size - 1, rc, *fd_out))
639 return -EFAULT;
640 return 0;
641}
642
643static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
644{
645 struct vhost_user_msg msg = {
646 .header.request = VHOST_USER_SET_MEM_TABLE,
647 .header.size = offsetof(typeof(msg.payload.mem_regions), regions[1]),
648 .payload.mem_regions.num = 1,
649 };
650 unsigned long reserved = uml_reserved - uml_physmem;
651 int fds[2];
652 int rc;
653
654 /*
655 * This is a bit tricky, see also the comment with setup_physmem().
656 *
657 * Essentially, setup_physmem() uses a file to mmap() our physmem,
658 * but the code and data we *already* have is omitted. To us, this
659 * is no difference, since they both become part of our address
660 * space and memory consumption. To somebody looking in from the
661 * outside, however, it is different because the part of our memory
662 * consumption that's already part of the binary (code/data) is not
663 * mapped from the file, so it's not visible to another mmap from
664 * the file descriptor.
665 *
666 * Thus, don't advertise this space to the vhost-user slave. This
667 * means that the slave will likely abort or similar when we give
668 * it an address from the hidden range, since it's not marked as
669 * a valid address, but at least that way we detect the issue and
670 * don't just have the slave read an all-zeroes buffer from the
671 * shared memory file, or write something there that we can never
672 * see (depending on the direction of the virtqueue traffic.)
673 *
674 * Since we usually don't want to use .text for virtio buffers,
675 * this effectively means that you cannot use
676 * 1) global variables, which are in the .bss and not in the shm
677 * file-backed memory
678 * 2) the stack in some processes, depending on where they have
679 * their stack (or maybe only no interrupt stack?)
680 *
681 * The stack is already not typically valid for DMA, so this isn't
682 * much of a restriction, but global variables might be encountered.
683 *
684 * It might be possible to fix it by copying around the data that's
685 * between bss_start and where we map the file now, but it's not
686 * something that you typically encounter with virtio drivers, so
687 * it didn't seem worthwhile.
688 */
689 rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
690 &fds[0],
691 &msg.payload.mem_regions.regions[0]);
692
693 if (rc < 0)
694 return rc;
695
696 return vhost_user_send(vu_dev, false, &msg, fds,
697 msg.payload.mem_regions.num);
698}
699
700static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
701 u32 request, u32 index, u32 num)
702{
703 struct vhost_user_msg msg = {
704 .header.request = request,
705 .header.size = sizeof(msg.payload.vring_state),
706 .payload.vring_state.index = index,
707 .payload.vring_state.num = num,
708 };
709
710 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
711}
712
713static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
714 u32 index, u32 num)
715{
716 return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
717 index, num);
718}
719
720static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
721 u32 index, u32 offset)
722{
723 return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
724 index, offset);
725}
726
727static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
728 u32 index, u64 desc, u64 used, u64 avail,
729 u64 log)
730{
731 struct vhost_user_msg msg = {
732 .header.request = VHOST_USER_SET_VRING_ADDR,
733 .header.size = sizeof(msg.payload.vring_addr),
734 .payload.vring_addr.index = index,
735 .payload.vring_addr.desc = desc,
736 .payload.vring_addr.used = used,
737 .payload.vring_addr.avail = avail,
738 .payload.vring_addr.log = log,
739 };
740
741 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
742}
743
744static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
745 u32 request, int index, int fd)
746{
747 struct vhost_user_msg msg = {
748 .header.request = request,
749 .header.size = sizeof(msg.payload.integer),
750 .payload.integer = index,
751 };
752
753 if (index & ~VHOST_USER_VRING_INDEX_MASK)
754 return -EINVAL;
755 if (fd < 0) {
756 msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
757 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
758 }
759 return vhost_user_send(vu_dev, false, &msg, &fd, 1);
760}
761
762static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
763 int index, int fd)
764{
765 return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
766 index, fd);
767}
768
769static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
770 int index, int fd)
771{
772 return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
773 index, fd);
774}
775
776static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
777 u32 index, bool enable)
778{
779 if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
780 return 0;
781
782 return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
783 index, enable);
784}
785
786
787/* Virtio interface */
788
789static bool vu_notify(struct virtqueue *vq)
790{
791 struct virtio_uml_vq_info *info = vq->priv;
792 const uint64_t n = 1;
793 int rc;
794
795 if (info->suspended)
796 return true;
797
798 time_travel_propagate_time();
799
800 if (info->kick_fd < 0) {
801 struct virtio_uml_device *vu_dev;
802
803 vu_dev = to_virtio_uml_device(vq->vdev);
804
805 return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
806 vq->index, 0) == 0;
807 }
808
809 do {
810 rc = os_write_file(info->kick_fd, &n, sizeof(n));
811 } while (rc == -EINTR);
812 return !WARN(rc != sizeof(n), "write returned %d\n", rc);
813}
814
815static irqreturn_t vu_interrupt(int irq, void *opaque)
816{
817 struct virtqueue *vq = opaque;
818 struct virtio_uml_vq_info *info = vq->priv;
819 uint64_t n;
820 int rc;
821 irqreturn_t ret = IRQ_NONE;
822
823 do {
824 rc = os_read_file(info->call_fd, &n, sizeof(n));
825 if (rc == sizeof(n))
826 ret |= vring_interrupt(irq, vq);
827 } while (rc == sizeof(n) || rc == -EINTR);
828 WARN(rc != -EAGAIN, "read returned %d\n", rc);
829 return ret;
830}
831
832
/* virtio_config_ops.get: read config space via VHOST_USER_GET_CONFIG. */
static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
}
840
/* virtio_config_ops.set: write config space via VHOST_USER_SET_CONFIG. */
static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
}
848
849static u8 vu_get_status(struct virtio_device *vdev)
850{
851 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
852
853 return vu_dev->status;
854}
855
856static void vu_set_status(struct virtio_device *vdev, u8 status)
857{
858 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
859
860 vu_dev->status = status;
861}
862
863static void vu_reset(struct virtio_device *vdev)
864{
865 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
866
867 vu_dev->status = 0;
868}
869
870static void vu_del_vq(struct virtqueue *vq)
871{
872 struct virtio_uml_vq_info *info = vq->priv;
873
874 if (info->call_fd >= 0) {
875 struct virtio_uml_device *vu_dev;
876
877 vu_dev = to_virtio_uml_device(vq->vdev);
878
879 um_free_irq(vu_dev->irq, vq);
880 os_close_file(info->call_fd);
881 }
882
883 if (info->kick_fd >= 0)
884 os_close_file(info->kick_fd);
885
886 vring_del_virtqueue(vq);
887 kfree(info);
888}
889
890static void vu_del_vqs(struct virtio_device *vdev)
891{
892 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
893 struct virtqueue *vq, *n;
894 u64 features;
895
896 /* Note: reverse order as a workaround to a decoding bug in snabb */
897 list_for_each_entry_reverse(vq, &vdev->vqs, list)
898 WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
899
900 /* Ensure previous messages have been processed */
901 WARN_ON(vhost_user_get_features(vu_dev, &features));
902
903 list_for_each_entry_safe(vq, n, &vdev->vqs, list)
904 vu_del_vq(vq);
905}
906
907static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
908 struct virtqueue *vq)
909{
910 struct virtio_uml_vq_info *info = vq->priv;
911 int call_fds[2];
912 int rc, irq;
913
914 /* no call FD needed/desired in this case */
915 if (vu_dev->protocol_features &
916 BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
917 vu_dev->protocol_features &
918 BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
919 info->call_fd = -1;
920 return 0;
921 }
922
923 /* Use a pipe for call fd, since SIGIO is not supported for eventfd */
924 rc = os_pipe(call_fds, true, true);
925 if (rc < 0)
926 return rc;
927
928 info->call_fd = call_fds[0];
929 irq = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
930 vu_interrupt, IRQF_SHARED, info->name, vq);
931 if (irq < 0) {
932 rc = irq;
933 goto close_both;
934 }
935
936 rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
937 if (rc)
938 goto release_irq;
939
940 vu_dev->irq = irq;
941
942 goto out;
943
944release_irq:
945 um_free_irq(irq, vq);
946close_both:
947 os_close_file(call_fds[0]);
948out:
949 /* Close (unused) write end of call fds */
950 os_close_file(call_fds[1]);
951
952 return rc;
953}
954
955static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
956 unsigned index, vq_callback_t *callback,
957 const char *name, bool ctx)
958{
959 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
960 struct platform_device *pdev = vu_dev->pdev;
961 struct virtio_uml_vq_info *info;
962 struct virtqueue *vq;
963 int num = MAX_SUPPORTED_QUEUE_SIZE;
964 int rc;
965
966 info = kzalloc(sizeof(*info), GFP_KERNEL);
967 if (!info) {
968 rc = -ENOMEM;
969 goto error_kzalloc;
970 }
971 snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
972 pdev->id, name);
973
974 vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
975 ctx, vu_notify, callback, info->name);
976 if (!vq) {
977 rc = -ENOMEM;
978 goto error_create;
979 }
980 vq->priv = info;
981 vq->num_max = num;
982 num = virtqueue_get_vring_size(vq);
983
984 if (vu_dev->protocol_features &
985 BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
986 info->kick_fd = -1;
987 } else {
988 rc = os_eventfd(0, 0);
989 if (rc < 0)
990 goto error_kick;
991 info->kick_fd = rc;
992 }
993
994 rc = vu_setup_vq_call_fd(vu_dev, vq);
995 if (rc)
996 goto error_call;
997
998 rc = vhost_user_set_vring_num(vu_dev, index, num);
999 if (rc)
1000 goto error_setup;
1001
1002 rc = vhost_user_set_vring_base(vu_dev, index, 0);
1003 if (rc)
1004 goto error_setup;
1005
1006 rc = vhost_user_set_vring_addr(vu_dev, index,
1007 virtqueue_get_desc_addr(vq),
1008 virtqueue_get_used_addr(vq),
1009 virtqueue_get_avail_addr(vq),
1010 (u64) -1);
1011 if (rc)
1012 goto error_setup;
1013
1014 return vq;
1015
1016error_setup:
1017 if (info->call_fd >= 0) {
1018 um_free_irq(vu_dev->irq, vq);
1019 os_close_file(info->call_fd);
1020 }
1021error_call:
1022 if (info->kick_fd >= 0)
1023 os_close_file(info->kick_fd);
1024error_kick:
1025 vring_del_virtqueue(vq);
1026error_create:
1027 kfree(info);
1028error_kzalloc:
1029 return ERR_PTR(rc);
1030}
1031
1032static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
1033 struct virtqueue *vqs[],
1034 struct virtqueue_info vqs_info[],
1035 struct irq_affinity *desc)
1036{
1037 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1038 int i, queue_idx = 0, rc;
1039 struct virtqueue *vq;
1040
1041 /* not supported for now */
1042 if (WARN(nvqs > 64 || nvqs > vu_dev->max_vqs,
1043 "%d VQs requested, only up to 64 or %lld supported\n",
1044 nvqs, vu_dev->max_vqs))
1045 return -EINVAL;
1046
1047 rc = vhost_user_set_mem_table(vu_dev);
1048 if (rc)
1049 return rc;
1050
1051 for (i = 0; i < nvqs; ++i) {
1052 struct virtqueue_info *vqi = &vqs_info[i];
1053
1054 if (!vqi->name) {
1055 vqs[i] = NULL;
1056 continue;
1057 }
1058
1059 vqs[i] = vu_setup_vq(vdev, queue_idx++, vqi->callback,
1060 vqi->name, vqi->ctx);
1061 if (IS_ERR(vqs[i])) {
1062 rc = PTR_ERR(vqs[i]);
1063 goto error_setup;
1064 }
1065 }
1066
1067 list_for_each_entry(vq, &vdev->vqs, list) {
1068 struct virtio_uml_vq_info *info = vq->priv;
1069
1070 if (info->kick_fd >= 0) {
1071 rc = vhost_user_set_vring_kick(vu_dev, vq->index,
1072 info->kick_fd);
1073 if (rc)
1074 goto error_setup;
1075 }
1076
1077 rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
1078 if (rc)
1079 goto error_setup;
1080 }
1081
1082 return 0;
1083
1084error_setup:
1085 vu_del_vqs(vdev);
1086 return rc;
1087}
1088
1089static u64 vu_get_features(struct virtio_device *vdev)
1090{
1091 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1092
1093 return vu_dev->features;
1094}
1095
1096static int vu_finalize_features(struct virtio_device *vdev)
1097{
1098 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1099 u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
1100
1101 vring_transport_features(vdev);
1102 vu_dev->features = vdev->features | supported;
1103
1104 return vhost_user_set_features(vu_dev, vu_dev->features);
1105}
1106
1107static const char *vu_bus_name(struct virtio_device *vdev)
1108{
1109 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1110
1111 return vu_dev->pdev->name;
1112}
1113
1114static const struct virtio_config_ops virtio_uml_config_ops = {
1115 .get = vu_get,
1116 .set = vu_set,
1117 .get_status = vu_get_status,
1118 .set_status = vu_set_status,
1119 .reset = vu_reset,
1120 .find_vqs = vu_find_vqs,
1121 .del_vqs = vu_del_vqs,
1122 .get_features = vu_get_features,
1123 .finalize_features = vu_finalize_features,
1124 .bus_name = vu_bus_name,
1125};
1126
1127static void virtio_uml_release_dev(struct device *d)
1128{
1129 struct virtio_device *vdev =
1130 container_of(d, struct virtio_device, dev);
1131 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1132
1133 time_travel_propagate_time();
1134
1135 /* might not have been opened due to not negotiating the feature */
1136 if (vu_dev->req_fd >= 0) {
1137 um_free_irq(vu_dev->irq, vu_dev);
1138 os_close_file(vu_dev->req_fd);
1139 }
1140
1141 os_close_file(vu_dev->sock);
1142 kfree(vu_dev);
1143}
1144
1145void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
1146 bool no_vq_suspend)
1147{
1148 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1149
1150 if (WARN_ON(vdev->config != &virtio_uml_config_ops))
1151 return;
1152
1153 vu_dev->no_vq_suspend = no_vq_suspend;
1154 dev_info(&vdev->dev, "%sabled VQ suspend\n",
1155 no_vq_suspend ? "dis" : "en");
1156}
1157
1158static void vu_of_conn_broken(struct work_struct *wk)
1159{
1160 struct virtio_uml_platform_data *pdata;
1161 struct virtio_uml_device *vu_dev;
1162
1163 pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
1164
1165 vu_dev = platform_get_drvdata(pdata->pdev);
1166
1167 virtio_break_device(&vu_dev->vdev);
1168
1169 /*
1170 * We can't remove the device from the devicetree so the only thing we
1171 * can do is warn.
1172 */
1173 WARN_ON(1);
1174}
1175
1176/* Platform device */
1177
1178static struct virtio_uml_platform_data *
1179virtio_uml_create_pdata(struct platform_device *pdev)
1180{
1181 struct device_node *np = pdev->dev.of_node;
1182 struct virtio_uml_platform_data *pdata;
1183 int ret;
1184
1185 if (!np)
1186 return ERR_PTR(-EINVAL);
1187
1188 pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
1189 if (!pdata)
1190 return ERR_PTR(-ENOMEM);
1191
1192 INIT_WORK(&pdata->conn_broken_wk, vu_of_conn_broken);
1193 pdata->pdev = pdev;
1194
1195 ret = of_property_read_string(np, "socket-path", &pdata->socket_path);
1196 if (ret)
1197 return ERR_PTR(ret);
1198
1199 ret = of_property_read_u32(np, "virtio-device-id",
1200 &pdata->virtio_device_id);
1201 if (ret)
1202 return ERR_PTR(ret);
1203
1204 return pdata;
1205}
1206
1207static int virtio_uml_probe(struct platform_device *pdev)
1208{
1209 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1210 struct virtio_uml_device *vu_dev;
1211 int rc;
1212
1213 if (!pdata) {
1214 pdata = virtio_uml_create_pdata(pdev);
1215 if (IS_ERR(pdata))
1216 return PTR_ERR(pdata);
1217 }
1218
1219 vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
1220 if (!vu_dev)
1221 return -ENOMEM;
1222
1223 vu_dev->pdata = pdata;
1224 vu_dev->vdev.dev.parent = &pdev->dev;
1225 vu_dev->vdev.dev.release = virtio_uml_release_dev;
1226 vu_dev->vdev.config = &virtio_uml_config_ops;
1227 vu_dev->vdev.id.device = pdata->virtio_device_id;
1228 vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
1229 vu_dev->pdev = pdev;
1230 vu_dev->req_fd = -1;
1231 vu_dev->irq = UM_IRQ_ALLOC;
1232
1233 time_travel_propagate_time();
1234
1235 do {
1236 rc = os_connect_socket(pdata->socket_path);
1237 } while (rc == -EINTR);
1238 if (rc < 0)
1239 goto error_free;
1240 vu_dev->sock = rc;
1241
1242 spin_lock_init(&vu_dev->sock_lock);
1243
1244 rc = vhost_user_init(vu_dev);
1245 if (rc)
1246 goto error_init;
1247
1248 platform_set_drvdata(pdev, vu_dev);
1249
1250 device_set_wakeup_capable(&vu_dev->vdev.dev, true);
1251
1252 rc = register_virtio_device(&vu_dev->vdev);
1253 if (rc)
1254 put_device(&vu_dev->vdev.dev);
1255 vu_dev->registered = 1;
1256 return rc;
1257
1258error_init:
1259 os_close_file(vu_dev->sock);
1260error_free:
1261 kfree(vu_dev);
1262 return rc;
1263}
1264
1265static void virtio_uml_remove(struct platform_device *pdev)
1266{
1267 struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1268
1269 unregister_virtio_device(&vu_dev->vdev);
1270}
1271
1272/* Command line device list */
1273
/*
 * Release callback for the statically allocated vu_cmdline_parent device;
 * intentionally does nothing.
 */
static void vu_cmdline_release_dev(struct device *dev)
{
}
1277
1278static struct device vu_cmdline_parent = {
1279 .init_name = "virtio-uml-cmdline",
1280 .release = vu_cmdline_release_dev,
1281};
1282
/* True once vu_cmdline_parent has been registered with the driver core. */
static bool vu_cmdline_parent_registered;
/* Platform device id used for the next command-line device (post-incremented). */
static int vu_cmdline_id;
1285
1286static int vu_unregister_cmdline_device(struct device *dev, void *data)
1287{
1288 struct platform_device *pdev = to_platform_device(dev);
1289 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1290
1291 kfree(pdata->socket_path);
1292 platform_device_unregister(pdev);
1293 return 0;
1294}
1295
1296static void vu_conn_broken(struct work_struct *wk)
1297{
1298 struct virtio_uml_platform_data *pdata;
1299 struct virtio_uml_device *vu_dev;
1300
1301 pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
1302
1303 vu_dev = platform_get_drvdata(pdata->pdev);
1304
1305 virtio_break_device(&vu_dev->vdev);
1306
1307 vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
1308}
1309
1310static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1311{
1312 const char *ids = strchr(device, ':');
1313 unsigned int virtio_device_id;
1314 int processed, consumed, err;
1315 char *socket_path;
1316 struct virtio_uml_platform_data pdata, *ppdata;
1317 struct platform_device *pdev;
1318
1319 if (!ids || ids == device)
1320 return -EINVAL;
1321
1322 processed = sscanf(ids, ":%u%n:%d%n",
1323 &virtio_device_id, &consumed,
1324 &vu_cmdline_id, &consumed);
1325
1326 if (processed < 1 || ids[consumed])
1327 return -EINVAL;
1328
1329 if (!vu_cmdline_parent_registered) {
1330 err = device_register(&vu_cmdline_parent);
1331 if (err) {
1332 pr_err("Failed to register parent device!\n");
1333 put_device(&vu_cmdline_parent);
1334 return err;
1335 }
1336 vu_cmdline_parent_registered = true;
1337 }
1338
1339 socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1340 if (!socket_path)
1341 return -ENOMEM;
1342
1343 pdata.virtio_device_id = (u32) virtio_device_id;
1344 pdata.socket_path = socket_path;
1345
1346 pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1347 vu_cmdline_id, virtio_device_id, socket_path);
1348
1349 pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1350 vu_cmdline_id++, &pdata,
1351 sizeof(pdata));
1352 err = PTR_ERR_OR_ZERO(pdev);
1353 if (err)
1354 goto free;
1355
1356 ppdata = pdev->dev.platform_data;
1357 ppdata->pdev = pdev;
1358 INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
1359
1360 return 0;
1361
1362free:
1363 kfree(socket_path);
1364 return err;
1365}
1366
1367static int vu_cmdline_get_device(struct device *dev, void *data)
1368{
1369 struct platform_device *pdev = to_platform_device(dev);
1370 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1371 char *buffer = data;
1372 unsigned int len = strlen(buffer);
1373
1374 snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1375 pdata->socket_path, pdata->virtio_device_id, pdev->id);
1376 return 0;
1377}
1378
1379static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1380{
1381 buffer[0] = '\0';
1382 if (vu_cmdline_parent_registered)
1383 device_for_each_child(&vu_cmdline_parent, buffer,
1384 vu_cmdline_get_device);
1385 return strlen(buffer) + 1;
1386}
1387
1388static const struct kernel_param_ops vu_cmdline_param_ops = {
1389 .set = vu_cmdline_set,
1390 .get = vu_cmdline_get,
1391};
1392
/* Expose the "device" parameter (owner-readable) through vu_cmdline_param_ops. */
device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
/* Help text for the parameter. */
__uml_help(vu_cmdline_param_ops,
"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
" Configure a virtio device over a vhost-user socket.\n"
" See virtio_ids.h for a list of possible virtio device id values.\n"
" Optionally use a specific platform_device id.\n\n"
);
1400
1401
1402static void vu_unregister_cmdline_devices(void)
1403{
1404 if (vu_cmdline_parent_registered) {
1405 device_for_each_child(&vu_cmdline_parent, NULL,
1406 vu_unregister_cmdline_device);
1407 device_unregister(&vu_cmdline_parent);
1408 vu_cmdline_parent_registered = false;
1409 }
1410}
1411
1412/* Platform driver */
1413
/* Devicetree match table: nodes with compatible = "virtio,uml". */
static const struct of_device_id virtio_uml_match[] = {
	{ .compatible = "virtio,uml", },
	{ }	/* sentinel */
};
MODULE_DEVICE_TABLE(of, virtio_uml_match);
1419
1420static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
1421{
1422 struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1423
1424 if (!vu_dev->no_vq_suspend) {
1425 struct virtqueue *vq;
1426
1427 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
1428 struct virtio_uml_vq_info *info = vq->priv;
1429
1430 info->suspended = true;
1431 vhost_user_set_vring_enable(vu_dev, vq->index, false);
1432 }
1433 }
1434
1435 if (!device_may_wakeup(&vu_dev->vdev.dev)) {
1436 vu_dev->suspended = true;
1437 return 0;
1438 }
1439
1440 return irq_set_irq_wake(vu_dev->irq, 1);
1441}
1442
1443static int virtio_uml_resume(struct platform_device *pdev)
1444{
1445 struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1446
1447 if (!vu_dev->no_vq_suspend) {
1448 struct virtqueue *vq;
1449
1450 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
1451 struct virtio_uml_vq_info *info = vq->priv;
1452
1453 info->suspended = false;
1454 vhost_user_set_vring_enable(vu_dev, vq->index, true);
1455 }
1456 }
1457
1458 vu_dev->suspended = false;
1459
1460 if (!device_may_wakeup(&vu_dev->vdev.dev))
1461 return 0;
1462
1463 return irq_set_irq_wake(vu_dev->irq, 0);
1464}
1465
1466static struct platform_driver virtio_uml_driver = {
1467 .probe = virtio_uml_probe,
1468 .remove = virtio_uml_remove,
1469 .driver = {
1470 .name = "virtio-uml",
1471 .of_match_table = virtio_uml_match,
1472 },
1473 .suspend = virtio_uml_suspend,
1474 .resume = virtio_uml_resume,
1475};
1476
1477static int __init virtio_uml_init(void)
1478{
1479 return platform_driver_register(&virtio_uml_driver);
1480}
1481
1482static void __exit virtio_uml_exit(void)
1483{
1484 platform_driver_unregister(&virtio_uml_driver);
1485 vu_unregister_cmdline_devices();
1486}
1487
/* Module entry/exit points; the exit handler is also a UML exitcall. */
module_init(virtio_uml_init);
module_exit(virtio_uml_exit);
__uml_exitcall(virtio_uml_exit);

MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
MODULE_LICENSE("GPL");
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Virtio vhost-user driver
4 *
5 * Copyright(c) 2019 Intel Corporation
6 *
7 * This module allows virtio devices to be used over a vhost-user socket.
8 *
9 * Guest devices can be instantiated by kernel module or command line
10 * parameters. One device will be created for each parameter. Syntax:
11 *
12 * [virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
13 * where:
14 * <socket> := vhost-user socket path to connect
15 * <virtio_id> := virtio device id (as in virtio_ids.h)
16 * <platform_id> := (optional) platform device id
17 *
18 * example:
19 * virtio_uml.device=/var/uml.socket:1
20 *
21 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
22 */
23#include <linux/module.h>
24#include <linux/platform_device.h>
25#include <linux/slab.h>
26#include <linux/virtio.h>
27#include <linux/virtio_config.h>
28#include <linux/virtio_ring.h>
29#include <shared/as-layout.h>
30#include <irq_kern.h>
31#include <init.h>
32#include <os.h>
33#include "vhost_user.h"
34
35/* Workaround due to a conflict between irq_user.h and irqreturn.h */
36#ifdef IRQ_NONE
37#undef IRQ_NONE
38#endif
39
/* Ring size requested when a virtqueue is created in vu_setup_vq(). */
#define MAX_SUPPORTED_QUEUE_SIZE	256

/* Map an embedded struct virtio_device back to its virtio_uml_device. */
#define to_virtio_uml_device(_vdev) \
	container_of(_vdev, struct virtio_uml_device, vdev)
44
45struct virtio_uml_device {
46 struct virtio_device vdev;
47 struct platform_device *pdev;
48
49 int sock, req_fd;
50 u64 features;
51 u64 protocol_features;
52 u8 status;
53};
54
/* Per-virtqueue private data, stored in vq->priv. */
struct virtio_uml_vq_info {
	/* eventfd written by vu_notify() to kick the peer */
	int kick_fd;
	/* read end of the call pipe, drained by vu_interrupt() */
	int call_fd;
	/* "<pdev name>.<pdev id>-<vq name>", used for the vring and its IRQ */
	char name[32];
};
59
/* Defined elsewhere; used below to describe the memory regions to share. */
extern unsigned long long physmem_size, highmem;

/* dev_err() against the platform device backing a virtio_uml_device. */
#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, __VA_ARGS__)
63
64/* Vhost-user protocol */
65
66static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
67 const int *fds, unsigned int fds_num)
68{
69 int rc;
70
71 do {
72 rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
73 if (rc > 0) {
74 buf += rc;
75 len -= rc;
76 fds = NULL;
77 fds_num = 0;
78 }
79 } while (len && (rc >= 0 || rc == -EINTR));
80
81 if (rc < 0)
82 return rc;
83 return 0;
84}
85
86static int full_read(int fd, void *buf, int len)
87{
88 int rc;
89
90 do {
91 rc = os_read_file(fd, buf, len);
92 if (rc > 0) {
93 buf += rc;
94 len -= rc;
95 }
96 } while (len && (rc > 0 || rc == -EINTR));
97
98 if (rc < 0)
99 return rc;
100 if (rc == 0)
101 return -ECONNRESET;
102 return 0;
103}
104
105static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
106{
107 return full_read(fd, msg, sizeof(msg->header));
108}
109
110static int vhost_user_recv(int fd, struct vhost_user_msg *msg,
111 size_t max_payload_size)
112{
113 size_t size;
114 int rc = vhost_user_recv_header(fd, msg);
115
116 if (rc)
117 return rc;
118 size = msg->header.size;
119 if (size > max_payload_size)
120 return -EPROTO;
121 return full_read(fd, &msg->payload, size);
122}
123
124static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
125 struct vhost_user_msg *msg,
126 size_t max_payload_size)
127{
128 int rc = vhost_user_recv(vu_dev->sock, msg, max_payload_size);
129
130 if (rc)
131 return rc;
132
133 if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
134 return -EPROTO;
135
136 return 0;
137}
138
139static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
140 u64 *value)
141{
142 struct vhost_user_msg msg;
143 int rc = vhost_user_recv_resp(vu_dev, &msg,
144 sizeof(msg.payload.integer));
145
146 if (rc)
147 return rc;
148 if (msg.header.size != sizeof(msg.payload.integer))
149 return -EPROTO;
150 *value = msg.payload.integer;
151 return 0;
152}
153
154static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
155 struct vhost_user_msg *msg,
156 size_t max_payload_size)
157{
158 int rc = vhost_user_recv(vu_dev->req_fd, msg, max_payload_size);
159
160 if (rc)
161 return rc;
162
163 if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
164 VHOST_USER_VERSION)
165 return -EPROTO;
166
167 return 0;
168}
169
170static int vhost_user_send(struct virtio_uml_device *vu_dev,
171 bool need_response, struct vhost_user_msg *msg,
172 int *fds, size_t num_fds)
173{
174 size_t size = sizeof(msg->header) + msg->header.size;
175 bool request_ack;
176 int rc;
177
178 msg->header.flags |= VHOST_USER_VERSION;
179
180 /*
181 * The need_response flag indicates that we already need a response,
182 * e.g. to read the features. In these cases, don't request an ACK as
183 * it is meaningless. Also request an ACK only if supported.
184 */
185 request_ack = !need_response;
186 if (!(vu_dev->protocol_features &
187 BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
188 request_ack = false;
189
190 if (request_ack)
191 msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
192
193 rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
194 if (rc < 0)
195 return rc;
196
197 if (request_ack) {
198 uint64_t status;
199
200 rc = vhost_user_recv_u64(vu_dev, &status);
201 if (rc)
202 return rc;
203
204 if (status) {
205 vu_err(vu_dev, "slave reports error: %llu\n", status);
206 return -EIO;
207 }
208 }
209
210 return 0;
211}
212
213static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
214 bool need_response, u32 request)
215{
216 struct vhost_user_msg msg = {
217 .header.request = request,
218 };
219
220 return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
221}
222
223static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
224 u32 request, int fd)
225{
226 struct vhost_user_msg msg = {
227 .header.request = request,
228 };
229
230 return vhost_user_send(vu_dev, false, &msg, &fd, 1);
231}
232
233static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
234 u32 request, u64 value)
235{
236 struct vhost_user_msg msg = {
237 .header.request = request,
238 .header.size = sizeof(msg.payload.integer),
239 .payload.integer = value,
240 };
241
242 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
243}
244
245static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
246{
247 return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
248}
249
250static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
251 u64 *features)
252{
253 int rc = vhost_user_send_no_payload(vu_dev, true,
254 VHOST_USER_GET_FEATURES);
255
256 if (rc)
257 return rc;
258 return vhost_user_recv_u64(vu_dev, features);
259}
260
261static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
262 u64 features)
263{
264 return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
265}
266
267static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
268 u64 *protocol_features)
269{
270 int rc = vhost_user_send_no_payload(vu_dev, true,
271 VHOST_USER_GET_PROTOCOL_FEATURES);
272
273 if (rc)
274 return rc;
275 return vhost_user_recv_u64(vu_dev, protocol_features);
276}
277
278static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
279 u64 protocol_features)
280{
281 return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
282 protocol_features);
283}
284
285static void vhost_user_reply(struct virtio_uml_device *vu_dev,
286 struct vhost_user_msg *msg, int response)
287{
288 struct vhost_user_msg reply = {
289 .payload.integer = response,
290 };
291 size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
292 int rc;
293
294 reply.header = msg->header;
295 reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
296 reply.header.flags |= VHOST_USER_FLAG_REPLY;
297 reply.header.size = sizeof(reply.payload.integer);
298
299 rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
300
301 if (rc)
302 vu_err(vu_dev,
303 "sending reply to slave request failed: %d (size %zu)\n",
304 rc, size);
305}
306
307static irqreturn_t vu_req_interrupt(int irq, void *data)
308{
309 struct virtio_uml_device *vu_dev = data;
310 int response = 1;
311 struct {
312 struct vhost_user_msg msg;
313 u8 extra_payload[512];
314 } msg;
315 int rc;
316
317 rc = vhost_user_recv_req(vu_dev, &msg.msg,
318 sizeof(msg.msg.payload) +
319 sizeof(msg.extra_payload));
320
321 if (rc)
322 return IRQ_NONE;
323
324 switch (msg.msg.header.request) {
325 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
326 virtio_config_changed(&vu_dev->vdev);
327 response = 0;
328 break;
329 case VHOST_USER_SLAVE_IOTLB_MSG:
330 /* not supported - VIRTIO_F_IOMMU_PLATFORM */
331 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
332 /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
333 default:
334 vu_err(vu_dev, "unexpected slave request %d\n",
335 msg.msg.header.request);
336 }
337
338 if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
339 vhost_user_reply(vu_dev, &msg.msg, response);
340
341 return IRQ_HANDLED;
342}
343
344static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
345{
346 int rc, req_fds[2];
347
348 /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
349 rc = os_pipe(req_fds, true, true);
350 if (rc < 0)
351 return rc;
352 vu_dev->req_fd = req_fds[0];
353
354 rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ,
355 vu_req_interrupt, IRQF_SHARED,
356 vu_dev->pdev->name, vu_dev);
357 if (rc)
358 goto err_close;
359
360 rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
361 req_fds[1]);
362 if (rc)
363 goto err_free_irq;
364
365 goto out;
366
367err_free_irq:
368 um_free_irq(VIRTIO_IRQ, vu_dev);
369err_close:
370 os_close_file(req_fds[0]);
371out:
372 /* Close unused write end of request fds */
373 os_close_file(req_fds[1]);
374 return rc;
375}
376
377static int vhost_user_init(struct virtio_uml_device *vu_dev)
378{
379 int rc = vhost_user_set_owner(vu_dev);
380
381 if (rc)
382 return rc;
383 rc = vhost_user_get_features(vu_dev, &vu_dev->features);
384 if (rc)
385 return rc;
386
387 if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
388 rc = vhost_user_get_protocol_features(vu_dev,
389 &vu_dev->protocol_features);
390 if (rc)
391 return rc;
392 vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
393 rc = vhost_user_set_protocol_features(vu_dev,
394 vu_dev->protocol_features);
395 if (rc)
396 return rc;
397 }
398
399 if (vu_dev->protocol_features &
400 BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
401 rc = vhost_user_init_slave_req(vu_dev);
402 if (rc)
403 return rc;
404 }
405
406 return 0;
407}
408
409static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
410 u32 offset, void *buf, u32 len)
411{
412 u32 cfg_size = offset + len;
413 struct vhost_user_msg *msg;
414 size_t payload_size = sizeof(msg->payload.config) + cfg_size;
415 size_t msg_size = sizeof(msg->header) + payload_size;
416 int rc;
417
418 if (!(vu_dev->protocol_features &
419 BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
420 return;
421
422 msg = kzalloc(msg_size, GFP_KERNEL);
423 if (!msg)
424 return;
425 msg->header.request = VHOST_USER_GET_CONFIG;
426 msg->header.size = payload_size;
427 msg->payload.config.offset = 0;
428 msg->payload.config.size = cfg_size;
429
430 rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
431 if (rc) {
432 vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
433 rc);
434 goto free;
435 }
436
437 rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
438 if (rc) {
439 vu_err(vu_dev,
440 "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
441 rc);
442 goto free;
443 }
444
445 if (msg->header.size != payload_size ||
446 msg->payload.config.size != cfg_size) {
447 rc = -EPROTO;
448 vu_err(vu_dev,
449 "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
450 msg->header.size, payload_size,
451 msg->payload.config.size, cfg_size);
452 goto free;
453 }
454 memcpy(buf, msg->payload.config.payload + offset, len);
455
456free:
457 kfree(msg);
458}
459
460static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
461 u32 offset, const void *buf, u32 len)
462{
463 struct vhost_user_msg *msg;
464 size_t payload_size = sizeof(msg->payload.config) + len;
465 size_t msg_size = sizeof(msg->header) + payload_size;
466 int rc;
467
468 if (!(vu_dev->protocol_features &
469 BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
470 return;
471
472 msg = kzalloc(msg_size, GFP_KERNEL);
473 if (!msg)
474 return;
475 msg->header.request = VHOST_USER_SET_CONFIG;
476 msg->header.size = payload_size;
477 msg->payload.config.offset = offset;
478 msg->payload.config.size = len;
479 memcpy(msg->payload.config.payload, buf, len);
480
481 rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
482 if (rc)
483 vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
484 rc);
485
486 kfree(msg);
487}
488
489static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
490 struct vhost_user_mem_region *region_out)
491{
492 unsigned long long mem_offset;
493 int rc = phys_mapping(addr, &mem_offset);
494
495 if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
496 return -EFAULT;
497 *fd_out = rc;
498 region_out->guest_addr = addr;
499 region_out->user_addr = addr;
500 region_out->size = size;
501 region_out->mmap_offset = mem_offset;
502
503 /* Ensure mapping is valid for the entire region */
504 rc = phys_mapping(addr + size - 1, &mem_offset);
505 if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
506 addr + size - 1, rc, *fd_out))
507 return -EFAULT;
508 return 0;
509}
510
511static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
512{
513 struct vhost_user_msg msg = {
514 .header.request = VHOST_USER_SET_MEM_TABLE,
515 .header.size = sizeof(msg.payload.mem_regions),
516 .payload.mem_regions.num = 1,
517 };
518 unsigned long reserved = uml_reserved - uml_physmem;
519 int fds[2];
520 int rc;
521
522 /*
523 * This is a bit tricky, see also the comment with setup_physmem().
524 *
525 * Essentially, setup_physmem() uses a file to mmap() our physmem,
526 * but the code and data we *already* have is omitted. To us, this
527 * is no difference, since they both become part of our address
528 * space and memory consumption. To somebody looking in from the
529 * outside, however, it is different because the part of our memory
530 * consumption that's already part of the binary (code/data) is not
531 * mapped from the file, so it's not visible to another mmap from
532 * the file descriptor.
533 *
534 * Thus, don't advertise this space to the vhost-user slave. This
535 * means that the slave will likely abort or similar when we give
536 * it an address from the hidden range, since it's not marked as
537 * a valid address, but at least that way we detect the issue and
538 * don't just have the slave read an all-zeroes buffer from the
539 * shared memory file, or write something there that we can never
540 * see (depending on the direction of the virtqueue traffic.)
541 *
542 * Since we usually don't want to use .text for virtio buffers,
543 * this effectively means that you cannot use
544 * 1) global variables, which are in the .bss and not in the shm
545 * file-backed memory
546 * 2) the stack in some processes, depending on where they have
547 * their stack (or maybe only no interrupt stack?)
548 *
549 * The stack is already not typically valid for DMA, so this isn't
550 * much of a restriction, but global variables might be encountered.
551 *
552 * It might be possible to fix it by copying around the data that's
553 * between bss_start and where we map the file now, but it's not
554 * something that you typically encounter with virtio drivers, so
555 * it didn't seem worthwhile.
556 */
557 rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
558 &fds[0],
559 &msg.payload.mem_regions.regions[0]);
560
561 if (rc < 0)
562 return rc;
563 if (highmem) {
564 msg.payload.mem_regions.num++;
565 rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
566 &fds[1], &msg.payload.mem_regions.regions[1]);
567 if (rc < 0)
568 return rc;
569 }
570
571 return vhost_user_send(vu_dev, false, &msg, fds,
572 msg.payload.mem_regions.num);
573}
574
575static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
576 u32 request, u32 index, u32 num)
577{
578 struct vhost_user_msg msg = {
579 .header.request = request,
580 .header.size = sizeof(msg.payload.vring_state),
581 .payload.vring_state.index = index,
582 .payload.vring_state.num = num,
583 };
584
585 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
586}
587
588static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
589 u32 index, u32 num)
590{
591 return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
592 index, num);
593}
594
595static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
596 u32 index, u32 offset)
597{
598 return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
599 index, offset);
600}
601
602static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
603 u32 index, u64 desc, u64 used, u64 avail,
604 u64 log)
605{
606 struct vhost_user_msg msg = {
607 .header.request = VHOST_USER_SET_VRING_ADDR,
608 .header.size = sizeof(msg.payload.vring_addr),
609 .payload.vring_addr.index = index,
610 .payload.vring_addr.desc = desc,
611 .payload.vring_addr.used = used,
612 .payload.vring_addr.avail = avail,
613 .payload.vring_addr.log = log,
614 };
615
616 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
617}
618
619static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
620 u32 request, int index, int fd)
621{
622 struct vhost_user_msg msg = {
623 .header.request = request,
624 .header.size = sizeof(msg.payload.integer),
625 .payload.integer = index,
626 };
627
628 if (index & ~VHOST_USER_VRING_INDEX_MASK)
629 return -EINVAL;
630 if (fd < 0) {
631 msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
632 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
633 }
634 return vhost_user_send(vu_dev, false, &msg, &fd, 1);
635}
636
637static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
638 int index, int fd)
639{
640 return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
641 index, fd);
642}
643
644static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
645 int index, int fd)
646{
647 return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
648 index, fd);
649}
650
651static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
652 u32 index, bool enable)
653{
654 if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
655 return 0;
656
657 return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
658 index, enable);
659}
660
661
662/* Virtio interface */
663
664static bool vu_notify(struct virtqueue *vq)
665{
666 struct virtio_uml_vq_info *info = vq->priv;
667 const uint64_t n = 1;
668 int rc;
669
670 do {
671 rc = os_write_file(info->kick_fd, &n, sizeof(n));
672 } while (rc == -EINTR);
673 return !WARN(rc != sizeof(n), "write returned %d\n", rc);
674}
675
676static irqreturn_t vu_interrupt(int irq, void *opaque)
677{
678 struct virtqueue *vq = opaque;
679 struct virtio_uml_vq_info *info = vq->priv;
680 uint64_t n;
681 int rc;
682 irqreturn_t ret = IRQ_NONE;
683
684 do {
685 rc = os_read_file(info->call_fd, &n, sizeof(n));
686 if (rc == sizeof(n))
687 ret |= vring_interrupt(irq, vq);
688 } while (rc == sizeof(n) || rc == -EINTR);
689 WARN(rc != -EAGAIN, "read returned %d\n", rc);
690 return ret;
691}
692
693
/* Config-space read: forwards to vhost_user_get_config(). */
static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
}
701
/* Config-space write: forwards to vhost_user_set_config(). */
static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
}
709
710static u8 vu_get_status(struct virtio_device *vdev)
711{
712 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
713
714 return vu_dev->status;
715}
716
717static void vu_set_status(struct virtio_device *vdev, u8 status)
718{
719 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
720
721 vu_dev->status = status;
722}
723
724static void vu_reset(struct virtio_device *vdev)
725{
726 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
727
728 vu_dev->status = 0;
729}
730
731static void vu_del_vq(struct virtqueue *vq)
732{
733 struct virtio_uml_vq_info *info = vq->priv;
734
735 um_free_irq(VIRTIO_IRQ, vq);
736
737 os_close_file(info->call_fd);
738 os_close_file(info->kick_fd);
739
740 vring_del_virtqueue(vq);
741 kfree(info);
742}
743
744static void vu_del_vqs(struct virtio_device *vdev)
745{
746 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
747 struct virtqueue *vq, *n;
748 u64 features;
749
750 /* Note: reverse order as a workaround to a decoding bug in snabb */
751 list_for_each_entry_reverse(vq, &vdev->vqs, list)
752 WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
753
754 /* Ensure previous messages have been processed */
755 WARN_ON(vhost_user_get_features(vu_dev, &features));
756
757 list_for_each_entry_safe(vq, n, &vdev->vqs, list)
758 vu_del_vq(vq);
759}
760
761static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
762 struct virtqueue *vq)
763{
764 struct virtio_uml_vq_info *info = vq->priv;
765 int call_fds[2];
766 int rc;
767
768 /* Use a pipe for call fd, since SIGIO is not supported for eventfd */
769 rc = os_pipe(call_fds, true, true);
770 if (rc < 0)
771 return rc;
772
773 info->call_fd = call_fds[0];
774 rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
775 vu_interrupt, IRQF_SHARED, info->name, vq);
776 if (rc)
777 goto close_both;
778
779 rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
780 if (rc)
781 goto release_irq;
782
783 goto out;
784
785release_irq:
786 um_free_irq(VIRTIO_IRQ, vq);
787close_both:
788 os_close_file(call_fds[0]);
789out:
790 /* Close (unused) write end of call fds */
791 os_close_file(call_fds[1]);
792
793 return rc;
794}
795
796static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
797 unsigned index, vq_callback_t *callback,
798 const char *name, bool ctx)
799{
800 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
801 struct platform_device *pdev = vu_dev->pdev;
802 struct virtio_uml_vq_info *info;
803 struct virtqueue *vq;
804 int num = MAX_SUPPORTED_QUEUE_SIZE;
805 int rc;
806
807 info = kzalloc(sizeof(*info), GFP_KERNEL);
808 if (!info) {
809 rc = -ENOMEM;
810 goto error_kzalloc;
811 }
812 snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
813 pdev->id, name);
814
815 vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
816 ctx, vu_notify, callback, info->name);
817 if (!vq) {
818 rc = -ENOMEM;
819 goto error_create;
820 }
821 vq->priv = info;
822 num = virtqueue_get_vring_size(vq);
823
824 rc = os_eventfd(0, 0);
825 if (rc < 0)
826 goto error_kick;
827 info->kick_fd = rc;
828
829 rc = vu_setup_vq_call_fd(vu_dev, vq);
830 if (rc)
831 goto error_call;
832
833 rc = vhost_user_set_vring_num(vu_dev, index, num);
834 if (rc)
835 goto error_setup;
836
837 rc = vhost_user_set_vring_base(vu_dev, index, 0);
838 if (rc)
839 goto error_setup;
840
841 rc = vhost_user_set_vring_addr(vu_dev, index,
842 virtqueue_get_desc_addr(vq),
843 virtqueue_get_used_addr(vq),
844 virtqueue_get_avail_addr(vq),
845 (u64) -1);
846 if (rc)
847 goto error_setup;
848
849 return vq;
850
851error_setup:
852 um_free_irq(VIRTIO_IRQ, vq);
853 os_close_file(info->call_fd);
854error_call:
855 os_close_file(info->kick_fd);
856error_kick:
857 vring_del_virtqueue(vq);
858error_create:
859 kfree(info);
860error_kzalloc:
861 return ERR_PTR(rc);
862}
863
864static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
865 struct virtqueue *vqs[], vq_callback_t *callbacks[],
866 const char * const names[], const bool *ctx,
867 struct irq_affinity *desc)
868{
869 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
870 int i, queue_idx = 0, rc;
871 struct virtqueue *vq;
872
873 rc = vhost_user_set_mem_table(vu_dev);
874 if (rc)
875 return rc;
876
877 for (i = 0; i < nvqs; ++i) {
878 if (!names[i]) {
879 vqs[i] = NULL;
880 continue;
881 }
882
883 vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
884 ctx ? ctx[i] : false);
885 if (IS_ERR(vqs[i])) {
886 rc = PTR_ERR(vqs[i]);
887 goto error_setup;
888 }
889 }
890
891 list_for_each_entry(vq, &vdev->vqs, list) {
892 struct virtio_uml_vq_info *info = vq->priv;
893
894 rc = vhost_user_set_vring_kick(vu_dev, vq->index,
895 info->kick_fd);
896 if (rc)
897 goto error_setup;
898
899 rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
900 if (rc)
901 goto error_setup;
902 }
903
904 return 0;
905
906error_setup:
907 vu_del_vqs(vdev);
908 return rc;
909}
910
911static u64 vu_get_features(struct virtio_device *vdev)
912{
913 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
914
915 return vu_dev->features;
916}
917
918static int vu_finalize_features(struct virtio_device *vdev)
919{
920 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
921 u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
922
923 vring_transport_features(vdev);
924 vu_dev->features = vdev->features | supported;
925
926 return vhost_user_set_features(vu_dev, vu_dev->features);
927}
928
929static const char *vu_bus_name(struct virtio_device *vdev)
930{
931 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
932
933 return vu_dev->pdev->name;
934}
935
936static const struct virtio_config_ops virtio_uml_config_ops = {
937 .get = vu_get,
938 .set = vu_set,
939 .get_status = vu_get_status,
940 .set_status = vu_set_status,
941 .reset = vu_reset,
942 .find_vqs = vu_find_vqs,
943 .del_vqs = vu_del_vqs,
944 .get_features = vu_get_features,
945 .finalize_features = vu_finalize_features,
946 .bus_name = vu_bus_name,
947};
948
949static void virtio_uml_release_dev(struct device *d)
950{
951 struct virtio_device *vdev =
952 container_of(d, struct virtio_device, dev);
953 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
954
955 /* might not have been opened due to not negotiating the feature */
956 if (vu_dev->req_fd >= 0) {
957 um_free_irq(VIRTIO_IRQ, vu_dev);
958 os_close_file(vu_dev->req_fd);
959 }
960
961 os_close_file(vu_dev->sock);
962}
963
964/* Platform device */
965
966struct virtio_uml_platform_data {
967 u32 virtio_device_id;
968 const char *socket_path;
969};
970
971static int virtio_uml_probe(struct platform_device *pdev)
972{
973 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
974 struct virtio_uml_device *vu_dev;
975 int rc;
976
977 if (!pdata)
978 return -EINVAL;
979
980 vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
981 if (!vu_dev)
982 return -ENOMEM;
983
984 vu_dev->vdev.dev.parent = &pdev->dev;
985 vu_dev->vdev.dev.release = virtio_uml_release_dev;
986 vu_dev->vdev.config = &virtio_uml_config_ops;
987 vu_dev->vdev.id.device = pdata->virtio_device_id;
988 vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
989 vu_dev->pdev = pdev;
990 vu_dev->req_fd = -1;
991
992 do {
993 rc = os_connect_socket(pdata->socket_path);
994 } while (rc == -EINTR);
995 if (rc < 0)
996 return rc;
997 vu_dev->sock = rc;
998
999 rc = vhost_user_init(vu_dev);
1000 if (rc)
1001 goto error_init;
1002
1003 platform_set_drvdata(pdev, vu_dev);
1004
1005 rc = register_virtio_device(&vu_dev->vdev);
1006 if (rc)
1007 put_device(&vu_dev->vdev.dev);
1008 return rc;
1009
1010error_init:
1011 os_close_file(vu_dev->sock);
1012 return rc;
1013}
1014
1015static int virtio_uml_remove(struct platform_device *pdev)
1016{
1017 struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1018
1019 unregister_virtio_device(&vu_dev->vdev);
1020 return 0;
1021}
1022
1023/* Command line device list */
1024
/*
 * Release callback for vu_cmdline_parent. Intentionally a no-op: the parent
 * device is a statically allocated object, so nothing is freed here.
 */
static void vu_cmdline_release_dev(struct device *d)
{
}
1028
1029static struct device vu_cmdline_parent = {
1030 .init_name = "virtio-uml-cmdline",
1031 .release = vu_cmdline_release_dev,
1032};
1033
1034static bool vu_cmdline_parent_registered;
1035static int vu_cmdline_id;
1036
1037static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1038{
1039 const char *ids = strchr(device, ':');
1040 unsigned int virtio_device_id;
1041 int processed, consumed, err;
1042 char *socket_path;
1043 struct virtio_uml_platform_data pdata;
1044 struct platform_device *pdev;
1045
1046 if (!ids || ids == device)
1047 return -EINVAL;
1048
1049 processed = sscanf(ids, ":%u%n:%d%n",
1050 &virtio_device_id, &consumed,
1051 &vu_cmdline_id, &consumed);
1052
1053 if (processed < 1 || ids[consumed])
1054 return -EINVAL;
1055
1056 if (!vu_cmdline_parent_registered) {
1057 err = device_register(&vu_cmdline_parent);
1058 if (err) {
1059 pr_err("Failed to register parent device!\n");
1060 put_device(&vu_cmdline_parent);
1061 return err;
1062 }
1063 vu_cmdline_parent_registered = true;
1064 }
1065
1066 socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1067 if (!socket_path)
1068 return -ENOMEM;
1069
1070 pdata.virtio_device_id = (u32) virtio_device_id;
1071 pdata.socket_path = socket_path;
1072
1073 pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1074 vu_cmdline_id, virtio_device_id, socket_path);
1075
1076 pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1077 vu_cmdline_id++, &pdata,
1078 sizeof(pdata));
1079 err = PTR_ERR_OR_ZERO(pdev);
1080 if (err)
1081 goto free;
1082 return 0;
1083
1084free:
1085 kfree(socket_path);
1086 return err;
1087}
1088
1089static int vu_cmdline_get_device(struct device *dev, void *data)
1090{
1091 struct platform_device *pdev = to_platform_device(dev);
1092 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1093 char *buffer = data;
1094 unsigned int len = strlen(buffer);
1095
1096 snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1097 pdata->socket_path, pdata->virtio_device_id, pdev->id);
1098 return 0;
1099}
1100
1101static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1102{
1103 buffer[0] = '\0';
1104 if (vu_cmdline_parent_registered)
1105 device_for_each_child(&vu_cmdline_parent, buffer,
1106 vu_cmdline_get_device);
1107 return strlen(buffer) + 1;
1108}
1109
1110static const struct kernel_param_ops vu_cmdline_param_ops = {
1111 .set = vu_cmdline_set,
1112 .get = vu_cmdline_get,
1113};
1114
1115device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
1116__uml_help(vu_cmdline_param_ops,
1117"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
1118" Configure a virtio device over a vhost-user socket.\n"
1119" See virtio_ids.h for a list of possible virtio device id values.\n"
1120" Optionally use a specific platform_device id.\n\n"
1121);
1122
1123
1124static int vu_unregister_cmdline_device(struct device *dev, void *data)
1125{
1126 struct platform_device *pdev = to_platform_device(dev);
1127 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1128
1129 kfree(pdata->socket_path);
1130 platform_device_unregister(pdev);
1131 return 0;
1132}
1133
1134static void vu_unregister_cmdline_devices(void)
1135{
1136 if (vu_cmdline_parent_registered) {
1137 device_for_each_child(&vu_cmdline_parent, NULL,
1138 vu_unregister_cmdline_device);
1139 device_unregister(&vu_cmdline_parent);
1140 vu_cmdline_parent_registered = false;
1141 }
1142}
1143
1144/* Platform driver */
1145
1146static const struct of_device_id virtio_uml_match[] = {
1147 { .compatible = "virtio,uml", },
1148 { }
1149};
1150MODULE_DEVICE_TABLE(of, virtio_uml_match);
1151
1152static struct platform_driver virtio_uml_driver = {
1153 .probe = virtio_uml_probe,
1154 .remove = virtio_uml_remove,
1155 .driver = {
1156 .name = "virtio-uml",
1157 .of_match_table = virtio_uml_match,
1158 },
1159};
1160
1161static int __init virtio_uml_init(void)
1162{
1163 return platform_driver_register(&virtio_uml_driver);
1164}
1165
1166static void __exit virtio_uml_exit(void)
1167{
1168 platform_driver_unregister(&virtio_uml_driver);
1169 vu_unregister_cmdline_devices();
1170}
1171
1172module_init(virtio_uml_init);
1173module_exit(virtio_uml_exit);
1174__uml_exitcall(virtio_uml_exit);
1175
1176MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1177MODULE_LICENSE("GPL");