Loading...
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
4 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
5 * Copyright (C) 2012-2014 Cisco Systems
6 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
7 * Copyright (C) 2019 Intel Corporation
8 */
9
10#include <linux/clockchips.h>
11#include <linux/init.h>
12#include <linux/interrupt.h>
13#include <linux/jiffies.h>
14#include <linux/mm.h>
15#include <linux/sched.h>
16#include <linux/spinlock.h>
17#include <linux/threads.h>
18#include <asm/irq.h>
19#include <asm/param.h>
20#include <kern_util.h>
21#include <os.h>
22#include <linux/time-internal.h>
23#include <linux/um_timetravel.h>
24#include <shared/init.h>
25
26#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
27enum time_travel_mode time_travel_mode;
28EXPORT_SYMBOL_GPL(time_travel_mode);
29
30static bool time_travel_start_set;
31static unsigned long long time_travel_start;
32static unsigned long long time_travel_time;
33static LIST_HEAD(time_travel_events);
34static LIST_HEAD(time_travel_irqs);
35static unsigned long long time_travel_timer_interval;
36static unsigned long long time_travel_next_event;
37static struct time_travel_event time_travel_timer_event;
38static int time_travel_ext_fd = -1;
39static unsigned int time_travel_ext_waiting;
40static bool time_travel_ext_prev_request_valid;
41static unsigned long long time_travel_ext_prev_request;
42static bool time_travel_ext_free_until_valid;
43static unsigned long long time_travel_ext_free_until;
44
45static void time_travel_set_time(unsigned long long ns)
46{
47 if (unlikely(ns < time_travel_time))
48 panic("time-travel: time goes backwards %lld -> %lld\n",
49 time_travel_time, ns);
50 else if (unlikely(ns >= S64_MAX))
51 panic("The system was going to sleep forever, aborting");
52
53 time_travel_time = ns;
54}
55
/* How time_travel_handle_message() obtains the next message. */
enum time_travel_message_handling {
	/* poll for the message first; IRQs must already be disabled */
	TTMH_IDLE,
	/* poll for the message first */
	TTMH_POLL,
	/* read right away, without polling */
	TTMH_READ,
};
61
62static void time_travel_handle_message(struct um_timetravel_msg *msg,
63 enum time_travel_message_handling mode)
64{
65 struct um_timetravel_msg resp = {
66 .op = UM_TIMETRAVEL_ACK,
67 };
68 int ret;
69
70 /*
71 * We can't unlock here, but interrupt signals with a timetravel_handler
72 * (see um_request_irq_tt) get to the timetravel_handler anyway.
73 */
74 if (mode != TTMH_READ) {
75 BUG_ON(mode == TTMH_IDLE && !irqs_disabled());
76
77 while (os_poll(1, &time_travel_ext_fd) != 0) {
78 /* nothing */
79 }
80 }
81
82 ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
83
84 if (ret == 0)
85 panic("time-travel external link is broken\n");
86 if (ret != sizeof(*msg))
87 panic("invalid time-travel message - %d bytes\n", ret);
88
89 switch (msg->op) {
90 default:
91 WARN_ONCE(1, "time-travel: unexpected message %lld\n",
92 (unsigned long long)msg->op);
93 break;
94 case UM_TIMETRAVEL_ACK:
95 return;
96 case UM_TIMETRAVEL_RUN:
97 time_travel_set_time(msg->time);
98 break;
99 case UM_TIMETRAVEL_FREE_UNTIL:
100 time_travel_ext_free_until_valid = true;
101 time_travel_ext_free_until = msg->time;
102 break;
103 }
104
105 resp.seq = msg->seq;
106 os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
107}
108
109static u64 time_travel_ext_req(u32 op, u64 time)
110{
111 static int seq;
112 int mseq = ++seq;
113 struct um_timetravel_msg msg = {
114 .op = op,
115 .time = time,
116 .seq = mseq,
117 };
118
119 /*
120 * We need to block even the timetravel handlers of SIGIO here and
121 * only restore their use when we got the ACK - otherwise we may
122 * (will) get interrupted by that, try to queue the IRQ for future
123 * processing and thus send another request while we're still waiting
124 * for an ACK, but the peer doesn't know we got interrupted and will
125 * send the ACKs in the same order as the message, but we'd need to
126 * see them in the opposite order ...
127 *
128 * This wouldn't matter *too* much, but some ACKs carry the
129 * current time (for UM_TIMETRAVEL_GET) and getting another
130 * ACK without a time would confuse us a lot!
131 *
132 * The sequence number assignment that happens here lets us
133 * debug such message handling issues more easily.
134 */
135 block_signals_hard();
136 os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
137
138 while (msg.op != UM_TIMETRAVEL_ACK)
139 time_travel_handle_message(&msg, TTMH_READ);
140
141 if (msg.seq != mseq)
142 panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
143 msg.op, msg.seq, mseq, msg.time);
144
145 if (op == UM_TIMETRAVEL_GET)
146 time_travel_set_time(msg.time);
147 unblock_signals_hard();
148
149 return msg.time;
150}
151
152void __time_travel_wait_readable(int fd)
153{
154 int fds[2] = { fd, time_travel_ext_fd };
155 int ret;
156
157 if (time_travel_mode != TT_MODE_EXTERNAL)
158 return;
159
160 while ((ret = os_poll(2, fds))) {
161 struct um_timetravel_msg msg;
162
163 if (ret == 1)
164 time_travel_handle_message(&msg, TTMH_READ);
165 }
166}
167EXPORT_SYMBOL_GPL(__time_travel_wait_readable);
168
169static void time_travel_ext_update_request(unsigned long long time)
170{
171 if (time_travel_mode != TT_MODE_EXTERNAL)
172 return;
173
174 /* asked for exactly this time previously */
175 if (time_travel_ext_prev_request_valid &&
176 time == time_travel_ext_prev_request)
177 return;
178
179 /*
180 * if we're running and are allowed to run past the request
181 * then we don't need to update it either
182 */
183 if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
184 time < time_travel_ext_free_until)
185 return;
186
187 time_travel_ext_prev_request = time;
188 time_travel_ext_prev_request_valid = true;
189 time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
190}
191
192void __time_travel_propagate_time(void)
193{
194 static unsigned long long last_propagated;
195
196 if (last_propagated == time_travel_time)
197 return;
198
199 time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
200 last_propagated = time_travel_time;
201}
202EXPORT_SYMBOL_GPL(__time_travel_propagate_time);
203
204/* returns true if we must do a wait to the simtime device */
205static bool time_travel_ext_request(unsigned long long time)
206{
207 /*
208 * If we received an external sync point ("free until") then we
209 * don't have to request/wait for anything until then, unless
210 * we're already waiting.
211 */
212 if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
213 time < time_travel_ext_free_until)
214 return false;
215
216 time_travel_ext_update_request(time);
217 return true;
218}
219
220static void time_travel_ext_wait(bool idle)
221{
222 struct um_timetravel_msg msg = {
223 .op = UM_TIMETRAVEL_ACK,
224 };
225
226 time_travel_ext_prev_request_valid = false;
227 time_travel_ext_free_until_valid = false;
228 time_travel_ext_waiting++;
229
230 time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
231
232 /*
233 * Here we are deep in the idle loop, so we have to break out of the
234 * kernel abstraction in a sense and implement this in terms of the
235 * UML system waiting on the VQ interrupt while sleeping, when we get
236 * the signal it'll call time_travel_ext_vq_notify_done() completing the
237 * call.
238 */
239 while (msg.op != UM_TIMETRAVEL_RUN)
240 time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);
241
242 time_travel_ext_waiting--;
243
244 /* we might request more stuff while polling - reset when we run */
245 time_travel_ext_prev_request_valid = false;
246}
247
248static void time_travel_ext_get_time(void)
249{
250 time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
251}
252
253static void __time_travel_update_time(unsigned long long ns, bool idle)
254{
255 if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
256 time_travel_ext_wait(idle);
257 else
258 time_travel_set_time(ns);
259}
260
261static struct time_travel_event *time_travel_first_event(void)
262{
263 return list_first_entry_or_null(&time_travel_events,
264 struct time_travel_event,
265 list);
266}
267
268static void __time_travel_add_event(struct time_travel_event *e,
269 unsigned long long time)
270{
271 struct time_travel_event *tmp;
272 bool inserted = false;
273 unsigned long flags;
274
275 if (e->pending)
276 return;
277
278 e->pending = true;
279 e->time = time;
280
281 local_irq_save(flags);
282 list_for_each_entry(tmp, &time_travel_events, list) {
283 /*
284 * Add the new entry before one with higher time,
285 * or if they're equal and both on stack, because
286 * in that case we need to unwind the stack in the
287 * right order, and the later event (timer sleep
288 * or such) must be dequeued first.
289 */
290 if ((tmp->time > e->time) ||
291 (tmp->time == e->time && tmp->onstack && e->onstack)) {
292 list_add_tail(&e->list, &tmp->list);
293 inserted = true;
294 break;
295 }
296 }
297
298 if (!inserted)
299 list_add_tail(&e->list, &time_travel_events);
300
301 tmp = time_travel_first_event();
302 time_travel_ext_update_request(tmp->time);
303 time_travel_next_event = tmp->time;
304 local_irq_restore(flags);
305}
306
307static void time_travel_add_event(struct time_travel_event *e,
308 unsigned long long time)
309{
310 if (WARN_ON(!e->fn))
311 return;
312
313 __time_travel_add_event(e, time);
314}
315
316void time_travel_add_event_rel(struct time_travel_event *e,
317 unsigned long long delay_ns)
318{
319 time_travel_add_event(e, time_travel_time + delay_ns);
320}
321
322void time_travel_periodic_timer(struct time_travel_event *e)
323{
324 time_travel_add_event(&time_travel_timer_event,
325 time_travel_time + time_travel_timer_interval);
326 deliver_alarm();
327}
328
329void deliver_time_travel_irqs(void)
330{
331 struct time_travel_event *e;
332 unsigned long flags;
333
334 /*
335 * Don't do anything for most cases. Note that because here we have
336 * to disable IRQs (and re-enable later) we'll actually recurse at
337 * the end of the function, so this is strictly necessary.
338 */
339 if (likely(list_empty(&time_travel_irqs)))
340 return;
341
342 local_irq_save(flags);
343 irq_enter();
344 while ((e = list_first_entry_or_null(&time_travel_irqs,
345 struct time_travel_event,
346 list))) {
347 list_del(&e->list);
348 e->pending = false;
349 e->fn(e);
350 }
351 irq_exit();
352 local_irq_restore(flags);
353}
354
355static void time_travel_deliver_event(struct time_travel_event *e)
356{
357 if (e == &time_travel_timer_event) {
358 /*
359 * deliver_alarm() does the irq_enter/irq_exit
360 * by itself, so must handle it specially here
361 */
362 e->fn(e);
363 } else if (irqs_disabled()) {
364 list_add_tail(&e->list, &time_travel_irqs);
365 /*
366 * set pending again, it was set to false when the
367 * event was deleted from the original list, but
368 * now it's still pending until we deliver the IRQ.
369 */
370 e->pending = true;
371 } else {
372 unsigned long flags;
373
374 local_irq_save(flags);
375 irq_enter();
376 e->fn(e);
377 irq_exit();
378 local_irq_restore(flags);
379 }
380}
381
382bool time_travel_del_event(struct time_travel_event *e)
383{
384 unsigned long flags;
385
386 if (!e->pending)
387 return false;
388 local_irq_save(flags);
389 list_del(&e->list);
390 e->pending = false;
391 local_irq_restore(flags);
392 return true;
393}
394
395static void time_travel_update_time(unsigned long long next, bool idle)
396{
397 struct time_travel_event ne = {
398 .onstack = true,
399 };
400 struct time_travel_event *e;
401 bool finished = idle;
402
403 /* add it without a handler - we deal with that specifically below */
404 __time_travel_add_event(&ne, next);
405
406 do {
407 e = time_travel_first_event();
408
409 BUG_ON(!e);
410 __time_travel_update_time(e->time, idle);
411
412 /* new events may have been inserted while we were waiting */
413 if (e == time_travel_first_event()) {
414 BUG_ON(!time_travel_del_event(e));
415 BUG_ON(time_travel_time != e->time);
416
417 if (e == &ne) {
418 finished = true;
419 } else {
420 if (e->onstack)
421 panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
422 time_travel_time, e->time, e);
423 time_travel_deliver_event(e);
424 }
425 }
426
427 e = time_travel_first_event();
428 if (e)
429 time_travel_ext_update_request(e->time);
430 } while (ne.pending && !finished);
431
432 time_travel_del_event(&ne);
433}
434
435void time_travel_ndelay(unsigned long nsec)
436{
437 time_travel_update_time(time_travel_time + nsec, false);
438}
439EXPORT_SYMBOL(time_travel_ndelay);
440
441void time_travel_add_irq_event(struct time_travel_event *e)
442{
443 BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);
444
445 time_travel_ext_get_time();
446 /*
447 * We could model interrupt latency here, for now just
448 * don't have any latency at all and request the exact
449 * same time (again) to run the interrupt...
450 */
451 time_travel_add_event(e, time_travel_time);
452}
453EXPORT_SYMBOL_GPL(time_travel_add_irq_event);
454
/* One-shot timer handler: just deliver the alarm; @e is not used. */
static void time_travel_oneshot_timer(struct time_travel_event *e)
{
	(void)e;
	deliver_alarm();
}
459
460void time_travel_sleep(void)
461{
462 /*
463 * Wait "forever" (using S64_MAX because there are some potential
464 * wrapping issues, especially with the current TT_MODE_EXTERNAL
465 * controller application.
466 */
467 unsigned long long next = S64_MAX;
468
469 if (time_travel_mode == TT_MODE_BASIC)
470 os_timer_disable();
471
472 time_travel_update_time(next, true);
473
474 if (time_travel_mode == TT_MODE_BASIC &&
475 time_travel_timer_event.pending) {
476 if (time_travel_timer_event.fn == time_travel_periodic_timer) {
477 /*
478 * This is somewhat wrong - we should get the first
479 * one sooner like the os_timer_one_shot() below...
480 */
481 os_timer_set_interval(time_travel_timer_interval);
482 } else {
483 os_timer_one_shot(time_travel_timer_event.time - next);
484 }
485 }
486}
487
488static void time_travel_handle_real_alarm(void)
489{
490 time_travel_set_time(time_travel_next_event);
491
492 time_travel_del_event(&time_travel_timer_event);
493
494 if (time_travel_timer_event.fn == time_travel_periodic_timer)
495 time_travel_add_event(&time_travel_timer_event,
496 time_travel_time +
497 time_travel_timer_interval);
498}
499
500static void time_travel_set_interval(unsigned long long interval)
501{
502 time_travel_timer_interval = interval;
503}
504
505static int time_travel_connect_external(const char *socket)
506{
507 const char *sep;
508 unsigned long long id = (unsigned long long)-1;
509 int rc;
510
511 if ((sep = strchr(socket, ':'))) {
512 char buf[25] = {};
513 if (sep - socket > sizeof(buf) - 1)
514 goto invalid_number;
515
516 memcpy(buf, socket, sep - socket);
517 if (kstrtoull(buf, 0, &id)) {
518invalid_number:
519 panic("time-travel: invalid external ID in string '%s'\n",
520 socket);
521 return -EINVAL;
522 }
523
524 socket = sep + 1;
525 }
526
527 rc = os_connect_socket(socket);
528 if (rc < 0) {
529 panic("time-travel: failed to connect to external socket %s\n",
530 socket);
531 return rc;
532 }
533
534 time_travel_ext_fd = rc;
535
536 time_travel_ext_req(UM_TIMETRAVEL_START, id);
537
538 return 1;
539}
540
541static void time_travel_set_start(void)
542{
543 if (time_travel_start_set)
544 return;
545
546 switch (time_travel_mode) {
547 case TT_MODE_EXTERNAL:
548 time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
549 /* controller gave us the *current* time, so adjust by that */
550 time_travel_ext_get_time();
551 time_travel_start -= time_travel_time;
552 break;
553 case TT_MODE_INFCPU:
554 case TT_MODE_BASIC:
555 if (!time_travel_start_set)
556 time_travel_start = os_persistent_clock_emulation();
557 break;
558 case TT_MODE_OFF:
559 /* we just read the host clock with os_persistent_clock_emulation() */
560 break;
561 }
562
563 time_travel_start_set = true;
564}
565#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
566#define time_travel_start_set 0
567#define time_travel_start 0
568#define time_travel_time 0
569#define time_travel_ext_waiting 0
570
571static inline void time_travel_update_time(unsigned long long ns, bool retearly)
572{
573}
574
575static inline void time_travel_handle_real_alarm(void)
576{
577}
578
579static void time_travel_set_interval(unsigned long long interval)
580{
581}
582
583static inline void time_travel_set_start(void)
584{
585}
586
587/* fail link if this actually gets used */
588extern u64 time_travel_ext_req(u32 op, u64 time);
589
590/* these are empty macros so the struct/fn need not exist */
591#define time_travel_add_event(e, time) do { } while (0)
592/* externally not usable - redefine here so we can */
593#undef time_travel_del_event
594#define time_travel_del_event(e) do { } while (0)
595#endif
596
597void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
598{
599 unsigned long flags;
600
601 /*
602 * In basic time-travel mode we still get real interrupts
603 * (signals) but since we don't read time from the OS, we
604 * must update the simulated time here to the expiry when
605 * we get a signal.
606 * This is not the case in inf-cpu mode, since there we
607 * never get any real signals from the OS.
608 */
609 if (time_travel_mode == TT_MODE_BASIC)
610 time_travel_handle_real_alarm();
611
612 local_irq_save(flags);
613 do_IRQ(TIMER_IRQ, regs);
614 local_irq_restore(flags);
615}
616
617static int itimer_shutdown(struct clock_event_device *evt)
618{
619 if (time_travel_mode != TT_MODE_OFF)
620 time_travel_del_event(&time_travel_timer_event);
621
622 if (time_travel_mode != TT_MODE_INFCPU &&
623 time_travel_mode != TT_MODE_EXTERNAL)
624 os_timer_disable();
625
626 return 0;
627}
628
629static int itimer_set_periodic(struct clock_event_device *evt)
630{
631 unsigned long long interval = NSEC_PER_SEC / HZ;
632
633 if (time_travel_mode != TT_MODE_OFF) {
634 time_travel_del_event(&time_travel_timer_event);
635 time_travel_set_event_fn(&time_travel_timer_event,
636 time_travel_periodic_timer);
637 time_travel_set_interval(interval);
638 time_travel_add_event(&time_travel_timer_event,
639 time_travel_time + interval);
640 }
641
642 if (time_travel_mode != TT_MODE_INFCPU &&
643 time_travel_mode != TT_MODE_EXTERNAL)
644 os_timer_set_interval(interval);
645
646 return 0;
647}
648
649static int itimer_next_event(unsigned long delta,
650 struct clock_event_device *evt)
651{
652 delta += 1;
653
654 if (time_travel_mode != TT_MODE_OFF) {
655 time_travel_del_event(&time_travel_timer_event);
656 time_travel_set_event_fn(&time_travel_timer_event,
657 time_travel_oneshot_timer);
658 time_travel_add_event(&time_travel_timer_event,
659 time_travel_time + delta);
660 }
661
662 if (time_travel_mode != TT_MODE_INFCPU &&
663 time_travel_mode != TT_MODE_EXTERNAL)
664 return os_timer_one_shot(delta);
665
666 return 0;
667}
668
/* clockevent one-shot mode: program an event with a delta of zero. */
static int itimer_one_shot(struct clock_event_device *evt)
{
	const unsigned long no_delay = 0;

	return itimer_next_event(no_delay, evt);
}
673
674static struct clock_event_device timer_clockevent = {
675 .name = "posix-timer",
676 .rating = 250,
677 .cpumask = cpu_possible_mask,
678 .features = CLOCK_EVT_FEAT_PERIODIC |
679 CLOCK_EVT_FEAT_ONESHOT,
680 .set_state_shutdown = itimer_shutdown,
681 .set_state_periodic = itimer_set_periodic,
682 .set_state_oneshot = itimer_one_shot,
683 .set_next_event = itimer_next_event,
684 .shift = 0,
685 .max_delta_ns = 0xffffffff,
686 .max_delta_ticks = 0xffffffff,
687 .min_delta_ns = TIMER_MIN_DELTA,
688 .min_delta_ticks = TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM
689 .irq = 0,
690 .mult = 1,
691};
692
693static irqreturn_t um_timer(int irq, void *dev)
694{
695 if (get_current()->mm != NULL)
696 {
697 /* userspace - relay signal, results in correct userspace timers */
698 os_alarm_process(get_current()->mm->context.id.u.pid);
699 }
700
701 (*timer_clockevent.event_handler)(&timer_clockevent);
702
703 return IRQ_HANDLED;
704}
705
706static u64 timer_read(struct clocksource *cs)
707{
708 if (time_travel_mode != TT_MODE_OFF) {
709 /*
710 * We make reading the timer cost a bit so that we don't get
711 * stuck in loops that expect time to move more than the
712 * exact requested sleep amount, e.g. python's socket server,
713 * see https://bugs.python.org/issue37026.
714 *
715 * However, don't do that when we're in interrupt or such as
716 * then we might recurse into our own processing, and get to
717 * even more waiting, and that's not good - it messes up the
718 * "what do I do next" and onstack event we use to know when
719 * to return from time_travel_update_time().
720 */
721 if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
722 !time_travel_ext_waiting)
723 time_travel_update_time(time_travel_time +
724 TIMER_MULTIPLIER,
725 false);
726 return time_travel_time / TIMER_MULTIPLIER;
727 }
728
729 return os_nsecs() / TIMER_MULTIPLIER;
730}
731
732static struct clocksource timer_clocksource = {
733 .name = "timer",
734 .rating = 300,
735 .read = timer_read,
736 .mask = CLOCKSOURCE_MASK(64),
737 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
738};
739
740static void __init um_timer_setup(void)
741{
742 int err;
743
744 err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
745 if (err != 0)
746 printk(KERN_ERR "register_timer : request_irq failed - "
747 "errno = %d\n", -err);
748
749 err = os_timer_create();
750 if (err != 0) {
751 printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
752 return;
753 }
754
755 err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER);
756 if (err) {
757 printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
758 return;
759 }
760 clockevents_register_device(&timer_clockevent);
761}
762
763void read_persistent_clock64(struct timespec64 *ts)
764{
765 long long nsecs;
766
767 time_travel_set_start();
768
769 if (time_travel_mode != TT_MODE_OFF)
770 nsecs = time_travel_start + time_travel_time;
771 else
772 nsecs = os_persistent_clock_emulation();
773
774 set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
775 nsecs % NSEC_PER_SEC);
776}
777
778void __init time_init(void)
779{
780 timer_set_signal_handler();
781 late_time_init = um_timer_setup;
782}
783
784#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
785unsigned long calibrate_delay_is_known(void)
786{
787 if (time_travel_mode == TT_MODE_INFCPU ||
788 time_travel_mode == TT_MODE_EXTERNAL)
789 return 1;
790 return 0;
791}
792
793int setup_time_travel(char *str)
794{
795 if (strcmp(str, "=inf-cpu") == 0) {
796 time_travel_mode = TT_MODE_INFCPU;
797 timer_clockevent.name = "time-travel-timer-infcpu";
798 timer_clocksource.name = "time-travel-clock";
799 return 1;
800 }
801
802 if (strncmp(str, "=ext:", 5) == 0) {
803 time_travel_mode = TT_MODE_EXTERNAL;
804 timer_clockevent.name = "time-travel-timer-external";
805 timer_clocksource.name = "time-travel-clock-external";
806 return time_travel_connect_external(str + 5);
807 }
808
809 if (!*str) {
810 time_travel_mode = TT_MODE_BASIC;
811 timer_clockevent.name = "time-travel-timer";
812 timer_clocksource.name = "time-travel-clock";
813 return 1;
814 }
815
816 return -EINVAL;
817}
818
819__setup("time-travel", setup_time_travel);
820__uml_help(setup_time_travel,
821"time-travel\n"
822"This option just enables basic time travel mode, in which the clock/timers\n"
823"inside the UML instance skip forward when there's nothing to do, rather than\n"
824"waiting for real time to elapse. However, instance CPU speed is limited by\n"
825"the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
826"clock (but quicker when there's nothing to do).\n"
827"\n"
828"time-travel=inf-cpu\n"
829"This enables time travel mode with infinite processing power, in which there\n"
830"are no wall clock timers, and any CPU processing happens - as seen from the\n"
831"guest - instantly. This can be useful for accurate simulation regardless of\n"
832"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
833"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
834"\n"
835"time-travel=ext:[ID:]/path/to/socket\n"
836"This enables time travel mode similar to =inf-cpu, except the system will\n"
837"use the given socket to coordinate with a central scheduler, in order to\n"
838"have more than one system simultaneously be on simulated time. The virtio\n"
839"driver code in UML knows about this so you can also simulate networks and\n"
840"devices using it, assuming the device has the right capabilities.\n"
841"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");
842
843int setup_time_travel_start(char *str)
844{
845 int err;
846
847 err = kstrtoull(str, 0, &time_travel_start);
848 if (err)
849 return err;
850
851 time_travel_start_set = 1;
852 return 1;
853}
854
855__setup("time-travel-start", setup_time_travel_start);
856__uml_help(setup_time_travel_start,
857"time-travel-start=<seconds>\n"
858"Configure the UML instance's wall clock to start at this value rather than\n"
859"the host's wall clock at the time of UML boot.\n");
860#endif
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
4 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
5 * Copyright (C) 2012-2014 Cisco Systems
6 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
7 * Copyright (C) 2019 Intel Corporation
8 */
9
10#include <linux/clockchips.h>
11#include <linux/init.h>
12#include <linux/interrupt.h>
13#include <linux/jiffies.h>
14#include <linux/mm.h>
15#include <linux/sched.h>
16#include <linux/spinlock.h>
17#include <linux/threads.h>
18#include <asm/irq.h>
19#include <asm/param.h>
20#include <kern_util.h>
21#include <os.h>
22#include <linux/delay.h>
23#include <linux/time-internal.h>
24#include <linux/um_timetravel.h>
25#include <shared/init.h>
26
27#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
28#include <linux/sched/clock.h>
29
30enum time_travel_mode time_travel_mode;
31EXPORT_SYMBOL_GPL(time_travel_mode);
32
33static bool time_travel_start_set;
34static unsigned long long time_travel_start;
35static unsigned long long time_travel_time;
36static unsigned long long time_travel_shm_offset;
37static LIST_HEAD(time_travel_events);
38static LIST_HEAD(time_travel_irqs);
39static unsigned long long time_travel_timer_interval;
40static unsigned long long time_travel_next_event;
41static struct time_travel_event time_travel_timer_event;
42static int time_travel_ext_fd = -1;
43static unsigned int time_travel_ext_waiting;
44static bool time_travel_ext_prev_request_valid;
45static unsigned long long time_travel_ext_prev_request;
46static unsigned long long *time_travel_ext_free_until;
47static unsigned long long _time_travel_ext_free_until;
48static u16 time_travel_shm_id;
49static struct um_timetravel_schedshm *time_travel_shm;
50static union um_timetravel_schedshm_client *time_travel_shm_client;
51
52unsigned long tt_extra_sched_jiffies;
53
54notrace unsigned long long sched_clock(void)
55{
56 return (unsigned long long)(jiffies - INITIAL_JIFFIES +
57 tt_extra_sched_jiffies)
58 * (NSEC_PER_SEC / HZ);
59}
60
61static void time_travel_set_time(unsigned long long ns)
62{
63 if (unlikely(ns < time_travel_time))
64 panic("time-travel: time goes backwards %lld -> %lld\n",
65 time_travel_time, ns);
66 else if (unlikely(ns >= S64_MAX))
67 panic("The system was going to sleep forever, aborting");
68
69 time_travel_time = ns;
70}
71
/* How time_travel_handle_message() obtains the next message. */
enum time_travel_message_handling {
	/* poll for the message first; IRQs must already be disabled */
	TTMH_IDLE,
	/* poll for the message first */
	TTMH_POLL,
	/* read right away, without polling */
	TTMH_READ,
	/* poll first, then receive the message together with passed fds */
	TTMH_READ_START_ACK,
};
78
79static u64 bc_message;
80int time_travel_should_print_bc_msg;
81
82void _time_travel_print_bc_msg(void)
83{
84 time_travel_should_print_bc_msg = 0;
85 printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
86}
87
88static void time_travel_setup_shm(int fd, u16 id)
89{
90 u32 len;
91
92 time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));
93
94 if (!time_travel_shm)
95 goto out;
96
97 len = time_travel_shm->len;
98
99 if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
100 len < struct_size(time_travel_shm, clients, id + 1)) {
101 os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
102 time_travel_shm = NULL;
103 goto out;
104 }
105
106 time_travel_shm = os_mremap_rw_shared(time_travel_shm,
107 sizeof(*time_travel_shm),
108 len);
109 if (!time_travel_shm)
110 goto out;
111
112 time_travel_shm_offset = time_travel_shm->current_time;
113 time_travel_shm_client = &time_travel_shm->clients[id];
114 time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
115 time_travel_shm_id = id;
116 /* always look at that free_until from now on */
117 time_travel_ext_free_until = &time_travel_shm->free_until;
118out:
119 os_close_file(fd);
120}
121
122static void time_travel_handle_message(struct um_timetravel_msg *msg,
123 enum time_travel_message_handling mode)
124{
125 struct um_timetravel_msg resp = {
126 .op = UM_TIMETRAVEL_ACK,
127 };
128 int ret;
129
130 /*
131 * We can't unlock here, but interrupt signals with a timetravel_handler
132 * (see um_request_irq_tt) get to the timetravel_handler anyway.
133 */
134 if (mode != TTMH_READ) {
135 BUG_ON(mode == TTMH_IDLE && !irqs_disabled());
136
137 while (os_poll(1, &time_travel_ext_fd) != 0) {
138 /* nothing */
139 }
140 }
141
142 if (unlikely(mode == TTMH_READ_START_ACK)) {
143 int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];
144
145 ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
146 ARRAY_SIZE(fd), msg, sizeof(*msg));
147 if (ret == sizeof(*msg)) {
148 time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
149 msg->time & UM_TIMETRAVEL_START_ACK_ID);
150 /* we don't use the logging for now */
151 os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
152 }
153 } else {
154 ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
155 }
156
157 if (ret == 0)
158 panic("time-travel external link is broken\n");
159 if (ret != sizeof(*msg))
160 panic("invalid time-travel message - %d bytes\n", ret);
161
162 switch (msg->op) {
163 default:
164 WARN_ONCE(1, "time-travel: unexpected message %lld\n",
165 (unsigned long long)msg->op);
166 break;
167 case UM_TIMETRAVEL_ACK:
168 return;
169 case UM_TIMETRAVEL_RUN:
170 time_travel_set_time(msg->time);
171 if (time_travel_shm) {
172 /* no request right now since we're running */
173 time_travel_shm_client->flags &=
174 ~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
175 /* no ack for shared memory RUN */
176 return;
177 }
178 break;
179 case UM_TIMETRAVEL_FREE_UNTIL:
180 /* not supposed to get this with shm, but ignore it */
181 if (time_travel_shm)
182 break;
183 time_travel_ext_free_until = &_time_travel_ext_free_until;
184 _time_travel_ext_free_until = msg->time;
185 break;
186 case UM_TIMETRAVEL_BROADCAST:
187 bc_message = msg->time;
188 time_travel_should_print_bc_msg = 1;
189 break;
190 }
191
192 resp.seq = msg->seq;
193 os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
194}
195
196static u64 time_travel_ext_req(u32 op, u64 time)
197{
198 static int seq;
199 int mseq = ++seq;
200 struct um_timetravel_msg msg = {
201 .op = op,
202 .time = time,
203 .seq = mseq,
204 };
205
206 /*
207 * We need to block even the timetravel handlers of SIGIO here and
208 * only restore their use when we got the ACK - otherwise we may
209 * (will) get interrupted by that, try to queue the IRQ for future
210 * processing and thus send another request while we're still waiting
211 * for an ACK, but the peer doesn't know we got interrupted and will
212 * send the ACKs in the same order as the message, but we'd need to
213 * see them in the opposite order ...
214 *
215 * This wouldn't matter *too* much, but some ACKs carry the
216 * current time (for UM_TIMETRAVEL_GET) and getting another
217 * ACK without a time would confuse us a lot!
218 *
219 * The sequence number assignment that happens here lets us
220 * debug such message handling issues more easily.
221 */
222 block_signals_hard();
223 os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
224
225 /* no ACK expected for WAIT in shared memory mode */
226 if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
227 goto done;
228
229 while (msg.op != UM_TIMETRAVEL_ACK)
230 time_travel_handle_message(&msg,
231 op == UM_TIMETRAVEL_START ?
232 TTMH_READ_START_ACK :
233 TTMH_READ);
234
235 if (msg.seq != mseq)
236 panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
237 msg.op, msg.seq, mseq, msg.time);
238
239 if (op == UM_TIMETRAVEL_GET)
240 time_travel_set_time(msg.time);
241done:
242 unblock_signals_hard();
243
244 return msg.time;
245}
246
247void __time_travel_wait_readable(int fd)
248{
249 int fds[2] = { fd, time_travel_ext_fd };
250 int ret;
251
252 if (time_travel_mode != TT_MODE_EXTERNAL)
253 return;
254
255 while ((ret = os_poll(2, fds))) {
256 struct um_timetravel_msg msg;
257
258 if (ret == 1)
259 time_travel_handle_message(&msg, TTMH_READ);
260 }
261}
262EXPORT_SYMBOL_GPL(__time_travel_wait_readable);
263
264static void time_travel_ext_update_request(unsigned long long time)
265{
266 if (time_travel_mode != TT_MODE_EXTERNAL)
267 return;
268
269 /* asked for exactly this time previously */
270 if (time_travel_ext_prev_request_valid &&
271 time == time_travel_ext_prev_request)
272 return;
273
274 /*
275 * if we're running and are allowed to run past the request
276 * then we don't need to update it either
277 *
278 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
279 * to shared memory, and for non-shm the offset is 0.
280 */
281 if (!time_travel_ext_waiting && time_travel_ext_free_until &&
282 time < (*time_travel_ext_free_until - time_travel_shm_offset))
283 return;
284
285 time_travel_ext_prev_request = time;
286 time_travel_ext_prev_request_valid = true;
287
288 if (time_travel_shm) {
289 union um_timetravel_schedshm_client *running;
290
291 running = &time_travel_shm->clients[time_travel_shm->running_id];
292
293 if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
294 time_travel_shm_client->flags |=
295 UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
296 time += time_travel_shm_offset;
297 time_travel_shm_client->req_time = time;
298 if (time < time_travel_shm->free_until)
299 time_travel_shm->free_until = time;
300 return;
301 }
302 }
303
304 time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
305}
306
307void __time_travel_propagate_time(void)
308{
309 static unsigned long long last_propagated;
310
311 if (time_travel_shm) {
312 if (time_travel_shm->running_id != time_travel_shm_id)
313 panic("time-travel: setting time while not running\n");
314 time_travel_shm->current_time = time_travel_time +
315 time_travel_shm_offset;
316 return;
317 }
318
319 if (last_propagated == time_travel_time)
320 return;
321
322 time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
323 last_propagated = time_travel_time;
324}
325EXPORT_SYMBOL_GPL(__time_travel_propagate_time);
326
327/* returns true if we must do a wait to the simtime device */
328static bool time_travel_ext_request(unsigned long long time)
329{
330 /*
331 * If we received an external sync point ("free until") then we
332 * don't have to request/wait for anything until then, unless
333 * we're already waiting.
334 *
335 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
336 * to shared memory, and for non-shm the offset is 0.
337 */
338 if (!time_travel_ext_waiting && time_travel_ext_free_until &&
339 time < (*time_travel_ext_free_until - time_travel_shm_offset))
340 return false;
341
342 time_travel_ext_update_request(time);
343 return true;
344}
345
346static void time_travel_ext_wait(bool idle)
347{
348 struct um_timetravel_msg msg = {
349 .op = UM_TIMETRAVEL_ACK,
350 };
351
352 time_travel_ext_prev_request_valid = false;
353 if (!time_travel_shm)
354 time_travel_ext_free_until = NULL;
355 time_travel_ext_waiting++;
356
357 time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
358
359 /*
360 * Here we are deep in the idle loop, so we have to break out of the
361 * kernel abstraction in a sense and implement this in terms of the
362 * UML system waiting on the VQ interrupt while sleeping, when we get
363 * the signal it'll call time_travel_ext_vq_notify_done() completing the
364 * call.
365 */
366 while (msg.op != UM_TIMETRAVEL_RUN)
367 time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);
368
369 time_travel_ext_waiting--;
370
371 /* we might request more stuff while polling - reset when we run */
372 time_travel_ext_prev_request_valid = false;
373}
374
375static void time_travel_ext_get_time(void)
376{
377 if (time_travel_shm)
378 time_travel_set_time(time_travel_shm->current_time -
379 time_travel_shm_offset);
380 else
381 time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
382}
383
384static void __time_travel_update_time(unsigned long long ns, bool idle)
385{
386 if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
387 time_travel_ext_wait(idle);
388 else
389 time_travel_set_time(ns);
390}
391
392static struct time_travel_event *time_travel_first_event(void)
393{
394 return list_first_entry_or_null(&time_travel_events,
395 struct time_travel_event,
396 list);
397}
398
399static void __time_travel_add_event(struct time_travel_event *e,
400 unsigned long long time)
401{
402 struct time_travel_event *tmp;
403 bool inserted = false;
404 unsigned long flags;
405
406 if (e->pending)
407 return;
408
409 e->pending = true;
410 e->time = time;
411
412 local_irq_save(flags);
413 list_for_each_entry(tmp, &time_travel_events, list) {
414 /*
415 * Add the new entry before one with higher time,
416 * or if they're equal and both on stack, because
417 * in that case we need to unwind the stack in the
418 * right order, and the later event (timer sleep
419 * or such) must be dequeued first.
420 */
421 if ((tmp->time > e->time) ||
422 (tmp->time == e->time && tmp->onstack && e->onstack)) {
423 list_add_tail(&e->list, &tmp->list);
424 inserted = true;
425 break;
426 }
427 }
428
429 if (!inserted)
430 list_add_tail(&e->list, &time_travel_events);
431
432 tmp = time_travel_first_event();
433 time_travel_ext_update_request(tmp->time);
434 time_travel_next_event = tmp->time;
435 local_irq_restore(flags);
436}
437
438static void time_travel_add_event(struct time_travel_event *e,
439 unsigned long long time)
440{
441 if (WARN_ON(!e->fn))
442 return;
443
444 __time_travel_add_event(e, time);
445}
446
447void time_travel_add_event_rel(struct time_travel_event *e,
448 unsigned long long delay_ns)
449{
450 time_travel_add_event(e, time_travel_time + delay_ns);
451}
452
453static void time_travel_periodic_timer(struct time_travel_event *e)
454{
455 time_travel_add_event(&time_travel_timer_event,
456 time_travel_time + time_travel_timer_interval);
457
458 /* clock tick; decrease extra jiffies by keeping sched_clock constant */
459 if (tt_extra_sched_jiffies > 0)
460 tt_extra_sched_jiffies -= 1;
461
462 deliver_alarm();
463}
464
465void deliver_time_travel_irqs(void)
466{
467 struct time_travel_event *e;
468 unsigned long flags;
469
470 /*
471 * Don't do anything for most cases. Note that because here we have
472 * to disable IRQs (and re-enable later) we'll actually recurse at
473 * the end of the function, so this is strictly necessary.
474 */
475 if (likely(list_empty(&time_travel_irqs)))
476 return;
477
478 local_irq_save(flags);
479 irq_enter();
480 while ((e = list_first_entry_or_null(&time_travel_irqs,
481 struct time_travel_event,
482 list))) {
483 list_del(&e->list);
484 e->pending = false;
485 e->fn(e);
486 }
487 irq_exit();
488 local_irq_restore(flags);
489}
490
491static void time_travel_deliver_event(struct time_travel_event *e)
492{
493 if (e == &time_travel_timer_event) {
494 /*
495 * deliver_alarm() does the irq_enter/irq_exit
496 * by itself, so must handle it specially here
497 */
498 e->fn(e);
499 } else if (irqs_disabled()) {
500 list_add_tail(&e->list, &time_travel_irqs);
501 /*
502 * set pending again, it was set to false when the
503 * event was deleted from the original list, but
504 * now it's still pending until we deliver the IRQ.
505 */
506 e->pending = true;
507 } else {
508 unsigned long flags;
509
510 local_irq_save(flags);
511 irq_enter();
512 e->fn(e);
513 irq_exit();
514 local_irq_restore(flags);
515 }
516}
517
518bool time_travel_del_event(struct time_travel_event *e)
519{
520 unsigned long flags;
521
522 if (!e->pending)
523 return false;
524 local_irq_save(flags);
525 list_del(&e->list);
526 e->pending = false;
527 local_irq_restore(flags);
528 return true;
529}
530
531static void time_travel_update_time(unsigned long long next, bool idle)
532{
533 struct time_travel_event ne = {
534 .onstack = true,
535 };
536 struct time_travel_event *e;
537 bool finished = idle;
538
539 /* add it without a handler - we deal with that specifically below */
540 __time_travel_add_event(&ne, next);
541
542 do {
543 e = time_travel_first_event();
544
545 BUG_ON(!e);
546 __time_travel_update_time(e->time, idle);
547
548 /* new events may have been inserted while we were waiting */
549 if (e == time_travel_first_event()) {
550 BUG_ON(!time_travel_del_event(e));
551 BUG_ON(time_travel_time != e->time);
552
553 if (e == &ne) {
554 finished = true;
555 } else {
556 if (e->onstack)
557 panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
558 time_travel_time, e->time, e);
559 time_travel_deliver_event(e);
560 }
561 }
562
563 e = time_travel_first_event();
564 if (e)
565 time_travel_ext_update_request(e->time);
566 } while (ne.pending && !finished);
567
568 time_travel_del_event(&ne);
569}
570
571static void time_travel_update_time_rel(unsigned long long offs)
572{
573 unsigned long flags;
574
575 /*
576 * Disable interrupts before calculating the new time so
577 * that a real timer interrupt (signal) can't happen at
578 * a bad time e.g. after we read time_travel_time but
579 * before we've completed updating the time.
580 */
581 local_irq_save(flags);
582 time_travel_update_time(time_travel_time + offs, false);
583 local_irq_restore(flags);
584}
585
/* Delay by advancing simulated time by @nsec nanoseconds. */
void time_travel_ndelay(unsigned long nsec)
{
	/*
	 * The _rel() variant isn't strictly required, as this is only
	 * used in INFCPU/EXT modes, but it is harmless and reads better.
	 */
	time_travel_update_time_rel(nsec);
}
EXPORT_SYMBOL(time_travel_ndelay);
596
597void time_travel_add_irq_event(struct time_travel_event *e)
598{
599 BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);
600
601 time_travel_ext_get_time();
602 /*
603 * We could model interrupt latency here, for now just
604 * don't have any latency at all and request the exact
605 * same time (again) to run the interrupt...
606 */
607 time_travel_add_event(e, time_travel_time);
608}
609EXPORT_SYMBOL_GPL(time_travel_add_irq_event);
610
611static void time_travel_oneshot_timer(struct time_travel_event *e)
612{
613 /* clock tick; decrease extra jiffies by keeping sched_clock constant */
614 if (tt_extra_sched_jiffies > 0)
615 tt_extra_sched_jiffies -= 1;
616
617 deliver_alarm();
618}
619
620void time_travel_sleep(void)
621{
622 /*
623 * Wait "forever" (using S64_MAX because there are some potential
624 * wrapping issues, especially with the current TT_MODE_EXTERNAL
625 * controller application.
626 */
627 unsigned long long next = S64_MAX;
628
629 if (time_travel_mode == TT_MODE_BASIC)
630 os_timer_disable();
631
632 time_travel_update_time(next, true);
633
634 if (time_travel_mode == TT_MODE_BASIC &&
635 time_travel_timer_event.pending) {
636 if (time_travel_timer_event.fn == time_travel_periodic_timer) {
637 /*
638 * This is somewhat wrong - we should get the first
639 * one sooner like the os_timer_one_shot() below...
640 */
641 os_timer_set_interval(time_travel_timer_interval);
642 } else {
643 os_timer_one_shot(time_travel_timer_event.time - next);
644 }
645 }
646}
647
648static void time_travel_handle_real_alarm(void)
649{
650 time_travel_set_time(time_travel_next_event);
651
652 time_travel_del_event(&time_travel_timer_event);
653
654 if (time_travel_timer_event.fn == time_travel_periodic_timer)
655 time_travel_add_event(&time_travel_timer_event,
656 time_travel_time +
657 time_travel_timer_interval);
658}
659
660static void time_travel_set_interval(unsigned long long interval)
661{
662 time_travel_timer_interval = interval;
663}
664
665static int time_travel_connect_external(const char *socket)
666{
667 const char *sep;
668 unsigned long long id = (unsigned long long)-1;
669 int rc;
670
671 if ((sep = strchr(socket, ':'))) {
672 char buf[25] = {};
673 if (sep - socket > sizeof(buf) - 1)
674 goto invalid_number;
675
676 memcpy(buf, socket, sep - socket);
677 if (kstrtoull(buf, 0, &id)) {
678invalid_number:
679 panic("time-travel: invalid external ID in string '%s'\n",
680 socket);
681 return -EINVAL;
682 }
683
684 socket = sep + 1;
685 }
686
687 rc = os_connect_socket(socket);
688 if (rc < 0) {
689 panic("time-travel: failed to connect to external socket %s\n",
690 socket);
691 return rc;
692 }
693
694 time_travel_ext_fd = rc;
695
696 time_travel_ext_req(UM_TIMETRAVEL_START, id);
697
698 return 1;
699}
700
701static void time_travel_set_start(void)
702{
703 if (time_travel_start_set)
704 return;
705
706 switch (time_travel_mode) {
707 case TT_MODE_EXTERNAL:
708 time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
709 /* controller gave us the *current* time, so adjust by that */
710 time_travel_ext_get_time();
711 time_travel_start -= time_travel_time;
712 break;
713 case TT_MODE_INFCPU:
714 case TT_MODE_BASIC:
715 if (!time_travel_start_set)
716 time_travel_start = os_persistent_clock_emulation();
717 break;
718 case TT_MODE_OFF:
719 /* we just read the host clock with os_persistent_clock_emulation() */
720 break;
721 }
722
723 time_travel_start_set = true;
724}
725#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
726#define time_travel_start_set 0
727#define time_travel_start 0
728#define time_travel_time 0
729#define time_travel_ext_waiting 0
730
731static inline void time_travel_update_time(unsigned long long ns, bool idle)
732{
733}
734
735static inline void time_travel_update_time_rel(unsigned long long offs)
736{
737}
738
739static inline void time_travel_handle_real_alarm(void)
740{
741}
742
743static void time_travel_set_interval(unsigned long long interval)
744{
745}
746
747static inline void time_travel_set_start(void)
748{
749}
750
751/* fail link if this actually gets used */
752extern u64 time_travel_ext_req(u32 op, u64 time);
753
754/* these are empty macros so the struct/fn need not exist */
755#define time_travel_add_event(e, time) do { } while (0)
756/* externally not usable - redefine here so we can */
757#undef time_travel_del_event
758#define time_travel_del_event(e) do { } while (0)
759#endif
760
761void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
762{
763 unsigned long flags;
764
765 /*
766 * In basic time-travel mode we still get real interrupts
767 * (signals) but since we don't read time from the OS, we
768 * must update the simulated time here to the expiry when
769 * we get a signal.
770 * This is not the case in inf-cpu mode, since there we
771 * never get any real signals from the OS.
772 */
773 if (time_travel_mode == TT_MODE_BASIC)
774 time_travel_handle_real_alarm();
775
776 local_irq_save(flags);
777 do_IRQ(TIMER_IRQ, regs);
778 local_irq_restore(flags);
779}
780
781static int itimer_shutdown(struct clock_event_device *evt)
782{
783 if (time_travel_mode != TT_MODE_OFF)
784 time_travel_del_event(&time_travel_timer_event);
785
786 if (time_travel_mode != TT_MODE_INFCPU &&
787 time_travel_mode != TT_MODE_EXTERNAL)
788 os_timer_disable();
789
790 return 0;
791}
792
793static int itimer_set_periodic(struct clock_event_device *evt)
794{
795 unsigned long long interval = NSEC_PER_SEC / HZ;
796
797 if (time_travel_mode != TT_MODE_OFF) {
798 time_travel_del_event(&time_travel_timer_event);
799 time_travel_set_event_fn(&time_travel_timer_event,
800 time_travel_periodic_timer);
801 time_travel_set_interval(interval);
802 time_travel_add_event(&time_travel_timer_event,
803 time_travel_time + interval);
804 }
805
806 if (time_travel_mode != TT_MODE_INFCPU &&
807 time_travel_mode != TT_MODE_EXTERNAL)
808 os_timer_set_interval(interval);
809
810 return 0;
811}
812
813static int itimer_next_event(unsigned long delta,
814 struct clock_event_device *evt)
815{
816 delta += 1;
817
818 if (time_travel_mode != TT_MODE_OFF) {
819 time_travel_del_event(&time_travel_timer_event);
820 time_travel_set_event_fn(&time_travel_timer_event,
821 time_travel_oneshot_timer);
822 time_travel_add_event(&time_travel_timer_event,
823 time_travel_time + delta);
824 }
825
826 if (time_travel_mode != TT_MODE_INFCPU &&
827 time_travel_mode != TT_MODE_EXTERNAL)
828 return os_timer_one_shot(delta);
829
830 return 0;
831}
832
/* clockevent one-shot mode: arm an event with a zero delta */
static int itimer_one_shot(struct clock_event_device *evt)
{
	const unsigned long no_delta = 0;

	return itimer_next_event(no_delta, evt);
}
837
838static struct clock_event_device timer_clockevent = {
839 .name = "posix-timer",
840 .rating = 250,
841 .cpumask = cpu_possible_mask,
842 .features = CLOCK_EVT_FEAT_PERIODIC |
843 CLOCK_EVT_FEAT_ONESHOT,
844 .set_state_shutdown = itimer_shutdown,
845 .set_state_periodic = itimer_set_periodic,
846 .set_state_oneshot = itimer_one_shot,
847 .set_next_event = itimer_next_event,
848 .shift = 0,
849 .max_delta_ns = 0xffffffff,
850 .max_delta_ticks = 0xffffffff,
851 .min_delta_ns = TIMER_MIN_DELTA,
852 .min_delta_ticks = TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM
853 .irq = 0,
854 .mult = 1,
855};
856
857static irqreturn_t um_timer(int irq, void *dev)
858{
859 if (get_current()->mm != NULL)
860 {
861 /* userspace - relay signal, results in correct userspace timers */
862 os_alarm_process(get_current()->mm->context.id.pid);
863 }
864
865 (*timer_clockevent.event_handler)(&timer_clockevent);
866
867 return IRQ_HANDLED;
868}
869
870static u64 timer_read(struct clocksource *cs)
871{
872 if (time_travel_mode != TT_MODE_OFF) {
873 /*
874 * We make reading the timer cost a bit so that we don't get
875 * stuck in loops that expect time to move more than the
876 * exact requested sleep amount, e.g. python's socket server,
877 * see https://bugs.python.org/issue37026.
878 *
879 * However, don't do that when we're in interrupt or such as
880 * then we might recurse into our own processing, and get to
881 * even more waiting, and that's not good - it messes up the
882 * "what do I do next" and onstack event we use to know when
883 * to return from time_travel_update_time().
884 */
885 if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
886 !time_travel_ext_waiting)
887 time_travel_update_time_rel(TIMER_MULTIPLIER);
888 return time_travel_time / TIMER_MULTIPLIER;
889 }
890
891 return os_nsecs() / TIMER_MULTIPLIER;
892}
893
894static struct clocksource timer_clocksource = {
895 .name = "timer",
896 .rating = 300,
897 .read = timer_read,
898 .mask = CLOCKSOURCE_MASK(64),
899 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
900};
901
902static void __init um_timer_setup(void)
903{
904 int err;
905
906 err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
907 if (err != 0)
908 printk(KERN_ERR "register_timer : request_irq failed - "
909 "errno = %d\n", -err);
910
911 err = os_timer_create();
912 if (err != 0) {
913 printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
914 return;
915 }
916
917 err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER);
918 if (err) {
919 printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
920 return;
921 }
922 clockevents_register_device(&timer_clockevent);
923}
924
925void read_persistent_clock64(struct timespec64 *ts)
926{
927 long long nsecs;
928
929 time_travel_set_start();
930
931 if (time_travel_mode != TT_MODE_OFF)
932 nsecs = time_travel_start + time_travel_time;
933 else
934 nsecs = os_persistent_clock_emulation();
935
936 set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
937 nsecs % NSEC_PER_SEC);
938}
939
940void __init time_init(void)
941{
942 timer_set_signal_handler();
943 late_time_init = um_timer_setup;
944}
945
946#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
947unsigned long calibrate_delay_is_known(void)
948{
949 if (time_travel_mode == TT_MODE_INFCPU ||
950 time_travel_mode == TT_MODE_EXTERNAL)
951 return 1;
952 return 0;
953}
954
955static int setup_time_travel(char *str)
956{
957 if (strcmp(str, "=inf-cpu") == 0) {
958 time_travel_mode = TT_MODE_INFCPU;
959 timer_clockevent.name = "time-travel-timer-infcpu";
960 timer_clocksource.name = "time-travel-clock";
961 return 1;
962 }
963
964 if (strncmp(str, "=ext:", 5) == 0) {
965 time_travel_mode = TT_MODE_EXTERNAL;
966 timer_clockevent.name = "time-travel-timer-external";
967 timer_clocksource.name = "time-travel-clock-external";
968 return time_travel_connect_external(str + 5);
969 }
970
971 if (!*str) {
972 time_travel_mode = TT_MODE_BASIC;
973 timer_clockevent.name = "time-travel-timer";
974 timer_clocksource.name = "time-travel-clock";
975 return 1;
976 }
977
978 return -EINVAL;
979}
980
981__setup("time-travel", setup_time_travel);
982__uml_help(setup_time_travel,
983"time-travel\n"
984"This option just enables basic time travel mode, in which the clock/timers\n"
985"inside the UML instance skip forward when there's nothing to do, rather than\n"
986"waiting for real time to elapse. However, instance CPU speed is limited by\n"
987"the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
988"clock (but quicker when there's nothing to do).\n"
989"\n"
990"time-travel=inf-cpu\n"
991"This enables time travel mode with infinite processing power, in which there\n"
992"are no wall clock timers, and any CPU processing happens - as seen from the\n"
993"guest - instantly. This can be useful for accurate simulation regardless of\n"
994"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
995"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
996"\n"
997"time-travel=ext:[ID:]/path/to/socket\n"
998"This enables time travel mode similar to =inf-cpu, except the system will\n"
999"use the given socket to coordinate with a central scheduler, in order to\n"
1000"have more than one system simultaneously be on simulated time. The virtio\n"
1001"driver code in UML knows about this so you can also simulate networks and\n"
1002"devices using it, assuming the device has the right capabilities.\n"
1003"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");
1004
1005static int setup_time_travel_start(char *str)
1006{
1007 int err;
1008
1009 err = kstrtoull(str, 0, &time_travel_start);
1010 if (err)
1011 return err;
1012
1013 time_travel_start_set = 1;
1014 return 1;
1015}
1016
1017__setup("time-travel-start=", setup_time_travel_start);
1018__uml_help(setup_time_travel_start,
1019"time-travel-start=<nanoseconds>\n"
1020"Configure the UML instance's wall clock to start at this value rather than\n"
1021"the host's wall clock at the time of UML boot.\n");
1022static struct kobject *bc_time_kobject;
1023
1024static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
1025{
1026 return sprintf(buf, "0x%llx", bc_message);
1027}
1028
1029static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
1030{
1031 int ret;
1032 u64 user_bc_message;
1033
1034 ret = kstrtou64(buf, 0, &user_bc_message);
1035 if (ret)
1036 return ret;
1037
1038 bc_message = user_bc_message;
1039
1040 time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
1041 pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
1042 return count;
1043}
1044
1045static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);
1046
1047static int __init um_bc_start(void)
1048{
1049 if (time_travel_mode != TT_MODE_EXTERNAL)
1050 return 0;
1051
1052 bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
1053 if (!bc_time_kobject)
1054 return 0;
1055
1056 if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
1057 pr_debug("failed to create the bc file in /sys/kernel/um_time");
1058
1059 return 0;
1060}
1061late_initcall(um_bc_start);
1062#endif