// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
 * Copyright (C) 2012-2014 Cisco Systems
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2019 Intel Corporation
 */

#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <asm/irq.h>
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
#include <linux/time-internal.h>
#include <linux/um_timetravel.h>
#include <shared/init.h>

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);

static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
static unsigned long long time_travel_next_event;
static struct time_travel_event time_travel_timer_event;
static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
static bool time_travel_ext_free_until_valid;
static unsigned long long time_travel_ext_free_until;

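/*
 * Move the simulated clock forward to @ns; time must never go backwards,
 * and reaching S64_MAX means nothing is scheduled and the system would
 * sleep forever.
 */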
static void time_travel_set_time(unsigned long long ns)
{
	if (unlikely(ns < time_travel_time))
		panic("time-travel: time goes backwards %lld -> %lld\n",
		      time_travel_time, ns);
	else if (unlikely(ns >= S64_MAX))
		panic("The system was going to sleep forever, aborting");

	time_travel_time = ns;
}

enum time_travel_message_handling {
	TTMH_IDLE,
	TTMH_POLL,
	TTMH_READ,
};

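/*
 * Read and handle one message from the external controller: ACK ends a
 * pending request, RUN advances the simulated clock, and FREE_UNTIL records
 * how far we may run without asking again.
 */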
static void time_travel_handle_message(struct um_timetravel_msg *msg,
				       enum time_travel_message_handling mode)
{
	struct um_timetravel_msg resp = {
		.op = UM_TIMETRAVEL_ACK,
	};
	int ret;

	/*
	 * We can't unlock here, but interrupt signals with a timetravel_handler
	 * (see um_request_irq_tt) get to the timetravel_handler anyway.
	 */
	if (mode != TTMH_READ) {
		BUG_ON(mode == TTMH_IDLE && !irqs_disabled());

		while (os_poll(1, &time_travel_ext_fd) != 0) {
			/* nothing */
		}
	}

	ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));

	if (ret == 0)
		panic("time-travel external link is broken\n");
	if (ret != sizeof(*msg))
		panic("invalid time-travel message - %d bytes\n", ret);

	switch (msg->op) {
	default:
		WARN_ONCE(1, "time-travel: unexpected message %lld\n",
			  (unsigned long long)msg->op);
		break;
	case UM_TIMETRAVEL_ACK:
		return;
	case UM_TIMETRAVEL_RUN:
		time_travel_set_time(msg->time);
		break;
	case UM_TIMETRAVEL_FREE_UNTIL:
		time_travel_ext_free_until_valid = true;
		time_travel_ext_free_until = msg->time;
		break;
	}

	resp.seq = msg->seq;
	os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
}

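/*
 * Send a request to the external controller and wait for the matching ACK,
 * handling any other messages that arrive in the meantime.
 */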
static u64 time_travel_ext_req(u32 op, u64 time)
{
	static int seq;
	int mseq = ++seq;
	struct um_timetravel_msg msg = {
		.op = op,
		.time = time,
		.seq = mseq,
	};

	/*
	 * We need to block even the timetravel handlers of SIGIO here and
	 * only restore their use when we got the ACK - otherwise we may
	 * (will) get interrupted by that, try to queue the IRQ for future
	 * processing and thus send another request while we're still waiting
	 * for an ACK, but the peer doesn't know we got interrupted and will
	 * send the ACKs in the same order as the message, but we'd need to
	 * see them in the opposite order ...
	 *
	 * This wouldn't matter *too* much, but some ACKs carry the
	 * current time (for UM_TIMETRAVEL_GET) and getting another
	 * ACK without a time would confuse us a lot!
	 *
	 * The sequence number assignment that happens here lets us
	 * debug such message handling issues more easily.
	 */
	block_signals_hard();
	os_write_file(time_travel_ext_fd, &msg, sizeof(msg));

	while (msg.op != UM_TIMETRAVEL_ACK)
		time_travel_handle_message(&msg, TTMH_READ);

	if (msg.seq != mseq)
		panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
		      msg.op, msg.seq, mseq, msg.time);

	if (op == UM_TIMETRAVEL_GET)
		time_travel_set_time(msg.time);
	unblock_signals_hard();

	return msg.time;
}

void __time_travel_wait_readable(int fd)
{
	int fds[2] = { fd, time_travel_ext_fd };
	int ret;

	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	while ((ret = os_poll(2, fds))) {
		struct um_timetravel_msg msg;

		if (ret == 1)
			time_travel_handle_message(&msg, TTMH_READ);
	}
}
EXPORT_SYMBOL_GPL(__time_travel_wait_readable);

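/*
 * Tell the controller about the next time we need to run, unless we already
 * asked for exactly this time or may run freely until then.
 */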
static void time_travel_ext_update_request(unsigned long long time)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	/* asked for exactly this time previously */
	if (time_travel_ext_prev_request_valid &&
	    time == time_travel_ext_prev_request)
		return;

	/*
	 * if we're running and are allowed to run past the request
	 * then we don't need to update it either
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
	    time < time_travel_ext_free_until)
		return;

	time_travel_ext_prev_request = time;
	time_travel_ext_prev_request_valid = true;
	time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
}

void __time_travel_propagate_time(void)
{
	static unsigned long long last_propagated;

	if (last_propagated == time_travel_time)
		return;

	time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
	last_propagated = time_travel_time;
}
EXPORT_SYMBOL_GPL(__time_travel_propagate_time);

/* returns true if we must do a wait to the simtime device */
static bool time_travel_ext_request(unsigned long long time)
{
	/*
	 * If we received an external sync point ("free until") then we
	 * don't have to request/wait for anything until then, unless
	 * we're already waiting.
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
	    time < time_travel_ext_free_until)
		return false;

	time_travel_ext_update_request(time);
	return true;
}

static void time_travel_ext_wait(bool idle)
{
	struct um_timetravel_msg msg = {
		.op = UM_TIMETRAVEL_ACK,
	};

	time_travel_ext_prev_request_valid = false;
	time_travel_ext_free_until_valid = false;
	time_travel_ext_waiting++;

	time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);

	/*
	 * Here we are deep in the idle loop, so we have to break out of the
	 * kernel abstraction in a sense and implement this in terms of the
	 * UML system waiting on the VQ interrupt while sleeping, when we get
	 * the signal it'll call time_travel_ext_vq_notify_done() completing the
	 * call.
	 */
	while (msg.op != UM_TIMETRAVEL_RUN)
		time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);

	time_travel_ext_waiting--;

	/* we might request more stuff while polling - reset when we run */
	time_travel_ext_prev_request_valid = false;
}

static void time_travel_ext_get_time(void)
{
	time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}

static void __time_travel_update_time(unsigned long long ns, bool idle)
{
	if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
		time_travel_ext_wait(idle);
	else
		time_travel_set_time(ns);
}

static struct time_travel_event *time_travel_first_event(void)
{
	return list_first_entry_or_null(&time_travel_events,
					struct time_travel_event,
					list);
}

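/* Insert an event into the time-ordered event list. */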
static void __time_travel_add_event(struct time_travel_event *e,
				    unsigned long long time)
{
	struct time_travel_event *tmp;
	bool inserted = false;
	unsigned long flags;

	if (e->pending)
		return;

	e->pending = true;
	e->time = time;

	local_irq_save(flags);
	list_for_each_entry(tmp, &time_travel_events, list) {
		/*
		 * Add the new entry before one with higher time,
		 * or if they're equal and both on stack, because
		 * in that case we need to unwind the stack in the
		 * right order, and the later event (timer sleep
		 * or such) must be dequeued first.
		 */
		if ((tmp->time > e->time) ||
		    (tmp->time == e->time && tmp->onstack && e->onstack)) {
			list_add_tail(&e->list, &tmp->list);
			inserted = true;
			break;
		}
	}

	if (!inserted)
		list_add_tail(&e->list, &time_travel_events);

	tmp = time_travel_first_event();
	time_travel_ext_update_request(tmp->time);
	time_travel_next_event = tmp->time;
	local_irq_restore(flags);
}

static void time_travel_add_event(struct time_travel_event *e,
				  unsigned long long time)
{
	if (WARN_ON(!e->fn))
		return;

	__time_travel_add_event(e, time);
}

void time_travel_add_event_rel(struct time_travel_event *e,
			       unsigned long long delay_ns)
{
	time_travel_add_event(e, time_travel_time + delay_ns);
}

void time_travel_periodic_timer(struct time_travel_event *e)
{
	time_travel_add_event(&time_travel_timer_event,
			      time_travel_time + time_travel_timer_interval);
	deliver_alarm();
}

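/*
 * Deliver any IRQ events that were queued by time_travel_deliver_event()
 * while interrupts were disabled.
 */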
void deliver_time_travel_irqs(void)
{
	struct time_travel_event *e;
	unsigned long flags;

	/*
	 * Don't do anything for most cases. Note that because here we have
	 * to disable IRQs (and re-enable later) we'll actually recurse at
	 * the end of the function, so this is strictly necessary.
	 */
	if (likely(list_empty(&time_travel_irqs)))
		return;

	local_irq_save(flags);
	irq_enter();
	while ((e = list_first_entry_or_null(&time_travel_irqs,
					     struct time_travel_event,
					     list))) {
		list_del(&e->list);
		e->pending = false;
		e->fn(e);
	}
	irq_exit();
	local_irq_restore(flags);
}

static void time_travel_deliver_event(struct time_travel_event *e)
{
	if (e == &time_travel_timer_event) {
		/*
		 * deliver_alarm() does the irq_enter/irq_exit
		 * by itself, so must handle it specially here
		 */
		e->fn(e);
	} else if (irqs_disabled()) {
		list_add_tail(&e->list, &time_travel_irqs);
		/*
		 * set pending again, it was set to false when the
		 * event was deleted from the original list, but
		 * now it's still pending until we deliver the IRQ.
		 */
		e->pending = true;
	} else {
		unsigned long flags;

		local_irq_save(flags);
		irq_enter();
		e->fn(e);
		irq_exit();
		local_irq_restore(flags);
	}
}

bool time_travel_del_event(struct time_travel_event *e)
{
	unsigned long flags;

	if (!e->pending)
		return false;

	local_irq_save(flags);
	list_del(&e->list);
	e->pending = false;
	local_irq_restore(flags);

	return true;
}

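/*
 * Advance the simulated clock to @next, delivering every event that is due
 * on the way; the on-stack marker event tells us when we have reached the
 * requested time.
 */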
static void time_travel_update_time(unsigned long long next, bool idle)
{
	struct time_travel_event ne = {
		.onstack = 1,
	};
	struct time_travel_event *e;
	bool finished = idle;

	/* add it without a handler - we deal with that specifically below */
	__time_travel_add_event(&ne, next);

	do {
		e = time_travel_first_event();

		BUG_ON(!e);
		__time_travel_update_time(e->time, idle);

		/* new events may have been inserted while we were waiting */
		if (e == time_travel_first_event()) {
			BUG_ON(!time_travel_del_event(e));
			BUG_ON(time_travel_time != e->time);

			if (e == &ne) {
				finished = true;
			} else {
				if (e->onstack)
					panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
					      time_travel_time, e->time, e);
				time_travel_deliver_event(e);
			}
		}

		e = time_travel_first_event();
		if (e)
			time_travel_ext_update_request(e->time);
	} while (ne.pending && !finished);

	time_travel_del_event(&ne);
}

void time_travel_ndelay(unsigned long nsec)
{
	time_travel_update_time(time_travel_time + nsec, false);
}
EXPORT_SYMBOL(time_travel_ndelay);

void time_travel_add_irq_event(struct time_travel_event *e)
{
	BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);

	time_travel_ext_get_time();
	/*
	 * We could model interrupt latency here, for now just
	 * don't have any latency at all and request the exact
	 * same time (again) to run the interrupt...
	 */
	time_travel_add_event(e, time_travel_time);
}
EXPORT_SYMBOL_GPL(time_travel_add_irq_event);

static void time_travel_oneshot_timer(struct time_travel_event *e)
{
	deliver_alarm();
}

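/*
 * Idle sleep: run the event queue (effectively "forever"), then re-arm the
 * host timer if we're in basic mode and a timer event is still pending.
 */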
void time_travel_sleep(void)
{
	/*
	 * Wait "forever" (using S64_MAX because there are some potential
	 * wrapping issues, especially with the current TT_MODE_EXTERNAL
	 * controller application).
	 */
	unsigned long long next = S64_MAX;

	if (time_travel_mode == TT_MODE_BASIC)
		os_timer_disable();

	time_travel_update_time(next, true);

	if (time_travel_mode == TT_MODE_BASIC &&
	    time_travel_timer_event.pending) {
		if (time_travel_timer_event.fn == time_travel_periodic_timer) {
			/*
			 * This is somewhat wrong - we should get the first
			 * one sooner like the os_timer_one_shot() below...
			 */
			os_timer_set_interval(time_travel_timer_interval);
		} else {
			os_timer_one_shot(time_travel_timer_event.time - next);
		}
	}
}

static void time_travel_handle_real_alarm(void)
{
	time_travel_set_time(time_travel_next_event);

	time_travel_del_event(&time_travel_timer_event);

	if (time_travel_timer_event.fn == time_travel_periodic_timer)
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time +
				      time_travel_timer_interval);
}

static void time_travel_set_interval(unsigned long long interval)
{
	time_travel_timer_interval = interval;
}

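/*
 * Parse the optional "ID:" prefix, connect to the controller socket and
 * send the initial START request.
 */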
static int time_travel_connect_external(const char *socket)
{
	const char *sep;
	unsigned long long id = (unsigned long long)-1;
	int rc;

	if ((sep = strchr(socket, ':'))) {
		char buf[25] = {};

		if (sep - socket > sizeof(buf) - 1)
			goto invalid_number;
		memcpy(buf, socket, sep - socket);
		if (kstrtoull(buf, 0, &id)) {
invalid_number:
			panic("time-travel: invalid external ID in string '%s'\n",
			      socket);
			return -EINVAL;
		}
		socket = sep + 1;
	}

	rc = os_connect_socket(socket);
	if (rc < 0) {
		panic("time-travel: failed to connect to external socket %s\n",
		      socket);
		return rc;
	}

	time_travel_ext_fd = rc;
	time_travel_ext_req(UM_TIMETRAVEL_START, id);
	return 1;
}

static void time_travel_set_start(void)
{
	if (time_travel_start_set)
		return;

	switch (time_travel_mode) {
	case TT_MODE_EXTERNAL:
		time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
		/* controller gave us the *current* time, so adjust by that */
		time_travel_ext_get_time();
		time_travel_start -= time_travel_time;
		break;
	case TT_MODE_INFCPU:
	case TT_MODE_BASIC:
		if (!time_travel_start_set)
			time_travel_start = os_persistent_clock_emulation();
		break;
	case TT_MODE_OFF:
		/* we just read the host clock with os_persistent_clock_emulation() */
		break;
	}

	time_travel_start_set = true;
}
#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
#define time_travel_start_set 0
#define time_travel_start 0
#define time_travel_time 0
#define time_travel_ext_waiting 0

static inline void time_travel_update_time(unsigned long long ns, bool retearly)
{
}

static inline void time_travel_handle_real_alarm(void)
{
}

static void time_travel_set_interval(unsigned long long interval)
{
}

static inline void time_travel_set_start(void)
{
}

/* fail link if this actually gets used */
extern u64 time_travel_ext_req(u32 op, u64 time);

/* these are empty macros so the struct/fn need not exist */
#define time_travel_add_event(e, time) do { } while (0)
/* externally not usable - redefine here so we can */
#undef time_travel_del_event
#define time_travel_del_event(e) do { } while (0)
#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */

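/* Host timer signal -> timer IRQ dispatch. */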
void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	unsigned long flags;

	/*
	 * In basic time-travel mode we still get real interrupts
	 * (signals) but since we don't read time from the OS, we
	 * must update the simulated time here to the expiry when
	 * we get a signal.
	 * This is not the case in inf-cpu mode, since there we
	 * never get any real signals from the OS.
	 */
	if (time_travel_mode == TT_MODE_BASIC)
		time_travel_handle_real_alarm();

	local_irq_save(flags);
	do_IRQ(TIMER_IRQ, regs);
	local_irq_restore(flags);
}

static int itimer_shutdown(struct clock_event_device *evt)
{
	if (time_travel_mode != TT_MODE_OFF)
		time_travel_del_event(&time_travel_timer_event);

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_disable();

	return 0;
}

static int itimer_set_periodic(struct clock_event_device *evt)
{
	unsigned long long interval = NSEC_PER_SEC / HZ;

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_periodic_timer);
		time_travel_set_interval(interval);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + interval);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_set_interval(interval);

	return 0;
}

static int itimer_next_event(unsigned long delta,
			     struct clock_event_device *evt)
{
	delta += 1;

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_oneshot_timer);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + delta);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		return os_timer_one_shot(delta);

	return 0;
}

static int itimer_one_shot(struct clock_event_device *evt)
{
	return itimer_next_event(0, evt);
}

static struct clock_event_device timer_clockevent = {
	.name			= "posix-timer",
	.rating			= 250,
	.cpumask		= cpu_possible_mask,
	.features		= CLOCK_EVT_FEAT_PERIODIC |
				  CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= itimer_shutdown,
	.set_state_periodic	= itimer_set_periodic,
	.set_state_oneshot	= itimer_one_shot,
	.set_next_event		= itimer_next_event,
	.shift			= 0,
	.max_delta_ns		= 0xffffffff,
	.max_delta_ticks	= 0xffffffff,
	.min_delta_ns		= TIMER_MIN_DELTA,
	.min_delta_ticks	= TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM
	.irq			= 0,
	.mult			= 1,
};

static irqreturn_t um_timer(int irq, void *dev)
{
	if (get_current()->mm != NULL)
	{
		/* userspace - relay signal, results in correct userspace timers */
		os_alarm_process(get_current()->mm->context.id.u.pid);
	}

	(*timer_clockevent.event_handler)(&timer_clockevent);

	return IRQ_HANDLED;
}

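/*
 * Clocksource read; under time-travel this returns (and slightly advances)
 * the simulated clock instead of host time.
 */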
static u64 timer_read(struct clocksource *cs)
{
	if (time_travel_mode != TT_MODE_OFF) {
		/*
		 * We make reading the timer cost a bit so that we don't get
		 * stuck in loops that expect time to move more than the
		 * exact requested sleep amount, e.g. python's socket server,
		 * see https://bugs.python.org/issue37026.
		 *
		 * However, don't do that when we're in interrupt or such as
		 * then we might recurse into our own processing, and get to
		 * even more waiting, and that's not good - it messes up the
		 * "what do I do next" and onstack event we use to know when
		 * to return from time_travel_update_time().
		 */
		if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
		    !time_travel_ext_waiting)
			time_travel_update_time(time_travel_time +
						TIMER_MULTIPLIER,
						false);
		return time_travel_time / TIMER_MULTIPLIER;
	}

	return os_nsecs() / TIMER_MULTIPLIER;
}

static struct clocksource timer_clocksource = {
	.name		= "timer",
	.rating		= 300,
	.read		= timer_read,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};

static void __init um_timer_setup(void)
{
	int err;

	err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
	if (err != 0)
		printk(KERN_ERR "register_timer : request_irq failed - "
		       "errno = %d\n", -err);

	err = os_timer_create();
	if (err != 0) {
		printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
		return;
	}

	err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER);
	if (err) {
		printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
		return;
	}
	clockevents_register_device(&timer_clockevent);
}

void read_persistent_clock64(struct timespec64 *ts)
{
	long long nsecs;

	time_travel_set_start();

	if (time_travel_mode != TT_MODE_OFF)
		nsecs = time_travel_start + time_travel_time;
	else
		nsecs = os_persistent_clock_emulation();

	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
				  nsecs % NSEC_PER_SEC);
}

void __init time_init(void)
{
	timer_set_signal_handler();
	late_time_init = um_timer_setup;
}

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
unsigned long calibrate_delay_is_known(void)
{
	if (time_travel_mode == TT_MODE_INFCPU ||
	    time_travel_mode == TT_MODE_EXTERNAL)
		return 1;
	return 0;
}

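/*
 * Parse "time-travel[=inf-cpu|=ext:[ID:]socket]" from the kernel command
 * line and select the corresponding mode.
 */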
int setup_time_travel(char *str)
{
	if (strcmp(str, "=inf-cpu") == 0) {
		time_travel_mode = TT_MODE_INFCPU;
		timer_clockevent.name = "time-travel-timer-infcpu";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	if (strncmp(str, "=ext:", 5) == 0) {
		time_travel_mode = TT_MODE_EXTERNAL;
		timer_clockevent.name = "time-travel-timer-external";
		timer_clocksource.name = "time-travel-clock-external";
		return time_travel_connect_external(str + 5);
	}

	if (!*str) {
		time_travel_mode = TT_MODE_BASIC;
		timer_clockevent.name = "time-travel-timer";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	return -EINVAL;
}

__setup("time-travel", setup_time_travel);
__uml_help(setup_time_travel,
"time-travel\n"
"This option just enables basic time travel mode, in which the clock/timers\n"
"inside the UML instance skip forward when there's nothing to do, rather than\n"
"waiting for real time to elapse. However, instance CPU speed is limited by\n"
"the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
"clock (but quicker when there's nothing to do).\n"
"\n"
"time-travel=inf-cpu\n"
"This enables time travel mode with infinite processing power, in which there\n"
"are no wall clock timers, and any CPU processing happens - as seen from the\n"
"guest - instantly. This can be useful for accurate simulation regardless of\n"
"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
"\n"
"time-travel=ext:[ID:]/path/to/socket\n"
"This enables time travel mode similar to =inf-cpu, except the system will\n"
"use the given socket to coordinate with a central scheduler, in order to\n"
"have more than one system simultaneously be on simulated time. The virtio\n"
"driver code in UML knows about this so you can also simulate networks and\n"
"devices using it, assuming the device has the right capabilities.\n"
"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");

int setup_time_travel_start(char *str)
{
	int err;

	err = kstrtoull(str, 0, &time_travel_start);
	if (err)
		return err;

	time_travel_start_set = 1;
	return 1;
}

__setup("time-travel-start", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<seconds>\n"
"Configure the UML instance's wall clock to start at this value rather than\n"
"the host's wall clock at the time of UML boot.\n");
#endif