// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
 * Copyright (C) 2012-2014 Cisco Systems
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2019 Intel Corporation
 */

#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <asm/irq.h>
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
#include <linux/time-internal.h>
#include <linux/um_timetravel.h>
#include <shared/init.h>

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);

static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
static unsigned long long time_travel_next_event;
static struct time_travel_event time_travel_timer_event;
static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
static bool time_travel_ext_free_until_valid;
static unsigned long long time_travel_ext_free_until;

static void time_travel_set_time(unsigned long long ns)
{
	if (unlikely(ns < time_travel_time))
		panic("time-travel: time goes backwards %lld -> %lld\n",
		      time_travel_time, ns);
	else if (unlikely(ns >= S64_MAX))
		panic("The system was going to sleep forever, aborting");

	time_travel_time = ns;
}

enum time_travel_message_handling {
	TTMH_IDLE,
	TTMH_POLL,
	TTMH_READ,
};
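
/*
 * Read one message from the time-travel controller socket and handle it:
 * RUN advances the simulated time, FREE_UNTIL records how far we may run
 * without asking again, and everything except an ACK is acknowledged back
 * to the controller.
 */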
static void time_travel_handle_message(struct um_timetravel_msg *msg,
				       enum time_travel_message_handling mode)
{
	struct um_timetravel_msg resp = {
		.op = UM_TIMETRAVEL_ACK,
	};
	int ret;

	/*
	 * Poll outside the locked section (if we're not called to only read
	 * the response) so we can get interrupts for e.g. virtio while we're
	 * here, but then we need to lock to not get interrupted between the
	 * read of the message and write of the ACK.
	 */
	if (mode != TTMH_READ) {
		bool disabled = irqs_disabled();

		BUG_ON(mode == TTMH_IDLE && !disabled);

		if (disabled)
			local_irq_enable();
		while (os_poll(1, &time_travel_ext_fd) != 0) {
			/* nothing */
		}
		if (disabled)
			local_irq_disable();
	}

	ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));

	if (ret == 0)
		panic("time-travel external link is broken\n");
	if (ret != sizeof(*msg))
		panic("invalid time-travel message - %d bytes\n", ret);

	switch (msg->op) {
	default:
		WARN_ONCE(1, "time-travel: unexpected message %lld\n",
			  (unsigned long long)msg->op);
		break;
	case UM_TIMETRAVEL_ACK:
		return;
	case UM_TIMETRAVEL_RUN:
		time_travel_set_time(msg->time);
		break;
	case UM_TIMETRAVEL_FREE_UNTIL:
		time_travel_ext_free_until_valid = true;
		time_travel_ext_free_until = msg->time;
		break;
	}

	resp.seq = msg->seq;
	os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
}
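
/*
 * Send one request to the controller and keep reading messages until its
 * ACK (matched by sequence number) arrives; for UM_TIMETRAVEL_GET the ACK
 * also carries the current time, which we adopt.
 */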
static u64 time_travel_ext_req(u32 op, u64 time)
{
	static int seq;
	int mseq = ++seq;
	struct um_timetravel_msg msg = {
		.op = op,
		.time = time,
		.seq = mseq,
	};
	unsigned long flags;

	/*
	 * We need to save interrupts here and only restore when we
	 * got the ACK - otherwise we can get interrupted and send
	 * another request while we're still waiting for an ACK, but
	 * the peer doesn't know we got interrupted and will send
	 * the ACKs in the same order as the messages, but we'd need
	 * to see them in the opposite order ...
	 *
	 * This wouldn't matter *too* much, but some ACKs carry the
	 * current time (for UM_TIMETRAVEL_GET) and getting another
	 * ACK without a time would confuse us a lot!
	 *
	 * The sequence number assignment that happens here lets us
	 * debug such message handling issues more easily.
	 */
	local_irq_save(flags);
	os_write_file(time_travel_ext_fd, &msg, sizeof(msg));

	while (msg.op != UM_TIMETRAVEL_ACK)
		time_travel_handle_message(&msg, TTMH_READ);

	if (msg.seq != mseq)
		panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
		      msg.op, msg.seq, mseq, msg.time);

	if (op == UM_TIMETRAVEL_GET)
		time_travel_set_time(msg.time);
	local_irq_restore(flags);

	return msg.time;
}

void __time_travel_wait_readable(int fd)
{
	int fds[2] = { fd, time_travel_ext_fd };
	int ret;

	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	while ((ret = os_poll(2, fds))) {
		struct um_timetravel_msg msg;

		if (ret == 1)
			break;
		time_travel_handle_message(&msg, TTMH_READ);
	}
}
EXPORT_SYMBOL_GPL(__time_travel_wait_readable);
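
/*
 * Tell the controller the next time we need to run, unless we already asked
 * for exactly this time or the request still falls within the current
 * "free until" grant while we're running.
 */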
static void time_travel_ext_update_request(unsigned long long time)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	/* asked for exactly this time previously */
	if (time_travel_ext_prev_request_valid &&
	    time == time_travel_ext_prev_request)
		return;

	/*
	 * if we're running and are allowed to run past the request
	 * then we don't need to update it either
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
	    time < time_travel_ext_free_until)
		return;

	time_travel_ext_prev_request = time;
	time_travel_ext_prev_request_valid = true;
	time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
}

void __time_travel_propagate_time(void)
{
	static unsigned long long last_propagated;

	if (last_propagated == time_travel_time)
		return;

	time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
	last_propagated = time_travel_time;
}
EXPORT_SYMBOL_GPL(__time_travel_propagate_time);

/* returns true if we must do a wait to the simtime device */
static bool time_travel_ext_request(unsigned long long time)
{
	/*
	 * If we received an external sync point ("free until") then we
	 * don't have to request/wait for anything until then, unless
	 * we're already waiting.
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
	    time < time_travel_ext_free_until)
		return false;

	time_travel_ext_update_request(time);
	return true;
}

static void time_travel_ext_wait(bool idle)
{
	struct um_timetravel_msg msg = {
		.op = UM_TIMETRAVEL_ACK,
	};

	time_travel_ext_prev_request_valid = false;
	time_travel_ext_free_until_valid = false;
	time_travel_ext_waiting++;

	time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);

	/*
	 * Here we are deep in the idle loop, so we have to break out of the
	 * kernel abstraction in a sense and implement this in terms of the
	 * UML system waiting on the VQ interrupt while sleeping, when we get
	 * the signal it'll call time_travel_ext_vq_notify_done() completing the
	 * call.
	 */
	while (msg.op != UM_TIMETRAVEL_RUN)
		time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);

	time_travel_ext_waiting--;

	/* we might request more stuff while polling - reset when we run */
	time_travel_ext_prev_request_valid = false;
}

static void time_travel_ext_get_time(void)
{
	time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}

static void __time_travel_update_time(unsigned long long ns, bool idle)
{
	if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
		time_travel_ext_wait(idle);
	else
		time_travel_set_time(ns);
}

static struct time_travel_event *time_travel_first_event(void)
{
	return list_first_entry_or_null(&time_travel_events,
					struct time_travel_event,
					list);
}
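
/*
 * Insert an event into the time-ordered event list and propagate the new
 * earliest expiry to the external controller, if any.
 */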
static void __time_travel_add_event(struct time_travel_event *e,
				    unsigned long long time)
{
	struct time_travel_event *tmp;
	bool inserted = false;
	unsigned long flags;

	if (e->pending)
		return;

	e->pending = true;
	e->time = time;

	local_irq_save(flags);
	list_for_each_entry(tmp, &time_travel_events, list) {
		/*
		 * Add the new entry before one with higher time,
		 * or if they're equal and both on stack, because
		 * in that case we need to unwind the stack in the
		 * right order, and the later event (timer sleep
		 * or such) must be dequeued first.
		 */
		if ((tmp->time > e->time) ||
		    (tmp->time == e->time && tmp->onstack && e->onstack)) {
			list_add_tail(&e->list, &tmp->list);
			inserted = true;
			break;
		}
	}

	if (!inserted)
		list_add_tail(&e->list, &time_travel_events);

	tmp = time_travel_first_event();
	time_travel_ext_update_request(tmp->time);
	time_travel_next_event = tmp->time;
	local_irq_restore(flags);
}

static void time_travel_add_event(struct time_travel_event *e,
				  unsigned long long time)
{
	if (WARN_ON(!e->fn))
		return;

	__time_travel_add_event(e, time);
}

void time_travel_add_event_rel(struct time_travel_event *e,
			       unsigned long long delay_ns)
{
	time_travel_add_event(e, time_travel_time + delay_ns);
}

void time_travel_periodic_timer(struct time_travel_event *e)
{
	time_travel_add_event(&time_travel_timer_event,
			      time_travel_time + time_travel_timer_interval);
	deliver_alarm();
}

void deliver_time_travel_irqs(void)
{
	struct time_travel_event *e;
	unsigned long flags;

	/*
	 * Don't do anything for most cases. Note that because here we have
	 * to disable IRQs (and re-enable later) we'll actually recurse at
	 * the end of the function, so this is strictly necessary.
	 */
	if (likely(list_empty(&time_travel_irqs)))
		return;

	local_irq_save(flags);
	irq_enter();
	while ((e = list_first_entry_or_null(&time_travel_irqs,
					     struct time_travel_event,
					     list))) {
		WARN(e->time != time_travel_time,
		     "time moved from %lld to %lld before IRQ delivery\n",
		     time_travel_time, e->time);
		list_del(&e->list);
		e->pending = false;
		e->fn(e);
	}
	irq_exit();
	local_irq_restore(flags);
}
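
/*
 * Run an event's handler now if possible; if interrupts are disabled, queue
 * it on time_travel_irqs so deliver_time_travel_irqs() runs it once they are
 * enabled again.
 */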
static void time_travel_deliver_event(struct time_travel_event *e)
{
	if (e == &time_travel_timer_event) {
		/*
		 * deliver_alarm() does the irq_enter/irq_exit
		 * by itself, so must handle it specially here
		 */
		e->fn(e);
	} else if (irqs_disabled()) {
		list_add_tail(&e->list, &time_travel_irqs);
		/*
		 * set pending again, it was set to false when the
		 * event was deleted from the original list, but
		 * now it's still pending until we deliver the IRQ.
		 */
		e->pending = true;
	} else {
		unsigned long flags;

		local_irq_save(flags);
		irq_enter();
		e->fn(e);
		irq_exit();
		local_irq_restore(flags);
	}
}

bool time_travel_del_event(struct time_travel_event *e)
{
	unsigned long flags;

	if (!e->pending)
		return false;
	local_irq_save(flags);
	e->pending = false;
	list_del(&e->list);
	local_irq_restore(flags);
	return true;
}
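
/*
 * Advance simulated time to "next": queue an on-stack marker event at that
 * time, then deliver every earlier event in order (waiting for the external
 * controller as needed) until the marker itself is dequeued.
 */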
static void time_travel_update_time(unsigned long long next, bool idle)
{
	struct time_travel_event ne = {
		.onstack = true,
	};
	struct time_travel_event *e;
	bool finished = idle;

	/* add it without a handler - we deal with that specifically below */
	__time_travel_add_event(&ne, next);

	do {
		e = time_travel_first_event();

		BUG_ON(!e);
		__time_travel_update_time(e->time, idle);

		/* new events may have been inserted while we were waiting */
		if (e == time_travel_first_event()) {
			BUG_ON(!time_travel_del_event(e));
			BUG_ON(time_travel_time != e->time);

			if (e == &ne) {
				finished = true;
			} else {
				if (e->onstack)
					panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
					      time_travel_time, e->time, e);
				time_travel_deliver_event(e);
			}
		}

		e = time_travel_first_event();
		if (e)
			time_travel_ext_update_request(e->time);
	} while (ne.pending && !finished);

	time_travel_del_event(&ne);
}

void time_travel_ndelay(unsigned long nsec)
{
	time_travel_update_time(time_travel_time + nsec, false);
}
EXPORT_SYMBOL(time_travel_ndelay);

void time_travel_add_irq_event(struct time_travel_event *e)
{
	BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);

	time_travel_ext_get_time();
	/*
	 * We could model interrupt latency here, for now just
	 * don't have any latency at all and request the exact
	 * same time (again) to run the interrupt...
	 */
	time_travel_add_event(e, time_travel_time);
}
EXPORT_SYMBOL_GPL(time_travel_add_irq_event);

static void time_travel_oneshot_timer(struct time_travel_event *e)
{
	deliver_alarm();
}

void time_travel_sleep(void)
{
	/*
	 * Wait "forever" (using S64_MAX because there are some potential
	 * wrapping issues, especially with the current TT_MODE_EXTERNAL
	 * controller application).
	 */
	unsigned long long next = S64_MAX;

	if (time_travel_mode == TT_MODE_BASIC)
		os_timer_disable();

	time_travel_update_time(next, true);

	if (time_travel_mode == TT_MODE_BASIC &&
	    time_travel_timer_event.pending) {
		if (time_travel_timer_event.fn == time_travel_periodic_timer) {
			/*
			 * This is somewhat wrong - we should get the first
			 * one sooner like the os_timer_one_shot() below...
			 */
			os_timer_set_interval(time_travel_timer_interval);
		} else {
			os_timer_one_shot(time_travel_timer_event.time - next);
		}
	}
}
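
/*
 * Called for the real (host) timer signal in basic mode: jump simulated time
 * forward to the programmed expiry and re-arm the event if the periodic
 * timer was running.
 */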
static void time_travel_handle_real_alarm(void)
{
	time_travel_set_time(time_travel_next_event);

	time_travel_del_event(&time_travel_timer_event);

	if (time_travel_timer_event.fn == time_travel_periodic_timer)
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time +
				      time_travel_timer_interval);
}

static void time_travel_set_interval(unsigned long long interval)
{
	time_travel_timer_interval = interval;
}
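
/*
 * Parse the "[ID:]socket" argument of time-travel=ext:..., connect to the
 * controller socket and send the initial UM_TIMETRAVEL_START request with
 * the given ID (or -1 if none was given).
 */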
static int time_travel_connect_external(const char *socket)
{
	const char *sep;
	unsigned long long id = (unsigned long long)-1;
	int rc;

	if ((sep = strchr(socket, ':'))) {
		char buf[25] = {};

		if (sep - socket > sizeof(buf) - 1)
			goto invalid_number;

		memcpy(buf, socket, sep - socket);
		if (kstrtoull(buf, 0, &id)) {
invalid_number:
			panic("time-travel: invalid external ID in string '%s'\n",
			      socket);
			return -EINVAL;
		}

		socket = sep + 1;
	}

	rc = os_connect_socket(socket);
	if (rc < 0) {
		panic("time-travel: failed to connect to external socket %s\n",
		      socket);
		return rc;
	}

	time_travel_ext_fd = rc;

	time_travel_ext_req(UM_TIMETRAVEL_START, id);

	return 1;
}

static void time_travel_set_start(void)
{
	if (time_travel_start_set)
		return;

	switch (time_travel_mode) {
	case TT_MODE_EXTERNAL:
		time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
		/* controller gave us the *current* time, so adjust by that */
		time_travel_ext_get_time();
		time_travel_start -= time_travel_time;
		break;
	case TT_MODE_INFCPU:
	case TT_MODE_BASIC:
		if (!time_travel_start_set)
			time_travel_start = os_persistent_clock_emulation();
		break;
	case TT_MODE_OFF:
		/* we just read the host clock with os_persistent_clock_emulation() */
		break;
	}

	time_travel_start_set = true;
}
#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
#define time_travel_start_set 0
#define time_travel_start 0
#define time_travel_time 0
#define time_travel_ext_waiting 0

static inline void time_travel_update_time(unsigned long long ns, bool retearly)
{
}

static inline void time_travel_handle_real_alarm(void)
{
}

static void time_travel_set_interval(unsigned long long interval)
{
}

static inline void time_travel_set_start(void)
{
}

/* fail link if this actually gets used */
extern u64 time_travel_ext_req(u32 op, u64 time);

/* these are empty macros so the struct/fn need not exist */
#define time_travel_add_event(e, time) do { } while (0)
/* externally not usable - redefine here so we can */
#undef time_travel_del_event
#define time_travel_del_event(e) do { } while (0)
#endif

void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	unsigned long flags;

	/*
	 * In basic time-travel mode we still get real interrupts
	 * (signals) but since we don't read time from the OS, we
	 * must update the simulated time here to the expiry when
	 * we get a signal.
	 * This is not the case in inf-cpu mode, since there we
	 * never get any real signals from the OS.
	 */
	if (time_travel_mode == TT_MODE_BASIC)
		time_travel_handle_real_alarm();

	local_irq_save(flags);
	do_IRQ(TIMER_IRQ, regs);
	local_irq_restore(flags);
}

static int itimer_shutdown(struct clock_event_device *evt)
{
	if (time_travel_mode != TT_MODE_OFF)
		time_travel_del_event(&time_travel_timer_event);

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_disable();

	return 0;
}

static int itimer_set_periodic(struct clock_event_device *evt)
{
	unsigned long long interval = NSEC_PER_SEC / HZ;

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_periodic_timer);
		time_travel_set_interval(interval);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + interval);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_set_interval(interval);

	return 0;
}

static int itimer_next_event(unsigned long delta,
			     struct clock_event_device *evt)
{
	delta += 1;

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_oneshot_timer);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + delta);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		return os_timer_one_shot(delta);

	return 0;
}

static int itimer_one_shot(struct clock_event_device *evt)
{
	return itimer_next_event(0, evt);
}

static struct clock_event_device timer_clockevent = {
	.name = "posix-timer",
	.rating = 250,
	.cpumask = cpu_possible_mask,
	.features = CLOCK_EVT_FEAT_PERIODIC |
		    CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown = itimer_shutdown,
	.set_state_periodic = itimer_set_periodic,
	.set_state_oneshot = itimer_one_shot,
	.set_next_event = itimer_next_event,
	.shift = 0,
	.max_delta_ns = 0xffffffff,
	.max_delta_ticks = 0xffffffff,
	.min_delta_ns = TIMER_MIN_DELTA,
	.min_delta_ticks = TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM
	.irq = 0,
	.mult = 1,
};

static irqreturn_t um_timer(int irq, void *dev)
{
	if (get_current()->mm != NULL) {
		/* userspace - relay signal, results in correct userspace timers */
		os_alarm_process(get_current()->mm->context.id.u.pid);
	}

	(*timer_clockevent.event_handler)(&timer_clockevent);

	return IRQ_HANDLED;
}

static u64 timer_read(struct clocksource *cs)
{
	if (time_travel_mode != TT_MODE_OFF) {
		/*
		 * We make reading the timer cost a bit so that we don't get
		 * stuck in loops that expect time to move more than the
		 * exact requested sleep amount, e.g. python's socket server,
		 * see https://bugs.python.org/issue37026.
		 *
		 * However, don't do that when we're in interrupt or such as
		 * then we might recurse into our own processing, and get to
		 * even more waiting, and that's not good - it messes up the
		 * "what do I do next" and onstack event we use to know when
		 * to return from time_travel_update_time().
		 */
		if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
		    !time_travel_ext_waiting)
			time_travel_update_time(time_travel_time +
						TIMER_MULTIPLIER,
						false);
		return time_travel_time / TIMER_MULTIPLIER;
	}

	return os_nsecs() / TIMER_MULTIPLIER;
}

static struct clocksource timer_clocksource = {
	.name = "timer",
	.rating = 300,
	.read = timer_read,
	.mask = CLOCKSOURCE_MASK(64),
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};

static void __init um_timer_setup(void)
{
	int err;

	err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
	if (err != 0)
		printk(KERN_ERR "register_timer : request_irq failed - "
		       "errno = %d\n", -err);

	err = os_timer_create();
	if (err != 0) {
		printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
		return;
	}

	err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER);
	if (err) {
		printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
		return;
	}
	clockevents_register_device(&timer_clockevent);
}

void read_persistent_clock64(struct timespec64 *ts)
{
	long long nsecs;

	time_travel_set_start();

	if (time_travel_mode != TT_MODE_OFF)
		nsecs = time_travel_start + time_travel_time;
	else
		nsecs = os_persistent_clock_emulation();

	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
				  nsecs % NSEC_PER_SEC);
}

void __init time_init(void)
{
	timer_set_signal_handler();
	late_time_init = um_timer_setup;
}

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
unsigned long calibrate_delay_is_known(void)
{
	if (time_travel_mode == TT_MODE_INFCPU ||
	    time_travel_mode == TT_MODE_EXTERNAL)
		/*
		 * Assumption: under simulated time the busy-loop calibration
		 * is meaningless, so any non-zero value here just skips it.
		 */
		return 1;

	return 0;
}
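
/*
 * Command line parsing for "time-travel": no argument selects basic mode,
 * "=inf-cpu" selects infinite-CPU mode, and "=ext:[ID:]/path/to/socket"
 * selects external mode coordinated through the given controller socket.
 */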
int setup_time_travel(char *str)
{
	if (strcmp(str, "=inf-cpu") == 0) {
		time_travel_mode = TT_MODE_INFCPU;
		timer_clockevent.name = "time-travel-timer-infcpu";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	if (strncmp(str, "=ext:", 5) == 0) {
		time_travel_mode = TT_MODE_EXTERNAL;
		timer_clockevent.name = "time-travel-timer-external";
		timer_clocksource.name = "time-travel-clock-external";
		return time_travel_connect_external(str + 5);
	}

	if (!*str) {
		time_travel_mode = TT_MODE_BASIC;
		timer_clockevent.name = "time-travel-timer";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	return -EINVAL;
}

__setup("time-travel", setup_time_travel);
__uml_help(setup_time_travel,
"time-travel\n"
"This option just enables basic time travel mode, in which the clock/timers\n"
"inside the UML instance skip forward when there's nothing to do, rather than\n"
"waiting for real time to elapse. However, instance CPU speed is limited by\n"
"the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
"clock (but quicker when there's nothing to do).\n"
"\n"
"time-travel=inf-cpu\n"
"This enables time travel mode with infinite processing power, in which there\n"
"are no wall clock timers, and any CPU processing happens - as seen from the\n"
"guest - instantly. This can be useful for accurate simulation regardless of\n"
"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
"\n"
"time-travel=ext:[ID:]/path/to/socket\n"
"This enables time travel mode similar to =inf-cpu, except the system will\n"
"use the given socket to coordinate with a central scheduler, in order to\n"
"have more than one system simultaneously be on simulated time. The virtio\n"
"driver code in UML knows about this so you can also simulate networks and\n"
"devices using it, assuming the device has the right capabilities.\n"
"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");

int setup_time_travel_start(char *str)
{
	int err;

	err = kstrtoull(str, 0, &time_travel_start);
	if (err)
		return err;

	time_travel_start_set = 1;
	return 1;
}

__setup("time-travel-start", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<seconds>\n"
"Configure the UML instance's wall clock to start at this value rather than\n"
"the host's wall clock at the time of UML boot.\n");
#endif