1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (c) 2010, Microsoft Corporation.
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 #include <linux/kernel.h>
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/sysctl.h>
16 #include <linux/reboot.h>
17 #include <linux/hyperv.h>
18 #include <linux/clockchips.h>
19 #include <linux/ptp_clock_kernel.h>
20 #include <asm/mshyperv.h>
22 #include "hyperv_vmbus.h"
/*
 * Protocol version constants. Each one packs the major number into the
 * upper 16 bits and the minor number into the lower 16 bits; the channel
 * callbacks in this file unpack a version the same way (">> 16" and
 * "& 0xFFFF") when they log it.
 */
28 #define SD_VERSION_3_1 (SD_MAJOR << 16 | SD_MINOR_1)
29 #define SD_VERSION_3_2 (SD_MAJOR << 16 | SD_MINOR_2)
30 #define SD_VERSION (SD_MAJOR << 16 | SD_MINOR)
33 #define SD_VERSION_1 (SD_MAJOR_1 << 16 | SD_MINOR)
37 #define TS_VERSION (TS_MAJOR << 16 | TS_MINOR)
40 #define TS_VERSION_1 (TS_MAJOR_1 << 16 | TS_MINOR)
43 #define TS_VERSION_3 (TS_MAJOR_3 << 16 | TS_MINOR)
47 #define HB_VERSION (HB_MAJOR << 16 | HB_MINOR)
50 #define HB_VERSION_1 (HB_MAJOR_1 << 16 | HB_MINOR)
/*
 * Per-service version slots (shutdown, timesync, heartbeat). Each channel
 * callback hands the address of its slot to vmbus_prep_negotiate_resp() and
 * then logs the value as "major.minor"; presumably that helper stores the
 * negotiated version here -- confirm against its definition.
 */
52 static int sd_srv_version;
53 static int ts_srv_version;
54 static int hb_srv_version;
/*
 * Version tables and their element counts. The callbacks pass them to
 * vmbus_prep_negotiate_resp() as (table, count) pairs, with fw_versions
 * supplied alongside the service-specific table.
 * NOTE(review): the initializer contents are not visible in this excerpt;
 * presumably each *_VER_COUNT must equal the number of entries in its table.
 */
56 #define SD_VER_COUNT 4
57 static const int sd_versions[] = {
64 #define TS_VER_COUNT 3
65 static const int ts_versions[] = {
71 #define HB_VER_COUNT 2
72 static const int hb_versions[] = {
77 #define FW_VER_COUNT 2
78 static const int fw_versions[] = {
84 * Send the "hibernate" udev event in a thread context.
/*
 * Work item bundled with the device it acts on. send_hibernate_uevent()
 * recovers this wrapper from the embedded work_struct with container_of()
 * and raises the uevent on dev's kobject.
 */
86 struct hibernate_work_context {
87 struct work_struct work;
88 struct hv_device *dev;
/*
 * The single shared context, filled in by hv_shutdown_init(), and the cached
 * result of hv_is_hibernation_supported().
 */
91 static struct hibernate_work_context hibernate_context;
92 static bool hibernation_supported;
/*
 * Work handler: emits a KOBJ_CHANGE uevent carrying "EVENT=hibernate" on the
 * kobject of the device stored in the enclosing hibernate_work_context, then
 * logs that the event was sent.
 *
 * @work: the work_struct embedded in a struct hibernate_work_context.
 */
94 static void send_hibernate_uevent(struct work_struct *work)
96 char *uevent_env[2] = { "EVENT=hibernate", NULL };
97 struct hibernate_work_context *ctx;
99 ctx = container_of(work, struct hibernate_work_context, work);
101 kobject_uevent_env(&ctx->dev->device.kobj, KOBJ_CHANGE, uevent_env);
103 pr_info("Sent hibernation uevent\n");
/*
 * Init hook of the shutdown service (installed as util_shutdown.util_init).
 * Prepares the hibernate work item with send_hibernate_uevent() as handler,
 * records the channel's device object as the uevent target and caches
 * hv_is_hibernation_supported().
 *
 * @srv: service descriptor; only srv->channel is read in the visible lines.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
106 static int hv_shutdown_init(struct hv_util_service *srv)
108 struct vmbus_channel *channel = srv->channel;
110 INIT_WORK(&hibernate_context.work, send_hibernate_uevent);
111 hibernate_context.dev = channel->device_obj;
113 hibernation_supported = hv_is_hibernation_supported();
/*
 * Service descriptor for the shutdown channel: message callback plus init
 * hook. The callback is forward-declared because it is defined further down.
 */
118 static void shutdown_onchannelcallback(void *context);
119 static struct hv_util_service util_shutdown = {
120 .util_cb = shutdown_onchannelcallback,
121 .util_init = hv_shutdown_init,
/*
 * Service descriptor for the time-sync channel. Besides the message callback
 * it installs init, pre-suspend and deinit hooks, all forward-declared here
 * because their definitions appear further down the file.
 */
124 static int hv_timesync_init(struct hv_util_service *srv);
125 static int hv_timesync_pre_suspend(void);
126 static void hv_timesync_deinit(void);
128 static void timesync_onchannelcallback(void *context);
129 static struct hv_util_service util_timesynch = {
130 .util_cb = timesync_onchannelcallback,
131 .util_init = hv_timesync_init,
132 .util_pre_suspend = hv_timesync_pre_suspend,
133 .util_deinit = hv_timesync_deinit,
/*
 * Service descriptor for the heartbeat channel. Only the message callback is
 * set in the visible lines.
 */
136 static void heartbeat_onchannelcallback(void *context);
137 static struct hv_util_service util_heartbeat = {
138 .util_cb = heartbeat_onchannelcallback,
/*
 * Service descriptor for the KVP channel. Every hook is an hv_kvp_* function
 * that is not defined in the visible part of this file (presumably declared
 * in hyperv_vmbus.h -- confirm).
 */
141 static struct hv_util_service util_kvp = {
142 .util_cb = hv_kvp_onchannelcallback,
143 .util_init = hv_kvp_init,
144 .util_init_transport = hv_kvp_init_transport,
145 .util_pre_suspend = hv_kvp_pre_suspend,
146 .util_pre_resume = hv_kvp_pre_resume,
147 .util_deinit = hv_kvp_deinit,
/*
 * Service descriptor for the VSS channel. Every hook is an hv_vss_* function
 * that is not defined in the visible part of this file (presumably declared
 * in hyperv_vmbus.h -- confirm).
 */
150 static struct hv_util_service util_vss = {
151 .util_cb = hv_vss_onchannelcallback,
152 .util_init = hv_vss_init,
153 .util_init_transport = hv_vss_init_transport,
154 .util_pre_suspend = hv_vss_pre_suspend,
155 .util_pre_resume = hv_vss_pre_resume,
156 .util_deinit = hv_vss_deinit,
/*
 * Work handler bound to shutdown_work: calls orderly_poweroff(true).
 *
 * @dummy: not used in the visible body.
 */
159 static void perform_shutdown(struct work_struct *dummy)
161 orderly_poweroff(true);
/*
 * Work handler bound to restart_work.
 * NOTE(review): the body is not visible in this excerpt.
 */
164 static void perform_restart(struct work_struct *dummy)
/*
 * Statically declared work items. shutdown_onchannelcallback() points its
 * local "work" pointer at one of these depending on the request it received.
 */
170 * Perform the shutdown operation in a thread context.
172 static DECLARE_WORK(shutdown_work, perform_shutdown);
175 * Perform the restart operation in a thread context.
177 static DECLARE_WORK(restart_work, perform_restart);
/*
 * Channel callback for the shutdown service.
 *
 * Receives one packet into util_shutdown.recv_buffer (HV_HYP_PAGE_SIZE is
 * passed as the buffer length), checks that it is at least ICMSG_HDR bytes
 * long and then dispatches on icmsghdrp->icmsgtype:
 *   - ICMSGTYPE_NEGOTIATE: version negotiation through
 *     vmbus_prep_negotiate_resp(); on success the version is logged.
 *   - ICMSGTYPE_SHUTDOWN: after a second length check covering
 *     struct shutdown_msg_data, shutdown_msg->flags selects shutdown_work,
 *     restart_work or the hibernate work item (the latter only when
 *     hibernation_supported is set), or the request is failed with HV_E_FAIL.
 *   - any other type: status is set to HV_E_FAIL and the type is logged.
 * The header is then flagged as a transaction response and the same buffer
 * is sent back on the channel.
 *
 * @context: the struct vmbus_channel the packet arrived on.
 *
 * NOTE(review): several lines (local declarations, case labels, early
 * returns and the point where "work" is scheduled) are not visible in this
 * excerpt.
 */
179 static void shutdown_onchannelcallback(void *context)
181 struct vmbus_channel *channel = context;
182 struct work_struct *work = NULL;
185 u8 *shut_txf_buf = util_shutdown.recv_buffer;
187 struct shutdown_msg_data *shutdown_msg;
189 struct icmsg_hdr *icmsghdrp;
191 if (vmbus_recvpacket(channel, shut_txf_buf, HV_HYP_PAGE_SIZE, &recvlen, &requestid)) {
192 pr_err_ratelimited("Shutdown request received. Could not read into shut txf buf\n");
199 /* Ensure recvlen is big enough to read header data */
200 if (recvlen < ICMSG_HDR) {
201 pr_err_ratelimited("Shutdown request received. Packet length too small: %d\n",
206 icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[sizeof(struct vmbuspipe_hdr)];
208 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
209 if (vmbus_prep_negotiate_resp(icmsghdrp,
210 shut_txf_buf, recvlen,
211 fw_versions, FW_VER_COUNT,
212 sd_versions, SD_VER_COUNT,
213 NULL, &sd_srv_version)) {
214 pr_info("Shutdown IC version %d.%d\n",
215 sd_srv_version >> 16,
216 sd_srv_version & 0xFFFF);
218 } else if (icmsghdrp->icmsgtype == ICMSGTYPE_SHUTDOWN) {
219 /* Ensure recvlen is big enough to contain shutdown_msg_data struct */
220 if (recvlen < ICMSG_HDR + sizeof(struct shutdown_msg_data)) {
221 pr_err_ratelimited("Invalid shutdown msg data. Packet length too small: %u\n",
226 shutdown_msg = (struct shutdown_msg_data *)&shut_txf_buf[ICMSG_HDR];
229 * shutdown_msg->flags can be 0(shut down), 2(reboot),
230 * or 4(hibernate). It may bitwise-OR 1, which means
231 * performing the request by force. Linux always tries
232 * to perform the request by force.
234 switch (shutdown_msg->flags) {
237 icmsghdrp->status = HV_S_OK;
238 work = &shutdown_work;
239 pr_info("Shutdown request received - graceful shutdown initiated\n");
243 icmsghdrp->status = HV_S_OK;
244 work = &restart_work;
245 pr_info("Restart request received - graceful restart initiated\n");
249 pr_info("Hibernation request received\n");
250 icmsghdrp->status = hibernation_supported ?
252 if (hibernation_supported)
253 work = &hibernate_context.work;
256 icmsghdrp->status = HV_E_FAIL;
257 pr_info("Shutdown request received - Invalid request\n");
261 icmsghdrp->status = HV_E_FAIL;
262 pr_err_ratelimited("Shutdown request received. Invalid msg type: %d\n",
263 icmsghdrp->icmsgtype);
266 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
267 | ICMSGHDRFLAG_RESPONSE;
269 vmbus_sendpacket(channel, shut_txf_buf,
271 VM_PKT_DATA_INBAND, 0);
278 * Set the host time in a process context.
/*
 * Initialised in hv_timesync_init() with hv_set_host_time() as handler,
 * scheduled from adj_guesttime() and cancelled by hv_timesync_cancel_work().
 */
280 static struct work_struct adj_time_work;
283 * The last time sample, received from the host. PTP device responds to
284 * requests by using this data and the current partition-wide time reference
/*
 * Module parameter (mode 0644). When set, adj_guesttime() also schedules a
 * clock update if hv_implicit_sync() accepts the freshly stored host time,
 * even without ICTIMESYNCFLAG_SYNC in the message flags.
 */
293 static bool timesync_implicit;
295 module_param(timesync_implicit, bool, 0644);
296 MODULE_PARM_DESC(timesync_implicit, "If set treat SAMPLE as SYNC when clock is behind");
/*
 * Converts a host time value to nanoseconds: subtracts WLTIMEDELTA, then
 * multiplies by 100.
 * NOTE(review): the factor suggests the input counts 100 ns units and that
 * WLTIMEDELTA is an epoch offset in those units; WLTIMEDELTA is defined
 * outside this excerpt -- confirm.
 *
 * @reftime: host time value to convert.
 */
298 static inline u64 reftime_to_ns(u64 reftime)
300 return (reftime - WLTIMEDELTA) * 100;
/*
 * The constant below is expressed in nanoseconds (600 * NSEC_PER_SEC);
 * hv_get_adj_host_time() compares it against the elapsed reference-counter
 * delta multiplied by 100.
 */
304 * Hard coded threshold for host timesync delay: 600 seconds
306 static const u64 HOST_TIMESYNC_DELAY_THRESH = 600 * (u64)NSEC_PER_SEC;
/*
 * Derives the current host time from the last saved sample.
 *
 * With host_ts.lock held, reads the reference counter, adds the counter
 * delta since the sample (reftime - host_ts.ref_time) to host_ts.host_time
 * and stores the result in *ts through reftime_to_ns() and
 * ns_to_timespec64(). A delta larger than HOST_TIMESYNC_DELAY_THRESH
 * produces a one-time warning.
 *
 * @ts: output timespec.
 *
 * Return: per the comment in the body, ESTALE is used to report a stale
 * sample. NOTE(review): the lines that set and return the result are not
 * visible in this excerpt -- confirm the exact value and sign, and whether
 * *ts is still written on the stale path.
 */
308 static int hv_get_adj_host_time(struct timespec64 *ts)
310 u64 newtime, reftime, timediff_adj;
314 spin_lock_irqsave(&host_ts.lock, flags);
315 reftime = hv_read_reference_counter();
318 * We need to let the caller know that last update from host
319 * is older than the max allowable threshold. clock_gettime()
320 * and PTP ioctl do not have a documented error that we could
321 * return for this specific case. Use ESTALE to report this.
323 timediff_adj = reftime - host_ts.ref_time;
324 if (timediff_adj * 100 > HOST_TIMESYNC_DELAY_THRESH) {
325 pr_warn_once("TIMESYNC IC: Stale time stamp, %llu nsecs old\n",
326 (timediff_adj * 100));
330 newtime = host_ts.host_time + timediff_adj;
331 *ts = ns_to_timespec64(reftime_to_ns(newtime));
332 spin_unlock_irqrestore(&host_ts.lock, flags);
/*
 * Work handler for adj_time_work: sets the system clock with
 * do_settimeofday64(), but only when hv_get_adj_host_time() returns 0.
 *
 * @work: not used in the visible lines.
 */
337 static void hv_set_host_time(struct work_struct *work)
340 struct timespec64 ts;
342 if (!hv_get_adj_host_time(&ts))
343 do_settimeofday64(&ts);
347 * Due to a bug on Hyper-V hosts, the sync flag may not always be sent on resume.
348 * Force a sync if the guest is behind.
/*
 * Compares a host time sample against the guest's real-time clock plus five
 * seconds.
 *
 * @host_time: host time sample; converted with reftime_to_ns().
 *
 * The comparison succeeds when the host time is at or beyond "now + 5 s".
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably true is returned when the comparison succeeds.
 */
350 static inline bool hv_implicit_sync(u64 host_time)
352 struct timespec64 new_ts;
353 struct timespec64 threshold_ts;
355 new_ts = ns_to_timespec64(reftime_to_ns(host_time));
356 ktime_get_real_ts64(&threshold_ts);
358 threshold_ts.tv_sec += 5;
361 * If guest behind the host by 5 or more seconds.
363 if (timespec64_compare(&new_ts, &threshold_ts) >= 0)
370 * Synchronize time with host after reboot, restore, etc.
372 * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
373 * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
374 * message after the timesync channel is opened. Since the hv_utils module is
375 * loaded after hv_vmbus, the first message is usually missed. This bit is
376 * considered a hard request to discipline the clock.
378 * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
379 * typically used as a hint to the guest. The guest is under no obligation
380 * to discipline the clock.
/*
 * Records a host time sample and optionally schedules a clock update.
 *
 * @hosttime:  host time carried in the message.
 * @reftime:   reference-counter value associated with the sample; the caller
 *             handling the older message layout passes the current counter.
 * @adj_flags: message flag bits; only ICTIMESYNCFLAG_SYNC is tested here.
 *
 * host_ts is updated while host_ts.lock is held. The check that decides
 * whether to schedule adj_time_work runs after the lock has been released
 * and reads host_ts.host_time without it.
 */
382 static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags)
388 * Save the adjusted time sample from the host and the snapshot
389 * of the current system time.
391 spin_lock_irqsave(&host_ts.lock, flags);
393 cur_reftime = hv_read_reference_counter();
394 host_ts.host_time = hosttime;
395 host_ts.ref_time = cur_reftime;
398 * TimeSync v4 messages contain reference time (guest's Hyper-V
399 * clocksource read when the time sample was generated), we can
400 * improve the precision by adding the delta between now and the
401 * time of generation. For older protocols we set
402 * reftime == cur_reftime on call.
404 host_ts.host_time += (cur_reftime - reftime);
406 spin_unlock_irqrestore(&host_ts.lock, flags);
408 /* Schedule work to do do_settimeofday64() */
409 if ((adj_flags & ICTIMESYNCFLAG_SYNC) ||
410 (timesync_implicit && hv_implicit_sync(host_ts.host_time)))
411 schedule_work(&adj_time_work);
415 * Time Sync Channel message handler.
/*
 * Channel callback for the time-sync service.
 *
 * Receives packets into util_timesynch.recv_buffer, checks the length
 * against ICMSG_HDR and dispatches on the message type:
 *   - ICMSGTYPE_NEGOTIATE: negotiates through vmbus_prep_negotiate_resp()
 *     and logs the resulting version.
 *   - ICMSGTYPE_TIMESYNC: when ts_srv_version > TS_VERSION_3 the payload is
 *     read as struct ictimesync_ref_data (parent time plus VM reference
 *     time); otherwise as struct ictimesync_data, with the current reference
 *     counter standing in for the reference time. Either way the sample is
 *     handed to adj_guesttime() after a payload length check.
 *   - any other type: HV_E_FAIL status and a rate-limited error.
 * The header is then flagged as a transaction response and the buffer is
 * sent back on the channel.
 *
 * @context: the struct vmbus_channel the packets arrive on.
 *
 * NOTE(review): the loop construct, local declarations, the flags argument
 * of the adj_guesttime() calls and the early exits are not visible in this
 * excerpt.
 */
417 static void timesync_onchannelcallback(void *context)
419 struct vmbus_channel *channel = context;
422 struct icmsg_hdr *icmsghdrp;
423 struct ictimesync_data *timedatap;
424 struct ictimesync_ref_data *refdata;
425 u8 *time_txf_buf = util_timesynch.recv_buffer;
428 * Drain the ring buffer and use the last packet to update
432 int ret = vmbus_recvpacket(channel, time_txf_buf,
433 HV_HYP_PAGE_SIZE, &recvlen,
436 pr_err_ratelimited("TimeSync IC pkt recv failed (Err: %d)\n",
444 /* Ensure recvlen is big enough to read header data */
445 if (recvlen < ICMSG_HDR) {
446 pr_err_ratelimited("Timesync request received. Packet length too small: %d\n",
451 icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[
452 sizeof(struct vmbuspipe_hdr)];
454 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
455 if (vmbus_prep_negotiate_resp(icmsghdrp,
456 time_txf_buf, recvlen,
457 fw_versions, FW_VER_COUNT,
458 ts_versions, TS_VER_COUNT,
459 NULL, &ts_srv_version)) {
460 pr_info("TimeSync IC version %d.%d\n",
461 ts_srv_version >> 16,
462 ts_srv_version & 0xFFFF);
464 } else if (icmsghdrp->icmsgtype == ICMSGTYPE_TIMESYNC) {
465 if (ts_srv_version > TS_VERSION_3) {
466 /* Ensure recvlen is big enough to read ictimesync_ref_data */
467 if (recvlen < ICMSG_HDR + sizeof(struct ictimesync_ref_data)) {
468 pr_err_ratelimited("Invalid ictimesync ref data. Length too small: %u\n",
472 refdata = (struct ictimesync_ref_data *)&time_txf_buf[ICMSG_HDR];
474 adj_guesttime(refdata->parenttime,
475 refdata->vmreferencetime,
478 /* Ensure recvlen is big enough to read ictimesync_data */
479 if (recvlen < ICMSG_HDR + sizeof(struct ictimesync_data)) {
480 pr_err_ratelimited("Invalid ictimesync data. Length too small: %u\n",
484 timedatap = (struct ictimesync_data *)&time_txf_buf[ICMSG_HDR];
486 adj_guesttime(timedatap->parenttime,
487 hv_read_reference_counter(),
491 icmsghdrp->status = HV_E_FAIL;
492 pr_err_ratelimited("Timesync request received. Invalid msg type: %d\n",
493 icmsghdrp->icmsgtype);
496 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
497 | ICMSGHDRFLAG_RESPONSE;
499 vmbus_sendpacket(channel, time_txf_buf,
501 VM_PKT_DATA_INBAND, 0);
506 * Heartbeat functionality.
507 * Every two seconds, Hyper-V sends us a heartbeat request message.
508 * We respond to this message so that Hyper-V knows we are alive.
/*
 * Channel callback for the heartbeat service.
 *
 * Receives packets into util_heartbeat.recv_buffer, checks the length
 * against ICMSG_HDR and dispatches on the message type:
 *   - ICMSGTYPE_NEGOTIATE: negotiates through vmbus_prep_negotiate_resp()
 *     and logs the resulting version.
 *   - ICMSGTYPE_HEARTBEAT: after checking that at least
 *     ICMSG_HDR + sizeof(u64) bytes arrived, increments
 *     heartbeat_msg->seq_num in place, so the reply built from the same
 *     buffer carries the bumped sequence number.
 *   - any other type: HV_E_FAIL status and a rate-limited error.
 * The header is then flagged as a transaction response and the buffer is
 * sent back on the channel.
 *
 * @context: the struct vmbus_channel the packets arrive on.
 *
 * NOTE(review): the loop construct, local declarations and early exits are
 * not visible in this excerpt.
 */
510 static void heartbeat_onchannelcallback(void *context)
512 struct vmbus_channel *channel = context;
515 struct icmsg_hdr *icmsghdrp;
516 struct heartbeat_msg_data *heartbeat_msg;
517 u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
521 if (vmbus_recvpacket(channel, hbeat_txf_buf, HV_HYP_PAGE_SIZE,
522 &recvlen, &requestid)) {
523 pr_err_ratelimited("Heartbeat request received. Could not read into hbeat txf buf\n");
530 /* Ensure recvlen is big enough to read header data */
531 if (recvlen < ICMSG_HDR) {
532 pr_err_ratelimited("Heartbeat request received. Packet length too small: %d\n",
537 icmsghdrp = (struct icmsg_hdr *)&hbeat_txf_buf[
538 sizeof(struct vmbuspipe_hdr)];
540 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
541 if (vmbus_prep_negotiate_resp(icmsghdrp,
542 hbeat_txf_buf, recvlen,
543 fw_versions, FW_VER_COUNT,
544 hb_versions, HB_VER_COUNT,
545 NULL, &hb_srv_version)) {
547 pr_info("Heartbeat IC version %d.%d\n",
548 hb_srv_version >> 16,
549 hb_srv_version & 0xFFFF);
551 } else if (icmsghdrp->icmsgtype == ICMSGTYPE_HEARTBEAT) {
553 * Ensure recvlen is big enough to read seq_num. Reserved area is not
554 * included in the check as the host may not fill it up entirely
556 if (recvlen < ICMSG_HDR + sizeof(u64)) {
557 pr_err_ratelimited("Invalid heartbeat msg data. Length too small: %u\n",
561 heartbeat_msg = (struct heartbeat_msg_data *)&hbeat_txf_buf[ICMSG_HDR];
563 heartbeat_msg->seq_num += 1;
565 icmsghdrp->status = HV_E_FAIL;
566 pr_err_ratelimited("Heartbeat request received. Invalid msg type: %d\n",
567 icmsghdrp->icmsgtype);
570 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
571 | ICMSGHDRFLAG_RESPONSE;
573 vmbus_sendpacket(channel, hbeat_txf_buf,
575 VM_PKT_DATA_INBAND, 0);
/*
 * Send and receive ring sizes handed to vmbus_open() by util_probe() and
 * util_resume(); both are VMBUS_RING_SIZE(3 * HV_HYP_PAGE_SIZE).
 */
579 #define HV_UTIL_RING_SEND_SIZE VMBUS_RING_SIZE(3 * HV_HYP_PAGE_SIZE)
580 #define HV_UTIL_RING_RECV_SIZE VMBUS_RING_SIZE(3 * HV_HYP_PAGE_SIZE)
/*
 * Probe callback: binds a util service to a VMBus device.
 *
 * The service descriptor is taken from dev_id->driver_data. Steps visible
 * here, in order: allocate a receive buffer of 4 * HV_HYP_PAGE_SIZE bytes,
 * run the optional util_init hook, set the channel read mode to
 * HV_CALL_DIRECT, store the descriptor as driver data, open the channel with
 * srv->util_cb as callback, and run the optional util_init_transport hook,
 * closing the channel again on the path shown at vmbus_close().
 * The trailing util_deinit check and kfree(srv->recv_buffer) read like the
 * error unwind.
 *
 * @dev:    device being probed.
 * @dev_id: matching id_table entry.
 *
 * NOTE(review): labels, gotos, the util_deinit call itself and all return
 * statements are not visible in this excerpt -- confirm the unwind order.
 */
582 static int util_probe(struct hv_device *dev,
583 const struct hv_vmbus_device_id *dev_id)
585 struct hv_util_service *srv =
586 (struct hv_util_service *)dev_id->driver_data;
589 srv->recv_buffer = kmalloc(HV_HYP_PAGE_SIZE * 4, GFP_KERNEL);
590 if (!srv->recv_buffer)
592 srv->channel = dev->channel;
593 if (srv->util_init) {
594 ret = srv->util_init(srv);
600 * The set of services managed by the util driver are not performance
601 * critical and do not need batched reading. Furthermore, some services
602 * such as KVP can only handle one message from the host at a time.
603 * Turn off batched reading for all util drivers before we open the
606 set_channel_read_mode(dev->channel, HV_CALL_DIRECT);
608 hv_set_drvdata(dev, srv);
610 ret = vmbus_open(dev->channel, HV_UTIL_RING_SEND_SIZE,
611 HV_UTIL_RING_RECV_SIZE, NULL, 0, srv->util_cb,
616 if (srv->util_init_transport) {
617 ret = srv->util_init_transport();
619 vmbus_close(dev->channel);
626 if (srv->util_deinit)
629 kfree(srv->recv_buffer);
/*
 * Remove callback: looks up the service descriptor stored as driver data,
 * checks for a util_deinit hook, closes the channel and frees the receive
 * buffer.
 * NOTE(review): the statement guarded by the util_deinit check is not
 * visible in this excerpt.
 *
 * @dev: device being removed.
 */
633 static void util_remove(struct hv_device *dev)
635 struct hv_util_service *srv = hv_get_drvdata(dev);
637 if (srv->util_deinit)
639 vmbus_close(dev->channel);
640 kfree(srv->recv_buffer);
644 * When we're in util_suspend(), all the userspace processes have been frozen
645 * (refer to hibernate() -> freeze_processes()). The userspace is thawed only
646 * after the whole resume procedure, including util_resume(), finishes.
/*
 * Suspend callback: runs the service's optional util_pre_suspend hook and
 * closes the channel.
 * NOTE(review): the handling of the hook's return value and the final return
 * are not visible in this excerpt.
 *
 * @dev: device being suspended.
 */
648 static int util_suspend(struct hv_device *dev)
650 struct hv_util_service *srv = hv_get_drvdata(dev);
653 if (srv->util_pre_suspend) {
654 ret = srv->util_pre_suspend();
659 vmbus_close(dev->channel);
/*
 * Resume callback: runs the service's optional util_pre_resume hook, then
 * reopens the channel with the same ring sizes and callback that
 * util_probe() uses.
 * NOTE(review): the handling of the hook's return value and the final return
 * are not visible in this excerpt.
 *
 * @dev: device being resumed.
 */
664 static int util_resume(struct hv_device *dev)
666 struct hv_util_service *srv = hv_get_drvdata(dev);
669 if (srv->util_pre_resume) {
670 ret = srv->util_pre_resume();
675 ret = vmbus_open(dev->channel, HV_UTIL_RING_SEND_SIZE,
676 HV_UTIL_RING_RECV_SIZE, NULL, 0, srv->util_cb,
/*
 * Device match table. Each entry stores a pointer to its service descriptor
 * in driver_data as an unsigned long; util_probe() casts it back to
 * struct hv_util_service *.
 * NOTE(review): most of the GUID initializer lines are not visible in this
 * excerpt.
 */
681 static const struct hv_vmbus_device_id id_table[] = {
684 .driver_data = (unsigned long)&util_shutdown
686 /* Time synch guid */
688 .driver_data = (unsigned long)&util_timesynch
691 { HV_HEART_BEAT_GUID,
692 .driver_data = (unsigned long)&util_heartbeat
696 .driver_data = (unsigned long)&util_kvp
700 .driver_data = (unsigned long)&util_vss
705 MODULE_DEVICE_TABLE(vmbus, id_table);
/*
 * Driver descriptor registered in init_hyperv_utils() and unregistered in
 * exit_hyperv_utils(). It wires up id_table and the remove/suspend/resume
 * callbacks and requests PROBE_PREFER_ASYNCHRONOUS probing.
 * NOTE(review): some initializer lines are not visible in this excerpt.
 */
707 /* The one and only one */
708 static struct hv_driver util_drv = {
710 .id_table = id_table,
712 .remove = util_remove,
713 .suspend = util_suspend,
714 .resume = util_resume,
716 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
/*
 * PTP clock callbacks installed in ptp_hyperv_info as .enable, .settime64,
 * .adjfine and .adjtime.
 * NOTE(review): none of the four bodies is visible in this excerpt, so their
 * return values cannot be documented from here.
 */
720 static int hv_ptp_enable(struct ptp_clock_info *info,
721 struct ptp_clock_request *request, int on)
726 static int hv_ptp_settime(struct ptp_clock_info *p, const struct timespec64 *ts)
731 static int hv_ptp_adjfine(struct ptp_clock_info *ptp, long delta)
735 static int hv_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
/*
 * gettime64 callback of the PTP clock: forwards to hv_get_adj_host_time()
 * and returns its result unchanged.
 *
 * @info: not used in the visible body.
 * @ts:   output timespec, filled by hv_get_adj_host_time().
 */
740 static int hv_ptp_gettime(struct ptp_clock_info *info, struct timespec64 *ts)
742 return hv_get_adj_host_time(ts);
/*
 * PTP clock description passed to ptp_clock_register() in
 * hv_timesync_init(); it points at the hv_ptp_* callbacks defined above.
 * NOTE(review): at least one initializer line is not visible in this
 * excerpt.
 */
745 static struct ptp_clock_info ptp_hyperv_info = {
747 .enable = hv_ptp_enable,
748 .adjtime = hv_ptp_adjtime,
749 .adjfine = hv_ptp_adjfine,
750 .gettime64 = hv_ptp_gettime,
751 .settime64 = hv_ptp_settime,
752 .owner = THIS_MODULE,
/*
 * Handle returned by ptp_clock_register() in hv_timesync_init() and handed
 * to ptp_clock_unregister() in hv_timesync_deinit().
 */
755 static struct ptp_clock *hv_ptp_clock;
/*
 * Init hook of the time-sync service: initialises host_ts.lock, binds
 * adj_time_work to hv_set_host_time() and tries to register the PTP clock.
 * A failed or NULL registration is logged with pr_err(); per the comment in
 * the body the driver remains useful without the PTP device.
 *
 * @srv: service descriptor; not used in the visible lines.
 *
 * NOTE(review): the lines following the pr_err() call, including the return
 * statement, are not visible in this excerpt.
 */
757 static int hv_timesync_init(struct hv_util_service *srv)
759 spin_lock_init(&host_ts.lock);
761 INIT_WORK(&adj_time_work, hv_set_host_time);
764 * ptp_clock_register() returns NULL when CONFIG_PTP_1588_CLOCK is
765 * disabled but the driver is still useful without the PTP device
766 * as it still handles the ICTIMESYNCFLAG_SYNC case.
768 hv_ptp_clock = ptp_clock_register(&ptp_hyperv_info, NULL);
769 if (IS_ERR_OR_NULL(hv_ptp_clock)) {
770 pr_err("cannot register PTP clock: %d\n",
771 PTR_ERR_OR_ZERO(hv_ptp_clock));
/*
 * Cancels adj_time_work with cancel_work_sync(). Called from both
 * hv_timesync_pre_suspend() and hv_timesync_deinit().
 */
778 static void hv_timesync_cancel_work(void)
780 cancel_work_sync(&adj_time_work);
/*
 * Pre-suspend hook of the time-sync service: cancels the pending clock
 * update work.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
783 static int hv_timesync_pre_suspend(void)
785 hv_timesync_cancel_work();
/*
 * Deinit hook of the time-sync service: unregisters the PTP clock, then
 * cancels the clock update work.
 * NOTE(review): a line just before the unregister call is not visible in
 * this excerpt (possibly a guard on hv_ptp_clock -- confirm).
 */
789 static void hv_timesync_deinit(void)
792 ptp_clock_unregister(hv_ptp_clock);
794 hv_timesync_cancel_work();
/*
 * Module entry point: logs a registration message and registers util_drv
 * with the VMBus core.
 *
 * Return: the result of vmbus_driver_register().
 */
797 static int __init init_hyperv_utils(void)
799 pr_info("Registering HyperV Utility Driver\n");
801 return vmbus_driver_register(&util_drv);
/*
 * Module exit point: unregisters util_drv. Note that the "De-Registered"
 * message is logged before vmbus_driver_unregister() is called.
 */
804 static void exit_hyperv_utils(void)
806 pr_info("De-Registered HyperV Utility Driver\n");
808 vmbus_driver_unregister(&util_drv);
/* Module entry/exit registration and metadata. */
811 module_init(init_hyperv_utils);
812 module_exit(exit_hyperv_utils);
814 MODULE_DESCRIPTION("Hyper-V Utilities");
815 MODULE_LICENSE("GPL");