Git Repo - linux.git/commitdiff
Merge tag 'hyperv-next-signed-20210426' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <[email protected]>
Mon, 26 Apr 2021 17:44:16 +0000 (10:44 -0700)
committer Linus Torvalds <[email protected]>
Mon, 26 Apr 2021 17:44:16 +0000 (10:44 -0700)
Pull Hyper-V updates from Wei Liu:

 - VMBus enhancement

 - Free page reporting support for Hyper-V balloon driver

 - Some patches for running Linux as Arm64 Hyper-V guest

 - A few misc clean-up patches

* tag 'hyperv-next-signed-20210426' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (30 commits)
  drivers: hv: Create a consistent pattern for checking Hyper-V hypercall status
  x86/hyperv: Move hv_do_rep_hypercall to asm-generic
  video: hyperv_fb: Add ratelimit on error message
  Drivers: hv: vmbus: Increase wait time for VMbus unload
  Drivers: hv: vmbus: Initialize unload_event statically
  Drivers: hv: vmbus: Check for pending channel interrupts before taking a CPU offline
  Drivers: hv: vmbus: Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE
  Drivers: hv: vmbus: Introduce and negotiate VMBus protocol version 5.3
  Drivers: hv: vmbus: Use after free in __vmbus_open()
  Drivers: hv: vmbus: remove unused function
  Drivers: hv: vmbus: Remove unused linux/version.h header
  x86/hyperv: remove unused linux/version.h header
  x86/Hyper-V: Support for free page reporting
  x86/hyperv: Fix unused variable 'hi' warning in hv_apic_read
  x86/hyperv: Fix unused variable 'msr_val' warning in hv_qlock_wait
  hv: hyperv.h: a few mundane typo fixes
  drivers: hv: Fix EXPORT_SYMBOL and tab spaces issue
  Drivers: hv: vmbus: Drop error message when 'No request id available'
  asm-generic/hyperv: Add missing function prototypes per -W1 warnings
  clocksource/drivers/hyper-v: Move handling of STIMER0 interrupts
  ...

1  2 
arch/x86/hyperv/hv_init.c
arch/x86/kernel/cpu/mshyperv.c
drivers/clocksource/hyperv_timer.c
drivers/pci/controller/pci-hyperv.c
drivers/video/fbdev/hyperv_fb.c

index e7b94f636cc19a0a0ee433a6c2b1773015d23f70,f7dbfa51a38a90d7f4aed5507079e57a8bb4fc48..bb0ae4b5c00f1bf6654142ddbb69f870610a240c
@@@ -54,28 -54,6 +54,6 @@@ EXPORT_SYMBOL_GPL(hyperv_pcpu_output_ar
  u32 hv_max_vp_index;
  EXPORT_SYMBOL_GPL(hv_max_vp_index);
  
- void *hv_alloc_hyperv_page(void)
- {
-       BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE);
-       return (void *)__get_free_page(GFP_KERNEL);
- }
- EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
- void *hv_alloc_hyperv_zeroed_page(void)
- {
-         BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE);
-         return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- }
- EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
- void hv_free_hyperv_page(unsigned long addr)
- {
-       free_page(addr);
- }
- EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
  static int hv_cpu_init(unsigned int cpu)
  {
        u64 msr_vp_index;
@@@ -97,7 -75,7 +75,7 @@@
                *output_arg = page_address(pg + 1);
        }
  
-       hv_get_vp_index(msr_vp_index);
+       msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
  
        hv_vp_index[smp_processor_id()] = msr_vp_index;
  
@@@ -162,7 -140,7 +140,7 @@@ EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emula
  static inline bool hv_reenlightenment_available(void)
  {
        /*
 -       * Check for required features and priviliges to make TSC frequency
 +       * Check for required features and privileges to make TSC frequency
         * change notifications work.
         */
        return ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
@@@ -292,7 -270,7 +270,7 @@@ static int hv_suspend(void
  
        /*
         * Reset the hypercall page as it is going to be invalidated
 -       * accross hibernation. Setting hv_hypercall_pg to NULL ensures
 +       * across hibernation. Setting hv_hypercall_pg to NULL ensures
         * that any subsequent hypercall operation fails safely instead of
         * crashing due to an access of an invalid page. The hypercall page
         * pointer is restored on resume.
@@@ -349,7 -327,7 +327,7 @@@ static void __init hv_stimer_setup_perc
         * Ignore any errors in setting up stimer clockevents
         * as we can run with the LAPIC timer as a fallback.
         */
-       (void)hv_stimer_alloc();
+       (void)hv_stimer_alloc(false);
  
        /*
         * Still register the LAPIC timer, because the direct-mode STIMER is
@@@ -369,7 -347,7 +347,7 @@@ static void __init hv_get_partition_id(
        local_irq_save(flags);
        output_page = *this_cpu_ptr(hyperv_pcpu_output_arg);
        status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page);
-       if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) {
+       if (!hv_result_success(status)) {
                /* No point in proceeding if this failed */
                pr_err("Failed to get partition ID: %lld\n", status);
                BUG();
@@@ -520,6 -498,8 +498,8 @@@ void __init hyperv_init(void
                x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain;
  #endif
  
+       /* Query the VMs extended capability once, so that it can be cached. */
+       hv_query_ext_cap(0);
        return;
  
  remove_cpuhp_state:
@@@ -593,33 -573,6 +573,6 @@@ void hyperv_report_panic(struct pt_reg
  }
  EXPORT_SYMBOL_GPL(hyperv_report_panic);
  
- /**
-  * hyperv_report_panic_msg - report panic message to Hyper-V
-  * @pa: physical address of the panic page containing the message
-  * @size: size of the message in the page
-  */
- void hyperv_report_panic_msg(phys_addr_t pa, size_t size)
- {
-       /*
-        * P3 to contain the physical address of the panic page & P4 to
-        * contain the size of the panic data in that page. Rest of the
-        * registers are no-op when the NOTIFY_MSG flag is set.
-        */
-       wrmsrl(HV_X64_MSR_CRASH_P0, 0);
-       wrmsrl(HV_X64_MSR_CRASH_P1, 0);
-       wrmsrl(HV_X64_MSR_CRASH_P2, 0);
-       wrmsrl(HV_X64_MSR_CRASH_P3, pa);
-       wrmsrl(HV_X64_MSR_CRASH_P4, size);
-       /*
-        * Let Hyper-V know there is crash data available along with
-        * the panic message.
-        */
-       wrmsrl(HV_X64_MSR_CRASH_CTL,
-              (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG));
- }
- EXPORT_SYMBOL_GPL(hyperv_report_panic_msg);
  bool hv_is_hyperv_initialized(void)
  {
        union hv_x64_msr_hypercall_contents hypercall_msr;
@@@ -650,7 -603,7 +603,7 @@@ EXPORT_SYMBOL_GPL(hv_is_hibernation_sup
  
  enum hv_isolation_type hv_get_isolation_type(void)
  {
-       if (!(ms_hyperv.features_b & HV_ISOLATION))
+       if (!(ms_hyperv.priv_high & HV_ISOLATION))
                return HV_ISOLATION_TYPE_NONE;
        return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
  }
@@@ -661,3 -614,50 +614,50 @@@ bool hv_is_isolation_supported(void
        return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
  }
  EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
+ /* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
+ bool hv_query_ext_cap(u64 cap_query)
+ {
+       /*
+        * The address of the 'hv_extended_cap' variable will be used as an
+        * output parameter to the hypercall below and so it should be
+        * compatible with 'virt_to_phys'. Which means, its address should be
+        * directly mapped. Use 'static' to keep it compatible; stack variables
+        * can be virtually mapped, making them incompatible with
+        * 'virt_to_phys'.
+        * Hypercall input/output addresses should also be 8-byte aligned.
+        */
+       static u64 hv_extended_cap __aligned(8);
+       static bool hv_extended_cap_queried;
+       u64 status;
+       /*
+        * Querying extended capabilities is an extended hypercall. Check if the
+        * partition supports extended hypercall, first.
+        */
+       if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
+               return false;
+       /* Extended capabilities do not change at runtime. */
+       if (hv_extended_cap_queried)
+               return hv_extended_cap & cap_query;
+       status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
+                                &hv_extended_cap);
+       /*
+        * The query extended capabilities hypercall should not fail under
+        * any normal circumstances. Avoid repeatedly making the hypercall, on
+        * error.
+        */
+       hv_extended_cap_queried = true;
+       status &= HV_HYPERCALL_RESULT_MASK;
+       if (status != HV_STATUS_SUCCESS) {
+               pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
+                      status);
+               return false;
+       }
+       return hv_extended_cap & cap_query;
+ }
+ EXPORT_SYMBOL_GPL(hv_query_ext_cap);
index 415bc05d3dc7f85960eb5bbb997b52ef0251ee5d,3546d3e21787a954cd41800a5cf3fb5ffcbe6f39..22f13343b5da808f5045195a93522130dc8165d0
@@@ -60,23 -60,18 +60,18 @@@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_ca
        set_irq_regs(old_regs);
  }
  
int hv_setup_vmbus_irq(int irq, void (*handler)(void))
void hv_setup_vmbus_handler(void (*handler)(void))
  {
-       /*
-        * The 'irq' argument is ignored on x86/x64 because a hard-coded
-        * interrupt vector is used for Hyper-V interrupts.
-        */
        vmbus_handler = handler;
-       return 0;
  }
+ EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
  
- void hv_remove_vmbus_irq(void)
+ void hv_remove_vmbus_handler(void)
  {
        /* We have no way to deallocate the interrupt gate */
        vmbus_handler = NULL;
  }
- EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
- EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
+ EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);
  
  /*
   * Routines to do per-architecture handling of stimer0
@@@ -95,21 -90,17 +90,17 @@@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_st
        set_irq_regs(old_regs);
  }
  
- int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
+ /* For x86/x64, override weak placeholders in hyperv_timer.c */
+ void hv_setup_stimer0_handler(void (*handler)(void))
  {
-       *vector = HYPERV_STIMER0_VECTOR;
-       *irq = -1;   /* Unused on x86/x64 */
        hv_stimer0_handler = handler;
-       return 0;
  }
- EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq);
  
- void hv_remove_stimer0_irq(int irq)
+ void hv_remove_stimer0_handler(void)
  {
        /* We have no way to deallocate the interrupt gate */
        hv_stimer0_handler = NULL;
  }
- EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq);
  
  void hv_setup_kexec_handler(void (*handler)(void))
  {
@@@ -197,7 -188,7 +188,7 @@@ static unsigned char hv_get_nmi_reason(
  #ifdef CONFIG_X86_LOCAL_APIC
  /*
   * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes
 - * it dificult to process CHANNELMSG_UNLOAD in case of crash. Handle
 + * it difficult to process CHANNELMSG_UNLOAD in case of crash. Handle
   * unknown NMI on the first CPU which gets it.
   */
  static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
@@@ -274,12 -265,13 +265,13 @@@ static void __init ms_hyperv_init_platf
         * Extract the features and hints
         */
        ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
-       ms_hyperv.features_b = cpuid_ebx(HYPERV_CPUID_FEATURES);
+       ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES);
        ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
        ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
  
-       pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n",
-               ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features);
+       pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n",
+               ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
+               ms_hyperv.misc_features);
  
        ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
        ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
                x86_platform.calibrate_cpu = hv_get_tsc_khz;
        }
  
-       if (ms_hyperv.features_b & HV_ISOLATION) {
+       if (ms_hyperv.priv_high & HV_ISOLATION) {
                ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG);
                ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG);
  
  
        /*
         * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic,
 -       * set x2apic destination mode to physcial mode when x2apic is available
 +       * set x2apic destination mode to physical mode when x2apic is available
         * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs
         * have 8-bit APIC id.
         */
index a02b0a224807e870cd31c5f7436b5e774d1eafea,ce94f78eb85135a935ede06591e56a9f9bf3bf4b..977fd05ac35f62e1db380a8f289f475b52109355
@@@ -18,6 -18,9 +18,9 @@@
  #include <linux/sched_clock.h>
  #include <linux/mm.h>
  #include <linux/cpuhotplug.h>
+ #include <linux/interrupt.h>
+ #include <linux/irq.h>
+ #include <linux/acpi.h>
  #include <clocksource/hyperv_timer.h>
  #include <asm/hyperv-tlfs.h>
  #include <asm/mshyperv.h>
@@@ -43,14 -46,13 +46,13 @@@ static u64 hv_sched_clock_offset __ro_a
   */
  static bool direct_mode_enabled;
  
- static int stimer0_irq;
- static int stimer0_vector;
+ static int stimer0_irq = -1;
  static int stimer0_message_sint;
+ static DEFINE_PER_CPU(long, stimer0_evt);
  
  /*
-  * ISR for when stimer0 is operating in Direct Mode.  Direct Mode
-  * does not use VMbus or any VMbus messages, so process here and not
-  * in the VMbus driver code.
+  * Common code for stimer0 interrupts coming via Direct Mode or
+  * as a VMbus message.
   */
  void hv_stimer0_isr(void)
  {
  }
  EXPORT_SYMBOL_GPL(hv_stimer0_isr);
  
+ /*
+  * stimer0 interrupt handler for architectures that support
+  * per-cpu interrupts, which also implies Direct Mode.
+  */
+ static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id)
+ {
+       hv_stimer0_isr();
+       return IRQ_HANDLED;
+ }
  static int hv_ce_set_next_event(unsigned long delta,
                                struct clock_event_device *evt)
  {
  
        current_tick = hv_read_reference_counter();
        current_tick += delta;
-       hv_init_timer(0, current_tick);
+       hv_set_register(HV_REGISTER_STIMER0_COUNT, current_tick);
        return 0;
  }
  
  static int hv_ce_shutdown(struct clock_event_device *evt)
  {
-       hv_init_timer(0, 0);
-       hv_init_timer_config(0, 0);
-       if (direct_mode_enabled)
-               hv_disable_stimer0_percpu_irq(stimer0_irq);
+       hv_set_register(HV_REGISTER_STIMER0_COUNT, 0);
+       hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0);
+       if (direct_mode_enabled && stimer0_irq >= 0)
+               disable_percpu_irq(stimer0_irq);
  
        return 0;
  }
@@@ -95,8 -107,9 +107,9 @@@ static int hv_ce_set_oneshot(struct clo
                 * on the specified hardware vector/IRQ.
                 */
                timer_cfg.direct_mode = 1;
-               timer_cfg.apic_vector = stimer0_vector;
-               hv_enable_stimer0_percpu_irq(stimer0_irq);
+               timer_cfg.apic_vector = HYPERV_STIMER0_VECTOR;
+               if (stimer0_irq >= 0)
+                       enable_percpu_irq(stimer0_irq, IRQ_TYPE_NONE);
        } else {
                /*
                 * When it expires, the timer will generate a VMbus message,
                timer_cfg.direct_mode = 0;
                timer_cfg.sintx = stimer0_message_sint;
        }
-       hv_init_timer_config(0, timer_cfg.as_uint64);
+       hv_set_register(HV_REGISTER_STIMER0_CONFIG, timer_cfg.as_uint64);
        return 0;
  }
  
@@@ -169,10 -182,58 +182,58 @@@ int hv_stimer_cleanup(unsigned int cpu
  }
  EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
  
+ /*
+  * These placeholders are overridden by arch specific code on
+  * architectures that need special setup of the stimer0 IRQ because
+  * they don't support per-cpu IRQs (such as x86/x64).
+  */
+ void __weak hv_setup_stimer0_handler(void (*handler)(void))
+ {
+ };
+ void __weak hv_remove_stimer0_handler(void)
+ {
+ };
+ /* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */
+ static int hv_setup_stimer0_irq(void)
+ {
+       int ret;
+       ret = acpi_register_gsi(NULL, HYPERV_STIMER0_VECTOR,
+                       ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH);
+       if (ret < 0) {
+               pr_err("Can't register Hyper-V stimer0 GSI. Error %d", ret);
+               return ret;
+       }
+       stimer0_irq = ret;
+       ret = request_percpu_irq(stimer0_irq, hv_stimer0_percpu_isr,
+               "Hyper-V stimer0", &stimer0_evt);
+       if (ret) {
+               pr_err("Can't request Hyper-V stimer0 IRQ %d. Error %d",
+                       stimer0_irq, ret);
+               acpi_unregister_gsi(stimer0_irq);
+               stimer0_irq = -1;
+       }
+       return ret;
+ }
+ static void hv_remove_stimer0_irq(void)
+ {
+       if (stimer0_irq == -1) {
+               hv_remove_stimer0_handler();
+       } else {
+               free_percpu_irq(stimer0_irq, &stimer0_evt);
+               acpi_unregister_gsi(stimer0_irq);
+               stimer0_irq = -1;
+       }
+ }
  /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
- int hv_stimer_alloc(void)
+ int hv_stimer_alloc(bool have_percpu_irqs)
  {
-       int ret = 0;
+       int ret;
  
        /*
         * Synthetic timers are always available except on old versions of
  
        direct_mode_enabled = ms_hyperv.misc_features &
                        HV_STIMER_DIRECT_MODE_AVAILABLE;
-       if (direct_mode_enabled) {
-               ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
-                               hv_stimer0_isr);
+       /*
+        * If Direct Mode isn't enabled, the remainder of the initialization
+        * is done later by hv_stimer_legacy_init()
+        */
+       if (!direct_mode_enabled)
+               return 0;
+       if (have_percpu_irqs) {
+               ret = hv_setup_stimer0_irq();
                if (ret)
-                       goto free_percpu;
+                       goto free_clock_event;
+       } else {
+               hv_setup_stimer0_handler(hv_stimer0_isr);
+       }
  
-               /*
-                * Since we are in Direct Mode, stimer initialization
-                * can be done now with a CPUHP value in the same range
-                * as other clockevent devices.
-                */
-               ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
-                               "clockevents/hyperv/stimer:starting",
-                               hv_stimer_init, hv_stimer_cleanup);
-               if (ret < 0)
-                       goto free_stimer0_irq;
+       /*
+        * Since we are in Direct Mode, stimer initialization
+        * can be done now with a CPUHP value in the same range
+        * as other clockevent devices.
+        */
+       ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
+                       "clockevents/hyperv/stimer:starting",
+                       hv_stimer_init, hv_stimer_cleanup);
+       if (ret < 0) {
+               hv_remove_stimer0_irq();
+               goto free_clock_event;
        }
        return ret;
  
- free_stimer0_irq:
-       hv_remove_stimer0_irq(stimer0_irq);
-       stimer0_irq = 0;
- free_percpu:
+ free_clock_event:
        free_percpu(hv_clock_event);
        hv_clock_event = NULL;
        return ret;
@@@ -254,23 -323,6 +323,6 @@@ void hv_stimer_legacy_cleanup(unsigned 
  }
  EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);
  
- /* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
- void hv_stimer_free(void)
- {
-       if (!hv_clock_event)
-               return;
-       if (direct_mode_enabled) {
-               cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
-               hv_remove_stimer0_irq(stimer0_irq);
-               stimer0_irq = 0;
-       }
-       free_percpu(hv_clock_event);
-       hv_clock_event = NULL;
- }
- EXPORT_SYMBOL_GPL(hv_stimer_free);
  /*
   * Do a global cleanup of clockevents for the cases of kexec and
   * vmbus exit
@@@ -287,12 -339,17 +339,17 @@@ void hv_stimer_global_cleanup(void
                hv_stimer_legacy_cleanup(cpu);
        }
  
-       /*
-        * If Direct Mode is enabled, the cpuhp teardown callback
-        * (hv_stimer_cleanup) will be run on all CPUs to stop the
-        * stimers.
-        */
-       hv_stimer_free();
+       if (!hv_clock_event)
+               return;
+       if (direct_mode_enabled) {
+               cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
+               hv_remove_stimer0_irq();
+               stimer0_irq = -1;
+       }
+       free_percpu(hv_clock_event);
+       hv_clock_event = NULL;
  }
  EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
  
   * the other that uses the TSC reference page feature as defined in the
   * TLFS.  The MSR version is for compatibility with old versions of
   * Hyper-V and 32-bit x86.  The TSC reference page version is preferred.
-  *
-  * The Hyper-V clocksource ratings of 250 are chosen to be below the
-  * TSC clocksource rating of 300.  In configurations where Hyper-V offers
-  * an InvariantTSC, the TSC is not marked "unstable", so the TSC clocksource
-  * is available and preferred.  With the higher rating, it will be the
-  * default.  On older hardware and Hyper-V versions, the TSC is marked
-  * "unstable", so no TSC clocksource is created and the selected Hyper-V
-  * clocksource will be the default.
   */
  
  u64 (*hv_read_reference_counter)(void);
@@@ -331,7 -380,7 +380,7 @@@ static u64 notrace read_hv_clock_tsc(vo
        u64 current_tick = hv_read_tsc_page(hv_get_tsc_page());
  
        if (current_tick == U64_MAX)
-               hv_get_time_ref_count(current_tick);
+               current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT);
  
        return current_tick;
  }
@@@ -352,9 -401,9 +401,9 @@@ static void suspend_hv_clock_tsc(struc
        u64 tsc_msr;
  
        /* Disable the TSC page */
-       hv_get_reference_tsc(tsc_msr);
+       tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
        tsc_msr &= ~BIT_ULL(0);
-       hv_set_reference_tsc(tsc_msr);
+       hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
  }
  
  
@@@ -364,39 -413,44 +413,44 @@@ static void resume_hv_clock_tsc(struct 
        u64 tsc_msr;
  
        /* Re-enable the TSC page */
-       hv_get_reference_tsc(tsc_msr);
+       tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
        tsc_msr &= GENMASK_ULL(11, 0);
        tsc_msr |= BIT_ULL(0) | (u64)phys_addr;
-       hv_set_reference_tsc(tsc_msr);
+       hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
  }
  
+ #ifdef VDSO_CLOCKMODE_HVCLOCK
  static int hv_cs_enable(struct clocksource *cs)
  {
-       hv_enable_vdso_clocksource();
+       vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);
        return 0;
  }
+ #endif
  
  static struct clocksource hyperv_cs_tsc = {
        .name   = "hyperv_clocksource_tsc_page",
-       .rating = 250,
+       .rating = 500,
        .read   = read_hv_clock_tsc_cs,
        .mask   = CLOCKSOURCE_MASK(64),
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
        .suspend= suspend_hv_clock_tsc,
        .resume = resume_hv_clock_tsc,
+ #ifdef VDSO_CLOCKMODE_HVCLOCK
        .enable = hv_cs_enable,
+       .vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK,
+ #else
+       .vdso_clock_mode = VDSO_CLOCKMODE_NONE,
+ #endif
  };
  
  static u64 notrace read_hv_clock_msr(void)
  {
-       u64 current_tick;
        /*
         * Read the partition counter to get the current tick count. This count
         * is set to 0 when the partition is created and is incremented in
         * 100 nanosecond units.
         */
-       hv_get_time_ref_count(current_tick);
-       return current_tick;
+       return hv_get_register(HV_REGISTER_TIME_REF_COUNT);
  }
  
  static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg)
@@@ -412,12 -466,36 +466,36 @@@ static u64 notrace read_hv_sched_clock_
  
  static struct clocksource hyperv_cs_msr = {
        .name   = "hyperv_clocksource_msr",
-       .rating = 250,
+       .rating = 500,
        .read   = read_hv_clock_msr_cs,
        .mask   = CLOCKSOURCE_MASK(64),
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
  };
  
 -      pv_ops.time.sched_clock = sched_clock;
+ /*
+  * Reference to pv_ops must be inline so objtool
+  * detection of noinstr violations can work correctly.
+  */
+ #ifdef CONFIG_GENERIC_SCHED_CLOCK
+ static __always_inline void hv_setup_sched_clock(void *sched_clock)
+ {
+       /*
+        * We're on an architecture with generic sched clock (not x86/x64).
+        * The Hyper-V sched clock read function returns nanoseconds, not
+        * the normal 100ns units of the Hyper-V synthetic clock.
+        */
+       sched_clock_register(sched_clock, 64, NSEC_PER_SEC);
+ }
+ #elif defined CONFIG_PARAVIRT
+ static __always_inline void hv_setup_sched_clock(void *sched_clock)
+ {
+       /* We're on x86/x64 *and* using PV ops */
++      paravirt_set_sched_clock(sched_clock);
+ }
+ #else /* !CONFIG_GENERIC_SCHED_CLOCK && !CONFIG_PARAVIRT */
+ static __always_inline void hv_setup_sched_clock(void *sched_clock) {}
+ #endif /* CONFIG_GENERIC_SCHED_CLOCK */
  static bool __init hv_init_tsc_clocksource(void)
  {
        u64             tsc_msr;
        if (hv_root_partition)
                return false;
  
+       /*
+        * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly
+        * handles frequency and offset changes due to live migration,
+        * pause/resume, and other VM management operations.  So lower the
+        * Hyper-V Reference TSC rating, causing the generic TSC to be used.
+        * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference
+        * TSC will be preferred over the virtualized ARM64 arch counter.
+        * While the Hyper-V MSR clocksource won't be used since the
+        * Reference TSC clocksource is present, change its rating as
+        * well for consistency.
+        */
+       if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
+               hyperv_cs_tsc.rating = 250;
+               hyperv_cs_msr.rating = 250;
+       }
        hv_read_reference_counter = read_hv_clock_tsc;
        phys_addr = virt_to_phys(hv_get_tsc_page());
  
         * (which already has at least the low 12 bits set to zero since
         * it is page aligned). Also set the "enable" bit, which is bit 0.
         */
-       hv_get_reference_tsc(tsc_msr);
+       tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
        tsc_msr &= GENMASK_ULL(11, 0);
        tsc_msr = tsc_msr | 0x1 | (u64)phys_addr;
-       hv_set_reference_tsc(tsc_msr);
+       hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
  
-       hv_set_clocksource_vdso(hyperv_cs_tsc);
        clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
  
        hv_sched_clock_offset = hv_read_reference_counter();
@@@ -457,7 -550,7 +550,7 @@@ void __init hv_init_clocksource(void
  {
        /*
         * Try to set up the TSC page clocksource. If it succeeds, we're
 -       * done. Otherwise, set up the MSR clocksoruce.  At least one of
 +       * done. Otherwise, set up the MSR clocksource.  At least one of
         * these will always be available except on very old versions of
         * Hyper-V on x86.  In that case we won't have a Hyper-V
         * clocksource, but Linux will still run with a clocksource based
index a313708bcf754702897f588e32772468801a732a,aa278005dea2b345dadd45723df4c2a3d878c120..1ff4ce24f4b300e05c0276f64456cfa6f99caeb0
@@@ -1292,7 -1292,7 +1292,7 @@@ exit_unlock
         * resumes, hv_pci_restore_msi_state() is able to correctly restore
         * the interrupt with the correct affinity.
         */
-       if (res && hbus->state != hv_pcibus_removing)
+       if (!hv_result_success(res) && hbus->state != hv_pcibus_removing)
                dev_err(&hbus->hdev->device,
                        "%s() failed: %#llx", __func__, res);
  
@@@ -1458,7 -1458,7 +1458,7 @@@ static void hv_compose_msi_msg(struct i
         * Prevents hv_pci_onchannelcallback() from running concurrently
         * in the tasklet.
         */
 -      tasklet_disable(&channel->callback_event);
 +      tasklet_disable_in_atomic(&channel->callback_event);
  
        /*
         * Since this function is called with IRQ locks held, can't
index 4dc9077dd2ac04d07401e210b13fcb052b0f6ab1,68adbf8d517ad90868998817ec93ff7bebfb68e4..a7e6eea2c4a1d09ca364f84e6c2961967f514047
@@@ -308,7 -308,7 +308,7 @@@ static inline int synthvid_send(struct 
                               VM_PKT_DATA_INBAND, 0);
  
        if (ret)
-               pr_err("Unable to send packet via vmbus\n");
+               pr_err_ratelimited("Unable to send packet via vmbus; error %d\n", ret);
  
        return ret;
  }
@@@ -1031,6 -1031,7 +1031,6 @@@ static int hvfb_getmem(struct hv_devic
                        PCI_DEVICE_ID_HYPERV_VIDEO, NULL);
                if (!pdev) {
                        pr_err("Unable to find PCI Hyper-V video\n");
 -                      kfree(info->apertures);
                        return -ENODEV;
                }
  
@@@ -1128,6 -1129,7 +1128,6 @@@ getmem_done
        } else {
                pci_dev_put(pdev);
        }
 -      kfree(info->apertures);
  
        return 0;
  
@@@ -1139,6 -1141,7 +1139,6 @@@ err2
  err1:
        if (!gen2vm)
                pci_dev_put(pdev);
 -      kfree(info->apertures);
  
        return -ENOMEM;
  }
This page took 0.142795 seconds and 4 git commands to generate.