[linux.git] / drivers / hv / hv.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <[email protected]>
 *   Hank Janssen  <[email protected]>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include <linux/set_memory.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context;

/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called
 */
int hv_init(void)
{
        hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
        if (!hv_context.cpu_context)
                return -ENOMEM;
        return 0;
}

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
                        enum hv_message_type message_type,
                        void *payload, size_t payload_size)
{
        struct hv_input_post_message *aligned_msg;
        unsigned long flags;
        u64 status;

        if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
                return -EMSGSIZE;

        local_irq_save(flags);

        /*
         * A TDX VM with the paravisor must use the decrypted post_msg_page: see
         * the comment in struct hv_per_cpu_context. A SNP VM with the paravisor
         * can use the encrypted hyperv_pcpu_input_arg because it copies the
         * input into the GHCB page, which has been decrypted by the paravisor.
         */
        if (hv_isolation_type_tdx() && ms_hyperv.paravisor_present)
                aligned_msg = this_cpu_ptr(hv_context.cpu_context)->post_msg_page;
        else
                aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);

        aligned_msg->connectionid = connection_id;
        aligned_msg->reserved = 0;
        aligned_msg->message_type = message_type;
        aligned_msg->payload_size = payload_size;
        memcpy((void *)aligned_msg->payload, payload, payload_size);

        if (ms_hyperv.paravisor_present) {
                if (hv_isolation_type_tdx())
                        status = hv_tdx_hypercall(HVCALL_POST_MESSAGE,
                                                  virt_to_phys(aligned_msg), 0);
                else if (hv_isolation_type_snp())
                        status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
                                                   aligned_msg, NULL,
                                                   sizeof(*aligned_msg));
                else
                        status = HV_STATUS_INVALID_PARAMETER;
        } else {
                status = hv_do_hypercall(HVCALL_POST_MESSAGE,
                                         aligned_msg, NULL);
        }

        local_irq_restore(flags);

        return hv_result(status);
}

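/*
 * hv_synic_alloc - Allocate the per-CPU state needed by the SynIC.
 *
 * Allocates the NUMA map and, for each present CPU, the message-handling
 * tasklet plus whichever pages this configuration owns: the decrypted
 * post_msg_page for a TDX VM with a paravisor, and the SynIC message/event
 * pages when neither the paravisor nor the root partition provides them
 * (decrypting them for SNP/TDX isolation VMs without a paravisor).
 */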
int hv_synic_alloc(void)
{
        int cpu, ret = -ENOMEM;
        struct hv_per_cpu_context *hv_cpu;

        /*
         * First, zero all per-cpu memory areas so hv_synic_free() can
         * detect what memory has been allocated and clean up properly
         * after any failures.
         */
        for_each_present_cpu(cpu) {
                hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
                memset(hv_cpu, 0, sizeof(*hv_cpu));
        }

        hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
                                         GFP_KERNEL);
        if (!hv_context.hv_numa_map) {
                pr_err("Unable to allocate NUMA map\n");
                goto err;
        }

        for_each_present_cpu(cpu) {
                hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);

                tasklet_init(&hv_cpu->msg_dpc,
                             vmbus_on_msg_dpc, (unsigned long)hv_cpu);

                if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
                        hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
                        if (!hv_cpu->post_msg_page) {
                                pr_err("Unable to allocate post msg page\n");
                                goto err;
                        }

                        ret = set_memory_decrypted((unsigned long)hv_cpu->post_msg_page, 1);
                        if (ret) {
                                pr_err("Failed to decrypt post msg page: %d\n", ret);
                                /* Just leak the page, as it's unsafe to free the page. */
                                hv_cpu->post_msg_page = NULL;
                                goto err;
                        }

                        memset(hv_cpu->post_msg_page, 0, PAGE_SIZE);
                }

                /*
                 * The SynIC message and event pages are allocated by the
                 * paravisor; skip allocating them here.
                 */
                if (!ms_hyperv.paravisor_present && !hv_root_partition) {
                        hv_cpu->synic_message_page =
                                (void *)get_zeroed_page(GFP_ATOMIC);
                        if (!hv_cpu->synic_message_page) {
                                pr_err("Unable to allocate SYNIC message page\n");
                                goto err;
                        }

                        hv_cpu->synic_event_page =
                                (void *)get_zeroed_page(GFP_ATOMIC);
                        if (!hv_cpu->synic_event_page) {
                                pr_err("Unable to allocate SYNIC event page\n");

                                free_page((unsigned long)hv_cpu->synic_message_page);
                                hv_cpu->synic_message_page = NULL;
                                goto err;
                        }
                }

                if (!ms_hyperv.paravisor_present &&
                    (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
                        ret = set_memory_decrypted((unsigned long)
                                hv_cpu->synic_message_page, 1);
                        if (ret) {
                                pr_err("Failed to decrypt SYNIC msg page: %d\n", ret);
                                hv_cpu->synic_message_page = NULL;

                                /*
                                 * Free the event page here so that hv_synic_free()
                                 * won't later try to re-encrypt it.
                                 */
                                free_page((unsigned long)hv_cpu->synic_event_page);
                                hv_cpu->synic_event_page = NULL;
                                goto err;
                        }

                        ret = set_memory_decrypted((unsigned long)
                                hv_cpu->synic_event_page, 1);
                        if (ret) {
                                pr_err("Failed to decrypt SYNIC event page: %d\n", ret);
                                hv_cpu->synic_event_page = NULL;
                                goto err;
                        }

                        memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
                        memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
                }
        }

        return 0;

err:
        /*
         * Any memory allocations that succeeded will be freed when
         * the caller cleans up by calling hv_synic_free()
         */
        return ret;
}

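/*
 * hv_synic_free - Free the per-CPU state allocated by hv_synic_alloc().
 *
 * Re-encrypts any pages that were shared with the hypervisor before freeing
 * them; if re-encryption fails, the page is intentionally leaked, since
 * freeing it would be unsafe.
 */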
void hv_synic_free(void)
{
        int cpu, ret;

        for_each_present_cpu(cpu) {
                struct hv_per_cpu_context *hv_cpu =
                        per_cpu_ptr(hv_context.cpu_context, cpu);

                /* It's better to leak the page if the encryption fails. */
                if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
                        if (hv_cpu->post_msg_page) {
                                ret = set_memory_encrypted((unsigned long)
                                        hv_cpu->post_msg_page, 1);
                                if (ret) {
                                        pr_err("Failed to encrypt post msg page: %d\n", ret);
                                        hv_cpu->post_msg_page = NULL;
                                }
                        }
                }

                if (!ms_hyperv.paravisor_present &&
                    (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
                        if (hv_cpu->synic_message_page) {
                                ret = set_memory_encrypted((unsigned long)
                                        hv_cpu->synic_message_page, 1);
                                if (ret) {
                                        pr_err("Failed to encrypt SYNIC msg page: %d\n", ret);
                                        hv_cpu->synic_message_page = NULL;
                                }
                        }

                        if (hv_cpu->synic_event_page) {
                                ret = set_memory_encrypted((unsigned long)
                                        hv_cpu->synic_event_page, 1);
                                if (ret) {
                                        pr_err("Failed to encrypt SYNIC event page: %d\n", ret);
                                        hv_cpu->synic_event_page = NULL;
                                }
                        }
                }

                free_page((unsigned long)hv_cpu->post_msg_page);
                free_page((unsigned long)hv_cpu->synic_event_page);
                free_page((unsigned long)hv_cpu->synic_message_page);
        }

        kfree(hv_context.hv_numa_map);
}

/*
 * hv_synic_enable_regs - Initialize the Synthetic Interrupt Controller
 * registers for a CPU.
 *
 * If the SynIC has already been initialized by another entity (i.e., an x2v
 * shim or the paravisor), retrieve and map its message and event pages.
 * Otherwise, point the hypervisor at the message and event pages allocated
 * by hv_synic_alloc().
 */
void hv_synic_enable_regs(unsigned int cpu)
{
        struct hv_per_cpu_context *hv_cpu =
                per_cpu_ptr(hv_context.cpu_context, cpu);
        union hv_synic_simp simp;
        union hv_synic_siefp siefp;
        union hv_synic_sint shared_sint;
        union hv_synic_scontrol sctrl;

        /* Set up the SynIC message page */
        simp.as_uint64 = hv_get_msr(HV_MSR_SIMP);
        simp.simp_enabled = 1;

        if (ms_hyperv.paravisor_present || hv_root_partition) {
                /* Mask out the vTOM bit. ioremap_cache() maps decrypted. */
                u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
                                ~ms_hyperv.shared_gpa_boundary;
                hv_cpu->synic_message_page =
                        (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
                if (!hv_cpu->synic_message_page)
                        pr_err("Failed to map SynIC message page.\n");
        } else {
                simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
                        >> HV_HYP_PAGE_SHIFT;
        }

        hv_set_msr(HV_MSR_SIMP, simp.as_uint64);

        /* Set up the SynIC event page */
        siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP);
        siefp.siefp_enabled = 1;

        if (ms_hyperv.paravisor_present || hv_root_partition) {
                /* Mask out the vTOM bit. ioremap_cache() maps decrypted. */
                u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
                                ~ms_hyperv.shared_gpa_boundary;
                hv_cpu->synic_event_page =
                        (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
                if (!hv_cpu->synic_event_page)
                        pr_err("Failed to map SynIC event page.\n");
        } else {
                siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
                        >> HV_HYP_PAGE_SHIFT;
        }

        hv_set_msr(HV_MSR_SIEFP, siefp.as_uint64);

        /* Set up the shared SINT. */
        if (vmbus_irq != -1)
                enable_percpu_irq(vmbus_irq, 0);
        shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT);

        shared_sint.vector = vmbus_interrupt;
        shared_sint.masked = false;

        /*
         * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
         * it doesn't provide a recommendation flag and AEOI must be disabled.
         */
#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
        shared_sint.auto_eoi =
                        !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
#else
        shared_sint.auto_eoi = 0;
#endif
        hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

        /* Enable the global SynIC bit */
        sctrl.as_uint64 = hv_get_msr(HV_MSR_SCONTROL);
        sctrl.enable = 1;

        hv_set_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
}

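/*
 * hv_synic_init - Per-CPU initialization: program the SynIC registers and
 * set up the legacy Hyper-V stimer for this CPU.
 */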
int hv_synic_init(unsigned int cpu)
{
        hv_synic_enable_regs(cpu);

        hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

        return 0;
}

/*
 * hv_synic_disable_regs - Disable the per-CPU SynIC registers; the
 * counterpart of hv_synic_enable_regs().
 */
void hv_synic_disable_regs(unsigned int cpu)
{
        struct hv_per_cpu_context *hv_cpu =
                per_cpu_ptr(hv_context.cpu_context, cpu);
        union hv_synic_sint shared_sint;
        union hv_synic_simp simp;
        union hv_synic_siefp siefp;
        union hv_synic_scontrol sctrl;

        shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT);

        shared_sint.masked = 1;

        /* TODO: Clean up correctly in the SMP case. */
        /* Disable the interrupt */
        hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

        simp.as_uint64 = hv_get_msr(HV_MSR_SIMP);
        /*
         * In an isolation VM, the SIMP and SIEFP pages are allocated by the
         * paravisor. These pages will also be used by the kdump kernel, so
         * just clear the enable bit here and keep the page addresses.
         */
        simp.simp_enabled = 0;
        if (ms_hyperv.paravisor_present || hv_root_partition) {
                iounmap(hv_cpu->synic_message_page);
                hv_cpu->synic_message_page = NULL;
        } else {
                simp.base_simp_gpa = 0;
        }

        hv_set_msr(HV_MSR_SIMP, simp.as_uint64);

        siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP);
        siefp.siefp_enabled = 0;

        if (ms_hyperv.paravisor_present || hv_root_partition) {
                iounmap(hv_cpu->synic_event_page);
                hv_cpu->synic_event_page = NULL;
        } else {
                siefp.base_siefp_gpa = 0;
        }

        hv_set_msr(HV_MSR_SIEFP, siefp.as_uint64);

        /* Disable the global SynIC bit */
        sctrl.as_uint64 = hv_get_msr(HV_MSR_SCONTROL);
        sctrl.enable = 0;
        hv_set_msr(HV_MSR_SCONTROL, sctrl.as_uint64);

        if (vmbus_irq != -1)
                disable_percpu_irq(vmbus_irq);
}

#define HV_MAX_TRIES 3
/*
 * Scan the event flags page of 'this' CPU looking for any bit that is set.  If we find one
 * bit set, then wait for a few milliseconds.  Repeat these steps a maximum of 3 times.
 * Return 'true' if any bit is still set after this operation; 'false' otherwise.
 *
 * If a bit is set, that means there is a pending channel interrupt.  The expectation is
 * that the normal interrupt handling mechanism will find and process the channel interrupt
 * "very soon", and in the process clear the bit.
 */
static bool hv_synic_event_pending(void)
{
        struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
        union hv_synic_event_flags *event =
                (union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
        unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
        bool pending;
        u32 relid;
        int tries = 0;

retry:
        pending = false;
        for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
                /* Special case - VMBus channel protocol messages */
                if (relid == 0)
                        continue;
                pending = true;
                break;
        }
        if (pending && tries++ < HV_MAX_TRIES) {
                usleep_range(10000, 20000);
                goto retry;
        }
        return pending;
}

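/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init(), called when a CPU
 * is going offline.
 *
 * While VMBus is connected, refuses to offline the VMBUS_CONNECT_CPU, a CPU
 * that still has channels bound to it, or (on newer VMBus versions) a CPU
 * with pending channel interrupts; otherwise disables the legacy stimer and
 * the SynIC registers for the CPU.
 */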
int hv_synic_cleanup(unsigned int cpu)
{
        struct vmbus_channel *channel, *sc;
        bool channel_found = false;

        if (vmbus_connection.conn_state != CONNECTED)
                goto always_cleanup;

        /*
         * Hyper-V does not provide a way to change the connect CPU once
         * it is set; we must prevent the connect CPU from going offline
         * while the VM is running normally. But in the panic or kexec()
         * path where the vmbus is already disconnected, the CPU must be
         * allowed to shut down.
         */
        if (cpu == VMBUS_CONNECT_CPU)
                return -EBUSY;

        /*
         * Search for channels which are bound to the CPU we're about to
         * clean up.  If we find one and vmbus is still connected, we fail;
         * this effectively prevents CPU offlining.
         *
         * TODO: Re-bind the channels to different CPUs.
         */
        mutex_lock(&vmbus_connection.channel_mutex);
        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                if (channel->target_cpu == cpu) {
                        channel_found = true;
                        break;
                }
                list_for_each_entry(sc, &channel->sc_list, sc_list) {
                        if (sc->target_cpu == cpu) {
                                channel_found = true;
                                break;
                        }
                }
                if (channel_found)
                        break;
        }
        mutex_unlock(&vmbus_connection.channel_mutex);

        if (channel_found)
                return -EBUSY;

        /*
         * channel_found == false means that any channels that were previously
         * assigned to the CPU have been reassigned elsewhere with a call of
         * vmbus_send_modifychannel().  Scan the event flags page looking for
         * bits that are set, and wait with a timeout for vmbus_chan_sched()
         * to process such bits.  If bits are still set after this operation
         * and VMBus is connected, fail the CPU offlining operation.
         */
        if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
                return -EBUSY;

always_cleanup:
        hv_stimer_legacy_cleanup(cpu);

        hv_synic_disable_regs(cpu);

        return 0;
}