#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

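/*
 * CREATE_TRACE_POINTS must be defined before including the trace header so
 * that the hyperv tracepoints used below are instantiated in this file.
 */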
#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
        u64 address_space;
        u64 flags;
        u64 processor_mask;
        u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
        u64 address_space;
        u64 flags;
        struct {
                u64 format;
                u64 valid_bank_mask;
                u64 bank_contents[];
        } hv_vp_set;
        u64 gva_list[];
};

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)

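/*
 * Per-cpu slots for each CPU's hypercall input page. The per-cpu arrays
 * themselves are allocated in hyper_alloc_mmu(); the input pages are
 * allocated lazily (GFP_ATOMIC) on first use in the flush paths below.
 */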
static struct hv_flush_pcpu __percpu **pcpu_flush;

static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
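/*
 * Example: flushing [start, start + 3 * PAGE_SIZE) yields a single entry,
 * (start & PAGE_MASK) | 2, i.e. the base page plus two additional pages.
 */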
static inline int fill_gva_list(u64 gva_list[], int offset,
                                unsigned long start, unsigned long end)
{
        int gva_n = offset;
        unsigned long cur = start, diff;

        do {
                diff = end > cur ? end - cur : 0;

                gva_list[gva_n] = cur & PAGE_MASK;
                /*
                 * Lower 12 bits encode the number of additional
                 * pages to flush (in addition to the 'cur' page).
                 */
                if (diff >= HV_TLB_FLUSH_UNIT)
                        gva_list[gva_n] |= ~PAGE_MASK;
                else if (diff)
                        gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

                cur += HV_TLB_FLUSH_UNIT;
                gva_n++;

        } while (cur < end);

        return gva_n - offset;
}

/* Return the number of banks in the resulting vp_set */
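/*
 * In the sparse VP set format each bank_contents[] entry is a 64-bit mask
 * covering VPs [64 * bank, 64 * bank + 63]; valid_bank_mask records which
 * banks are present in the variable-size payload.
 */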
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
                                    const struct cpumask *cpus)
{
        int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

        /* valid_bank_mask can represent up to 64 banks */
        if (hv_max_vp_index / 64 >= 64)
                return 0;

        /*
         * Clear all banks up to the maximum possible bank: hv_flush_pcpu_ex
         * structs are not cleared between calls and we would otherwise risk
         * flushing unneeded vCPUs.
         */
        for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
                flush->hv_vp_set.bank_contents[vcpu_bank] = 0;

        /*
         * Some banks may end up being empty but this is acceptable.
         */
        for_each_cpu(cpu, cpus) {
                vcpu = hv_cpu_number_to_vp_number(cpu);
                vcpu_bank = vcpu / 64;
                vcpu_offset = vcpu % 64;
                __set_bit(vcpu_offset, (unsigned long *)
                          &flush->hv_vp_set.bank_contents[vcpu_bank]);
                if (vcpu_bank >= nr_bank)
                        nr_bank = vcpu_bank + 1;
        }
        flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

        return nr_bank;
}

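/*
 * Flush remote TLBs via HvFlushVirtualAddressSpace/List. Falls back to the
 * native IPI-based flush when the hypercall page or the per-cpu input page
 * is unavailable, or when a target VP index does not fit in processor_mask.
 */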
static void hyperv_flush_tlb_others(const struct cpumask *cpus,
                                    const struct flush_tlb_info *info)
{
        int cpu, vcpu, gva_n, max_gvas;
        struct hv_flush_pcpu **flush_pcpu;
        struct hv_flush_pcpu *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush_pcpu = this_cpu_ptr(pcpu_flush);

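        /*
         * First flush on this CPU: allocate the hypercall input page.
         * Interrupts are already disabled, hence GFP_ATOMIC.
         */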
        if (unlikely(!*flush_pcpu))
                *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

        flush = *flush_pcpu;

        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                /*
                 * AddressSpace argument must match the CR3 with PCID bits
                 * stripped out.
                 */
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->address_space &= CR3_ADDR_MASK;
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->processor_mask = 0;
        if (cpumask_equal(cpus, cpu_present_mask)) {
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        } else {
                for_each_cpu(cpu, cpus) {
                        vcpu = hv_cpu_number_to_vp_number(cpu);
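                        /* processor_mask is a single u64; VPs above 63 need the Ex variant. */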
                        if (vcpu >= 64) {
                                local_irq_restore(flags);
                                goto do_native;
                        }

                        __set_bit(vcpu, (unsigned long *)
                                  &flush->processor_mask);
                }
        }

        /*
         * We can flush at most max_gvas GVA ranges with one hypercall. Flush
         * the whole address space if we were asked to do more.
         */
        max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, 0,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                                             gva_n, 0, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

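/*
 * Extended variant using HvFlushVirtualAddressSpaceEx/ListEx with a sparse
 * VP set, so it can target VPs beyond 63. Selected when the hypervisor sets
 * HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (see hyperv_setup_mmu_ops()).
 */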
static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                       const struct flush_tlb_info *info)
{
        int nr_bank = 0, max_gvas, gva_n;
        struct hv_flush_pcpu_ex **flush_pcpu;
        struct hv_flush_pcpu_ex *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush_ex || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush_pcpu = this_cpu_ptr(pcpu_flush_ex);

        if (unlikely(!*flush_pcpu))
                *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));

        flush = *flush_pcpu;

        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                /*
                 * AddressSpace argument must match the CR3 with PCID bits
                 * stripped out.
                 */
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->address_space &= CR3_ADDR_MASK;
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->hv_vp_set.valid_bank_mask = 0;

        if (!cpumask_equal(cpus, cpu_present_mask)) {
                flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
                nr_bank = cpumask_to_vp_set(flush, cpus);
        }

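        /*
         * nr_bank == 0 means either all present CPUs are targeted or the
         * cpumask could not be represented as a VP set; fall back to
         * flushing all processors.
         */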
        if (!nr_bank) {
                flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        }

        /*
         * We can flush at most max_gvas GVA ranges with one hypercall. Flush
         * the whole address space if we were asked to do more.
         */
        max_gvas =
                (PAGE_SIZE - sizeof(*flush) - nr_bank *
                 sizeof(flush->hv_vp_set.bank_contents[0])) /
                sizeof(flush->gva_list[0]);

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else {
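                /*
                 * gva_list[] shares the page tail with the variable-size
                 * bank_contents[], so the first nr_bank slots are already
                 * occupied by the VP set.
                 */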
                gva_n = fill_gva_list(flush->gva_list, nr_bank,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
                        gva_n, nr_bank, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

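/* Install the Hyper-V remote TLB flush callback if the hypervisor recommends it. */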
void hyperv_setup_mmu_ops(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
                pr_info("Using hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
        } else {
                pr_info("Using ext hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
        }
}

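/* Allocate the per-cpu pointer array for the flush flavour that will be used. */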
void hyper_alloc_mmu(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
        else
                pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
}