#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
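
/*
 * For example (with 4 KiB pages): the entry 0x7f0000000fff names the
 * page at 0x7f0000000000 in bits 63:12 and carries 0xfff == 4095
 * additional pages in its low 12 bits, i.e. a single entry can flush
 * one full HV_TLB_FLUSH_UNIT of 4096 pages.
 */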

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                      const struct flush_tlb_info *info);

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
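/*
 * For example, a range spanning 2.5 * HV_TLB_FLUSH_UNIT produces three
 * entries: two covering 4096 pages each and a final one covering the
 * remaining 2048 pages, so the function returns 3.
 */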
static inline int fill_gva_list(u64 gva_list[], int offset,
                                unsigned long start, unsigned long end)
{
        int gva_n = offset;
        unsigned long cur = start, diff;

        do {
                diff = end > cur ? end - cur : 0;

                gva_list[gva_n] = cur & PAGE_MASK;
                /*
                 * Lower 12 bits encode the number of additional
                 * pages to flush (in addition to the 'cur' page).
                 */
                if (diff >= HV_TLB_FLUSH_UNIT) {
                        gva_list[gva_n] |= ~PAGE_MASK;
                        cur += HV_TLB_FLUSH_UNIT;
                } else if (diff) {
                        gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
                        cur = end;
                }

                gva_n++;

        } while (cur < end);

        return gva_n - offset;
}

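/*
 * CPUs in lazy TLB mode are running a kernel thread on a borrowed mm
 * and will be flushed when they switch back to it, so remote flushes
 * may skip them unless page tables were freed (in which case a stale
 * speculative walk could touch the freed tables).
 */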
static bool cpu_is_lazy(int cpu)
{
        return per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
}

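/*
 * Flush TLBs on the CPUs in 'cpus' via hypercall instead of IPIs:
 * build the argument page, pick HVCALL_FLUSH_VIRTUAL_ADDRESS_{SPACE,LIST}
 * or their *_EX variants, and fall back to native_flush_tlb_multi() if
 * the hypercall page is unavailable, a VP number can't be resolved, or
 * the hypercall fails.
 */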
static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
                                   const struct flush_tlb_info *info)
{
        int cpu, vcpu, gva_n, max_gvas;
        struct hv_tlb_flush *flush;
        u64 status;
        unsigned long flags;
        bool do_lazy = !info->freed_tables;

        trace_hyperv_mmu_flush_tlb_multi(cpus, info);

        if (!hv_hypercall_pg)
                goto do_native;

        local_irq_save(flags);

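        /* Per-cpu page pre-allocated for passing hypercall input. */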
        flush = *this_cpu_ptr(hyperv_pcpu_input_arg);

        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                /*
                 * AddressSpace argument must match the CR3 with PCID bits
                 * stripped out.
                 */
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->address_space &= CR3_ADDR_MASK;
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->processor_mask = 0;
        if (cpumask_equal(cpus, cpu_present_mask)) {
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        } else {
                /*
                 * From the supplied CPU set we need to figure out if we can get
                 * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
                 * hypercalls. This is possible when the highest VP number in
                 * the set is < 64. As VP numbers are usually in ascending order
                 * and match Linux CPU ids, here is an optimization: we check
                 * the VP number for the highest bit in the supplied set first
                 * so we can quickly find out if using *_EX hypercalls is a
                 * must. We will also check all VP numbers when walking the
                 * supplied CPU set to remain correct in all cases.
                 */
                cpu = cpumask_last(cpus);

                if (cpu < nr_cpumask_bits && hv_cpu_number_to_vp_number(cpu) >= 64)
                        goto do_ex_hypercall;

                for_each_cpu(cpu, cpus) {
                        if (do_lazy && cpu_is_lazy(cpu))
                                continue;
                        vcpu = hv_cpu_number_to_vp_number(cpu);
                        if (vcpu == VP_INVAL) {
                                local_irq_restore(flags);
                                goto do_native;
                        }

                        if (vcpu >= 64)
                                goto do_ex_hypercall;

                        __set_bit(vcpu, (unsigned long *)
                                  &flush->processor_mask);
                }

                /* nothing to flush if 'processor_mask' ends up being empty */
                if (!flush->processor_mask) {
                        local_irq_restore(flags);
                        return;
                }
        }

        /*
         * We can flush at most max_gvas GVA ranges with one hypercall.
         * Flush the whole address space if we were asked to do more.
         */
        max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
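        /*
         * E.g. with 4 KiB pages and struct hv_tlb_flush's three u64
         * header fields (address_space, flags, processor_mask) this is
         * (4096 - 24) / 8 = 509 list entries per hypercall.
         */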

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start) / HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, 0,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                                             gva_n, 0, flush, NULL);
        }
        goto check_status;

do_ex_hypercall:
        status = hyperv_flush_tlb_others_ex(cpus, info);

check_status:
        local_irq_restore(flags);

        if (hv_result_success(status))
                return;
do_native:
        native_flush_tlb_multi(cpus, info);
}

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                      const struct flush_tlb_info *info)
{
        int nr_bank = 0, max_gvas, gva_n;
        struct hv_tlb_flush_ex *flush;
        u64 status;

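        /*
         * The *_EX hypercalls take a sparse VP set; if the hypervisor
         * doesn't recommend that format, report failure so the caller
         * falls back to the native flush.
         */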
        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                return HV_STATUS_INVALID_PARAMETER;

        flush = *this_cpu_ptr(hyperv_pcpu_input_arg);

        if (info->mm) {
                /*
                 * AddressSpace argument must match the CR3 with PCID bits
                 * stripped out.
                 */
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->address_space &= CR3_ADDR_MASK;
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->hv_vp_set.valid_bank_mask = 0;

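        /*
         * Sparse format: each set bit in valid_bank_mask selects a
         * 64-VP bank whose bitmap follows in bank_contents[]; nr_bank
         * is the number of banks actually populated.
         */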
        flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
        nr_bank = cpumask_to_vpset_skip(&flush->hv_vp_set, cpus,
                        info->freed_tables ? NULL : cpu_is_lazy);
        if (nr_bank < 0)
                return HV_STATUS_INVALID_PARAMETER;

        /*
         * We can flush at most max_gvas GVA ranges with one hypercall.
         * Flush the whole address space if we were asked to do more.
         */
        max_gvas =
                (PAGE_SIZE - sizeof(*flush) - nr_bank *
                 sizeof(flush->hv_vp_set.bank_contents[0])) /
                sizeof(flush->gva_list[0]);
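        /*
         * E.g. assuming a 32-byte struct hv_tlb_flush_ex header and one
         * populated bank with 4 KiB pages: (4096 - 32 - 8) / 8 = 507.
         */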

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start) / HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, nr_bank,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
                        gva_n, nr_bank, flush, NULL);
        }

        return status;
}

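/*
 * Replace the stock TLB-flush operations with the hypercall-based ones
 * when the hypervisor recommends remote TLB flush hypercalls.
 */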
void hyperv_setup_mmu_ops(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        pr_info("Using hypercall for remote TLB flush\n");
        pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
        pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}