1 // SPDX-License-Identifier: GPL-2.0-only
3 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
4 * No bombay mix was harmed in the writing of this file.
6 * Copyright (C) 2020 Google LLC
10 #include <linux/bitfield.h>
11 #include <asm/kvm_pgtable.h>
12 #include <asm/stage2_pgtable.h>
15 #define KVM_PTE_TYPE BIT(1)
16 #define KVM_PTE_TYPE_BLOCK 0
17 #define KVM_PTE_TYPE_PAGE 1
18 #define KVM_PTE_TYPE_TABLE 1
20 struct kvm_pgtable_walk_data {
21 struct kvm_pgtable_walker *walker;
28 static bool kvm_pgtable_walk_skip_bbm_tlbi(const struct kvm_pgtable_visit_ctx *ctx)
30 return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_BBM_TLBI);
33 static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
35 return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO);
38 static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
40 u64 granule = kvm_granule_size(ctx->level);
42 if (!kvm_level_supports_block_mapping(ctx->level))
45 if (granule > (ctx->end - ctx->addr))
48 if (!IS_ALIGNED(phys, granule))
51 return IS_ALIGNED(ctx->addr, granule);
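
/*
 * Editor's illustrative sketch (not part of the original file): the same
 * block-mapping rules applied outside of a walk context. With 4K granules a
 * level-2 block covers 2MiB, so both the input address and the physical
 * address must be 2MiB aligned and the remaining range must span at least
 * 2MiB. The example_* name is hypothetical.
 */
static bool __maybe_unused example_can_use_block(u64 addr, u64 end, u64 phys,
						 s8 level)
{
	u64 granule = kvm_granule_size(level);

	return kvm_level_supports_block_mapping(level) &&
	       (end - addr) >= granule &&
	       IS_ALIGNED(addr, granule) &&
	       IS_ALIGNED(phys, granule);
}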
54 static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, s8 level)
56 u64 shift = kvm_granule_shift(level);
57 u64 mask = BIT(PAGE_SHIFT - 3) - 1;
59 return (data->addr >> shift) & mask;
62 static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
64 u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
65 u64 mask = BIT(pgt->ia_bits) - 1;
67 return (addr & mask) >> shift;
70 static u32 kvm_pgd_pages(u32 ia_bits, s8 start_level)
72 struct kvm_pgtable pgt = {
74 .start_level = start_level,
77 return kvm_pgd_page_idx(&pgt, -1ULL) + 1;
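
/*
 * Editor's worked example (illustrative, 4K granule assumed): with
 * ia_bits = 40 and start_level = 1, kvm_granule_shift(0) is 39, so
 * kvm_pgd_page_idx() indexes the concatenated PGD with bit 39 of the
 * address:
 *
 *	kvm_pgd_pages(40, 1) == (((1ULL << 40) - 1) >> 39) + 1 == 2
 *
 * i.e. two concatenated level-1 tables back the PGD for that configuration.
 */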
80 static bool kvm_pte_table(kvm_pte_t pte, s8 level)
82 if (level == KVM_PGTABLE_LAST_LEVEL)
85 if (!kvm_pte_valid(pte))
88 return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
91 static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops)
93 return mm_ops->phys_to_virt(kvm_pte_to_phys(pte));
96 static void kvm_clear_pte(kvm_pte_t *ptep)
101 static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops *mm_ops)
103 kvm_pte_t pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));
105 pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
106 pte |= KVM_PTE_VALID;
110 static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level)
112 kvm_pte_t pte = kvm_phys_to_pte(pa);
113 u64 type = (level == KVM_PGTABLE_LAST_LEVEL) ? KVM_PTE_TYPE_PAGE :
116 pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
117 pte |= FIELD_PREP(KVM_PTE_TYPE, type);
118 pte |= KVM_PTE_VALID;
123 static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
125 return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
128 static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
129 const struct kvm_pgtable_visit_ctx *ctx,
130 enum kvm_pgtable_walk_flags visit)
132 struct kvm_pgtable_walker *walker = data->walker;
134 /* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */
135 WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held());
136 return walker->cb(ctx, visit);
139 static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker,
143 * Visitor callbacks return EAGAIN when the conditions that led to a
144 * fault are no longer reflected in the page tables due to a race to
145 * update a PTE. In the context of a fault handler this is interpreted
146 * as a signal to retry guest execution.
148 * Ignore the return code altogether for walkers outside a fault handler
149 * (e.g. write protecting a range of memory) and chug along with the page table walk.
153 return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT);
158 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
159 struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level);
161 static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
162 struct kvm_pgtable_mm_ops *mm_ops,
163 kvm_pteref_t pteref, s8 level)
165 enum kvm_pgtable_walk_flags flags = data->walker->flags;
166 kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref);
167 struct kvm_pgtable_visit_ctx ctx = {
169 .old = READ_ONCE(*ptep),
170 .arg = data->walker->arg,
172 .start = data->start,
181 bool table = kvm_pte_table(ctx.old, level);
183 if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
184 ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE);
188 if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) {
189 ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF);
194 * Reload the page table after invoking the walker callback for leaf
195 * entries or after pre-order traversal, to allow the walker to descend
196 * into a newly installed or replaced table.
199 ctx.old = READ_ONCE(*ptep);
200 table = kvm_pte_table(ctx.old, level);
203 if (!kvm_pgtable_walk_continue(data->walker, ret))
207 data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
208 data->addr += kvm_granule_size(level);
212 childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops);
213 ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1);
214 if (!kvm_pgtable_walk_continue(data->walker, ret))
217 if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST)
218 ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST);
221 if (kvm_pgtable_walk_continue(data->walker, ret))
227 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
228 struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level)
233 if (WARN_ON_ONCE(level < KVM_PGTABLE_FIRST_LEVEL ||
234 level > KVM_PGTABLE_LAST_LEVEL))
237 for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
238 kvm_pteref_t pteref = &pgtable[idx];
240 if (data->addr >= data->end)
243 ret = __kvm_pgtable_visit(data, mm_ops, pteref, level);
251 static int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_data *data)
255 u64 limit = BIT(pgt->ia_bits);
257 if (data->addr > limit || data->end > limit)
263 for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) {
264 kvm_pteref_t pteref = &pgt->pgd[idx * PTRS_PER_PTE];
266 ret = __kvm_pgtable_walk(data, pgt->mm_ops, pteref, pgt->start_level);
274 int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
275 struct kvm_pgtable_walker *walker)
277 struct kvm_pgtable_walk_data walk_data = {
278 .start = ALIGN_DOWN(addr, PAGE_SIZE),
279 .addr = ALIGN_DOWN(addr, PAGE_SIZE),
280 .end = PAGE_ALIGN(walk_data.addr + size),
285 r = kvm_pgtable_walk_begin(walker);
289 r = _kvm_pgtable_walk(pgt, &walk_data);
290 kvm_pgtable_walk_end(walker);
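
/*
 * Editor's illustrative sketch (not part of the original file): a minimal
 * walker built on kvm_pgtable_walk() that counts valid leaf entries in a
 * range. The example_* names are hypothetical.
 */
static int example_count_walker(const struct kvm_pgtable_visit_ctx *ctx,
				enum kvm_pgtable_walk_flags visit)
{
	u64 *count = ctx->arg;

	/* LEAF visits cover invalid entries too, so filter on validity */
	if (kvm_pte_valid(ctx->old))
		(*count)++;

	return 0;
}

static int __maybe_unused example_count_valid_leaves(struct kvm_pgtable *pgt,
						     u64 addr, u64 size,
						     u64 *count)
{
	struct kvm_pgtable_walker walker = {
		.cb	= example_count_walker,
		.arg	= count,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	*count = 0;
	return kvm_pgtable_walk(pgt, addr, size, &walker);
}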
295 struct leaf_walk_data {
300 static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
301 enum kvm_pgtable_walk_flags visit)
303 struct leaf_walk_data *data = ctx->arg;
305 data->pte = ctx->old;
306 data->level = ctx->level;
311 int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
312 kvm_pte_t *ptep, s8 *level)
314 struct leaf_walk_data data;
315 struct kvm_pgtable_walker walker = {
317 .flags = KVM_PGTABLE_WALK_LEAF,
322 ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
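
/*
 * Editor's illustrative sketch (not part of the original file): translating
 * an address to a physical address with kvm_pgtable_get_leaf(), accounting
 * for block-sized leaves. The example_* name is hypothetical.
 */
static int __maybe_unused example_addr_to_phys(struct kvm_pgtable *pgt,
					       u64 addr, u64 *phys)
{
	kvm_pte_t pte;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (!kvm_pte_valid(pte))
		return -ENOENT;

	/* Add the offset within the (possibly block-sized) leaf mapping */
	*phys = kvm_pte_to_phys(pte) + (addr & (kvm_granule_size(level) - 1));
	return 0;
}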
334 struct hyp_map_data {
339 static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
341 bool device = prot & KVM_PGTABLE_PROT_DEVICE;
342 u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
343 kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
344 u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
345 u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
346 KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;
348 if (!(prot & KVM_PGTABLE_PROT_R))
351 if (prot & KVM_PGTABLE_PROT_X) {
352 if (prot & KVM_PGTABLE_PROT_W)
358 if (system_supports_bti_kernel())
359 attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP;
361 attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
364 attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
365 if (!kvm_lpa2_is_enabled())
366 attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
367 attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
368 attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
374 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
376 enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
379 if (!kvm_pte_valid(pte))
382 if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
383 prot |= KVM_PGTABLE_PROT_X;
385 ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
386 if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
387 prot |= KVM_PGTABLE_PROT_R;
388 else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
389 prot |= KVM_PGTABLE_PROT_RW;
394 static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
395 struct hyp_map_data *data)
397 u64 phys = data->phys + (ctx->addr - ctx->start);
400 if (!kvm_block_mapping_supported(ctx, phys))
403 new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
406 if (!kvm_pte_valid(ctx->old))
407 ctx->mm_ops->get_page(ctx->ptep);
408 else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
411 smp_store_release(ctx->ptep, new);
415 static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
416 enum kvm_pgtable_walk_flags visit)
418 kvm_pte_t *childp, new;
419 struct hyp_map_data *data = ctx->arg;
420 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
422 if (hyp_map_walker_try_leaf(ctx, data))
425 if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
428 childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
432 new = kvm_init_table_pte(childp, mm_ops);
433 mm_ops->get_page(ctx->ptep);
434 smp_store_release(ctx->ptep, new);
439 int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
440 enum kvm_pgtable_prot prot)
443 struct hyp_map_data map_data = {
444 .phys = ALIGN_DOWN(phys, PAGE_SIZE),
446 struct kvm_pgtable_walker walker = {
447 .cb = hyp_map_walker,
448 .flags = KVM_PGTABLE_WALK_LEAF,
452 ret = hyp_set_prot_attr(prot, &map_data.attr);
456 ret = kvm_pgtable_walk(pgt, addr, size, &walker);
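
/*
 * Editor's illustrative sketch (not part of the original file): mapping a
 * read/execute region into the hyp stage-1. hyp_set_prot_attr() enforces
 * W^X, so requesting KVM_PGTABLE_PROT_W together with KVM_PGTABLE_PROT_X
 * would fail with -EINVAL. The example_* name is hypothetical.
 */
static int __maybe_unused example_hyp_map_text(struct kvm_pgtable *pgt,
					       u64 va, u64 pa, u64 size)
{
	return kvm_pgtable_hyp_map(pgt, va, size, pa,
				   KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X);
}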
462 static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
463 enum kvm_pgtable_walk_flags visit)
465 kvm_pte_t *childp = NULL;
466 u64 granule = kvm_granule_size(ctx->level);
467 u64 *unmapped = ctx->arg;
468 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
470 if (!kvm_pte_valid(ctx->old))
473 if (kvm_pte_table(ctx->old, ctx->level)) {
474 childp = kvm_pte_follow(ctx->old, mm_ops);
476 if (mm_ops->page_count(childp) != 1)
479 kvm_clear_pte(ctx->ptep);
481 __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN);
483 if (ctx->end - ctx->addr < granule)
486 kvm_clear_pte(ctx->ptep);
488 __tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
489 *unmapped += granule;
494 mm_ops->put_page(ctx->ptep);
497 mm_ops->put_page(childp);
502 u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
505 struct kvm_pgtable_walker walker = {
506 .cb = hyp_unmap_walker,
508 .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
511 if (!pgt->mm_ops->page_count)
514 kvm_pgtable_walk(pgt, addr, size, &walker);
518 int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
519 struct kvm_pgtable_mm_ops *mm_ops)
521 s8 start_level = KVM_PGTABLE_LAST_LEVEL + 1 -
522 ARM64_HW_PGTABLE_LEVELS(va_bits);
524 if (start_level < KVM_PGTABLE_FIRST_LEVEL ||
525 start_level > KVM_PGTABLE_LAST_LEVEL)
528 pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL);
532 pgt->ia_bits = va_bits;
533 pgt->start_level = start_level;
534 pgt->mm_ops = mm_ops;
536 pgt->force_pte_cb = NULL;
541 static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
542 enum kvm_pgtable_walk_flags visit)
544 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
546 if (!kvm_pte_valid(ctx->old))
549 mm_ops->put_page(ctx->ptep);
551 if (kvm_pte_table(ctx->old, ctx->level))
552 mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops));
557 void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
559 struct kvm_pgtable_walker walker = {
560 .cb = hyp_free_walker,
561 .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
564 WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
565 pgt->mm_ops->put_page(kvm_dereference_pteref(&walker, pgt->pgd));
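
/*
 * Editor's illustrative sketch (not part of the original file): the expected
 * lifecycle of a hyp stage-1 table - init, map, destroy - with a
 * caller-provided mm_ops. The example_* name is hypothetical.
 */
static int __maybe_unused example_hyp_table_lifecycle(struct kvm_pgtable_mm_ops *mm_ops,
						      u64 va, u64 pa, u32 va_bits)
{
	struct kvm_pgtable pgt;
	int ret;

	ret = kvm_pgtable_hyp_init(&pgt, va_bits, mm_ops);
	if (ret)
		return ret;

	ret = kvm_pgtable_hyp_map(&pgt, va, PAGE_SIZE, pa,
				  KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W);

	/* Tears the table down and returns the pages through mm_ops */
	kvm_pgtable_hyp_destroy(&pgt);
	return ret;
}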
569 struct stage2_map_data {
577 struct kvm_s2_mmu *mmu;
580 /* Force mappings to page granularity */
583 /* Walk should update owner_id only */
587 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
589 u64 vtcr = VTCR_EL2_FLAGS;
592 vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
593 vtcr |= VTCR_EL2_T0SZ(phys_shift);
595 * Use a minimum 2 level page table to prevent splitting
596 * host PMD huge pages at stage2.
598 lvls = stage2_pgtable_levels(phys_shift);
603 * When LPA2 is enabled, the HW supports an extra level of translation
604 * (for 5 in total) when using 4K pages. It also introduces VTCR_EL2.SL2
605 * as an addition to SL0 to enable encoding this extra start level.
606 * However, since we always use concatenated pages for the first level
607 * lookup, we will never need this extra level and therefore do not need to set SL2.
610 vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
612 #ifdef CONFIG_ARM64_HW_AFDBM
614 * Enable the Hardware Access Flag management, unconditionally
615 * on all CPUs. In systems that have asymmetric support for the feature
616 * this allows KVM to leverage hardware support on the subset of cores
617 * that implement the feature.
619 * The architecture requires VTCR_EL2.HA to be RES0 (thus ignored by
620 * hardware) on implementations that do not advertise support for the
621 * feature. As such, setting HA unconditionally is safe, unless you
622 * happen to be running on a design that has unadvertised support for
623 * HAFDBS. Here be dragons.
625 if (!cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
627 #endif /* CONFIG_ARM64_HW_AFDBM */
629 if (kvm_lpa2_is_enabled())
632 /* Set the vmid bits */
633 vtcr |= (get_vmid_bits(mmfr1) == 16) ?
640 static bool stage2_has_fwb(struct kvm_pgtable *pgt)
642 if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
645 return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
648 void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
649 phys_addr_t addr, size_t size)
651 unsigned long pages, inval_pages;
653 if (!system_supports_tlb_range()) {
654 kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
658 pages = size >> PAGE_SHIFT;
660 inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
661 kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
663 addr += inval_pages << PAGE_SHIFT;
664 pages -= inval_pages;
668 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
670 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
674 u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
676 switch (prot & (KVM_PGTABLE_PROT_DEVICE |
677 KVM_PGTABLE_PROT_NORMAL_NC)) {
678 case KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC:
680 case KVM_PGTABLE_PROT_DEVICE:
681 if (prot & KVM_PGTABLE_PROT_X)
683 attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE);
685 case KVM_PGTABLE_PROT_NORMAL_NC:
686 if (prot & KVM_PGTABLE_PROT_X)
688 attr = KVM_S2_MEMATTR(pgt, NORMAL_NC);
691 attr = KVM_S2_MEMATTR(pgt, NORMAL);
694 if (!(prot & KVM_PGTABLE_PROT_X))
695 attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
697 if (prot & KVM_PGTABLE_PROT_R)
698 attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
700 if (prot & KVM_PGTABLE_PROT_W)
701 attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
703 if (!kvm_lpa2_is_enabled())
704 attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
706 attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
707 attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
713 enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
715 enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
717 if (!kvm_pte_valid(pte))
720 if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
721 prot |= KVM_PGTABLE_PROT_R;
722 if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
723 prot |= KVM_PGTABLE_PROT_W;
724 if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
725 prot |= KVM_PGTABLE_PROT_X;
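
/*
 * Editor's illustrative sketch (not part of the original file): the
 * attribute encoding above decodes back through
 * kvm_pgtable_stage2_pte_prot(), e.g. to test whether a leaf is writable.
 * The example_* name is hypothetical.
 */
static bool __maybe_unused example_stage2_pte_is_writable(kvm_pte_t pte)
{
	enum kvm_pgtable_prot prot = kvm_pgtable_stage2_pte_prot(pte);

	return kvm_pte_valid(pte) && (prot & KVM_PGTABLE_PROT_W);
}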
730 static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
732 if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
735 return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS));
738 static bool stage2_pte_is_counted(kvm_pte_t pte)
741 * The refcount tracks valid entries as well as invalid entries if they
742 * encode ownership of a page by an entity other than the page-table
743 * owner, whose ID is 0.
748 static bool stage2_pte_is_locked(kvm_pte_t pte)
750 return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED);
753 static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new)
755 if (!kvm_pgtable_walk_shared(ctx)) {
756 WRITE_ONCE(*ctx->ptep, new);
760 return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old;
764 * stage2_try_break_pte() - Invalidates a pte according to the
765 * 'break-before-make' requirements of the architecture.
768 * @ctx: context of the visited pte.
771 * Returns: true if the pte was successfully broken.
773 * If the removed pte was valid, performs the necessary serialization and TLB
774 * invalidation for the old value. For counted ptes, drops the reference count
775 * on the containing table page.
777 static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
778 struct kvm_s2_mmu *mmu)
780 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
782 if (stage2_pte_is_locked(ctx->old)) {
784 * Should never occur if this walker has exclusive access to the page tables.
787 WARN_ON(!kvm_pgtable_walk_shared(ctx));
791 if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED))
794 if (!kvm_pgtable_walk_skip_bbm_tlbi(ctx)) {
796 * Perform the appropriate TLB invalidation based on the
797 * evicted pte value (if any).
799 if (kvm_pte_table(ctx->old, ctx->level)) {
800 u64 size = kvm_granule_size(ctx->level);
801 u64 addr = ALIGN_DOWN(ctx->addr, size);
803 kvm_tlb_flush_vmid_range(mmu, addr, size);
804 } else if (kvm_pte_valid(ctx->old)) {
805 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
806 ctx->addr, ctx->level);
810 if (stage2_pte_is_counted(ctx->old))
811 mm_ops->put_page(ctx->ptep);
816 static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new)
818 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
820 WARN_ON(!stage2_pte_is_locked(*ctx->ptep));
822 if (stage2_pte_is_counted(new))
823 mm_ops->get_page(ctx->ptep);
825 smp_store_release(ctx->ptep, new);
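
/*
 * Editor's illustrative sketch (not part of the original file): the
 * canonical break-before-make sequence used by the stage-2 walkers - break
 * (which locks the PTE and performs any required TLBI), optionally perform
 * CMOs, then make. The real callers are stage2_map_walker_try_leaf() and
 * stage2_map_walk_leaf() below; the example_* name is hypothetical.
 */
static bool __maybe_unused example_bbm_install(const struct kvm_pgtable_visit_ctx *ctx,
					       struct kvm_s2_mmu *mmu,
					       kvm_pte_t new)
{
	if (!stage2_try_break_pte(ctx, mmu))
		return false;

	/* CMOs for cacheable/executable mappings would go here */

	stage2_make_pte(ctx, new);
	return true;
}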
828 static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
831 * If FEAT_TLBIRANGE is implemented, defer the individual
832 * TLB invalidations until the entire walk is finished, and
833 * then use the range-based TLBI instructions to do the
834 * invalidations. Condition deferred TLB invalidation on the
835 * system supporting FWB as the optimization is entirely
836 * pointless when the unmap walker needs to perform CMOs.
838 return system_supports_tlb_range() && stage2_has_fwb(pgt);
841 static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
842 struct kvm_s2_mmu *mmu,
843 struct kvm_pgtable_mm_ops *mm_ops)
845 struct kvm_pgtable *pgt = ctx->arg;
848 * Clear the existing PTE, and perform break-before-make if it was
849 * valid. Depending on the system support, defer the TLB maintenance
850 * for the same until the entire unmap walk is completed.
852 if (kvm_pte_valid(ctx->old)) {
853 kvm_clear_pte(ctx->ptep);
855 if (kvm_pte_table(ctx->old, ctx->level)) {
856 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
858 } else if (!stage2_unmap_defer_tlb_flush(pgt)) {
859 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
864 mm_ops->put_page(ctx->ptep);
867 static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
869 u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
870 return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
873 static bool stage2_pte_executable(kvm_pte_t pte)
875 return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
878 static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
879 const struct stage2_map_data *data)
881 u64 phys = data->phys;
883 /* Work out the correct PA based on how far the walk has gotten */
884 return phys + (ctx->addr - ctx->start);
887 static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
888 struct stage2_map_data *data)
890 u64 phys = stage2_map_walker_phys_addr(ctx, data);
892 if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL)
895 if (data->annotation)
898 return kvm_block_mapping_supported(ctx, phys);
901 static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
902 struct stage2_map_data *data)
905 u64 phys = stage2_map_walker_phys_addr(ctx, data);
906 u64 granule = kvm_granule_size(ctx->level);
907 struct kvm_pgtable *pgt = data->mmu->pgt;
908 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
910 if (!stage2_leaf_mapping_allowed(ctx, data))
913 if (!data->annotation)
914 new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
916 new = kvm_init_invalid_leaf_owner(data->owner_id);
919 * Skip updating the PTE if we are trying to recreate the exact
920 * same mapping or only change the access permissions. Instead,
921 * the vCPU will exit one more time from the guest if still needed
922 * and then go through the path of relaxing permissions.
924 if (!stage2_pte_needs_update(ctx->old, new))
927 /* If we're only changing software bits, then store them and go! */
928 if (!kvm_pgtable_walk_shared(ctx) &&
929 !((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) {
930 bool old_is_counted = stage2_pte_is_counted(ctx->old);
932 if (old_is_counted != stage2_pte_is_counted(new)) {
934 mm_ops->put_page(ctx->ptep);
936 mm_ops->get_page(ctx->ptep);
938 WARN_ON_ONCE(!stage2_try_set_pte(ctx, new));
942 if (!stage2_try_break_pte(ctx, data->mmu))
945 /* Perform CMOs before installation of the guest stage-2 PTE */
946 if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->dcache_clean_inval_poc &&
947 stage2_pte_cacheable(pgt, new))
948 mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
951 if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->icache_inval_pou &&
952 stage2_pte_executable(new))
953 mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
955 stage2_make_pte(ctx, new);
960 static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
961 struct stage2_map_data *data)
963 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
964 kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops);
967 if (!stage2_leaf_mapping_allowed(ctx, data))
970 ret = stage2_map_walker_try_leaf(ctx, data);
974 mm_ops->free_unlinked_table(childp, ctx->level);
978 static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
979 struct stage2_map_data *data)
981 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
982 kvm_pte_t *childp, new;
985 ret = stage2_map_walker_try_leaf(ctx, data);
989 if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
995 childp = mm_ops->zalloc_page(data->memcache);
999 if (!stage2_try_break_pte(ctx, data->mmu)) {
1000 mm_ops->put_page(childp);
1005 * If we've run into an existing block mapping then replace it with
1006 * a table. Accesses beyond 'end' that fall within the new table
1007 * will be mapped lazily.
1009 new = kvm_init_table_pte(childp, mm_ops);
1010 stage2_make_pte(ctx, new);
1016 * The TABLE_PRE callback runs for table entries on the way down, looking
1017 * for table entries which we could conceivably replace with a block entry
1018 * for this mapping. If it finds one it replaces the entry and calls
1019 * kvm_pgtable_mm_ops::free_unlinked_table() to tear down the detached table.
1021 * Otherwise, the LEAF callback performs the mapping at the existing leaves instead.
1024 static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
1025 enum kvm_pgtable_walk_flags visit)
1027 struct stage2_map_data *data = ctx->arg;
1030 case KVM_PGTABLE_WALK_TABLE_PRE:
1031 return stage2_map_walk_table_pre(ctx, data);
1032 case KVM_PGTABLE_WALK_LEAF:
1033 return stage2_map_walk_leaf(ctx, data);
1039 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
1040 u64 phys, enum kvm_pgtable_prot prot,
1041 void *mc, enum kvm_pgtable_walk_flags flags)
1044 struct stage2_map_data map_data = {
1045 .phys = ALIGN_DOWN(phys, PAGE_SIZE),
1048 .force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot),
1050 struct kvm_pgtable_walker walker = {
1051 .cb = stage2_map_walker,
1053 KVM_PGTABLE_WALK_TABLE_PRE |
1054 KVM_PGTABLE_WALK_LEAF,
1058 if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys)))
1061 ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
1065 ret = kvm_pgtable_walk(pgt, addr, size, &walker);
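
/*
 * Editor's illustrative sketch (not part of the original file): establishing
 * a writable stage-2 mapping for a single page from a fault-handling path,
 * passing a caller-provided memcache and walking in shared (RCU) mode. The
 * example_* name is hypothetical.
 */
static int __maybe_unused example_stage2_map_page(struct kvm_pgtable *pgt,
						  u64 ipa, u64 pa,
						  void *memcache)
{
	return kvm_pgtable_stage2_map(pgt, ipa, PAGE_SIZE, pa,
				      KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W,
				      memcache, KVM_PGTABLE_WALK_SHARED);
}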
1070 int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
1071 void *mc, u8 owner_id)
1074 struct stage2_map_data map_data = {
1077 .owner_id = owner_id,
1081 struct kvm_pgtable_walker walker = {
1082 .cb = stage2_map_walker,
1083 .flags = KVM_PGTABLE_WALK_TABLE_PRE |
1084 KVM_PGTABLE_WALK_LEAF,
1088 if (owner_id > KVM_MAX_OWNER_ID)
1091 ret = kvm_pgtable_walk(pgt, addr, size, &walker);
1095 static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
1096 enum kvm_pgtable_walk_flags visit)
1098 struct kvm_pgtable *pgt = ctx->arg;
1099 struct kvm_s2_mmu *mmu = pgt->mmu;
1100 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
1101 kvm_pte_t *childp = NULL;
1102 bool need_flush = false;
1104 if (!kvm_pte_valid(ctx->old)) {
1105 if (stage2_pte_is_counted(ctx->old)) {
1106 kvm_clear_pte(ctx->ptep);
1107 mm_ops->put_page(ctx->ptep);
1112 if (kvm_pte_table(ctx->old, ctx->level)) {
1113 childp = kvm_pte_follow(ctx->old, mm_ops);
1115 if (mm_ops->page_count(childp) != 1)
1117 } else if (stage2_pte_cacheable(pgt, ctx->old)) {
1118 need_flush = !stage2_has_fwb(pgt);
1122 * This is similar to the map() path in that we unmap the entire
1123 * block entry and rely on the remaining portions being faulted back lazily.
1126 stage2_unmap_put_pte(ctx, mmu, mm_ops);
1128 if (need_flush && mm_ops->dcache_clean_inval_poc)
1129 mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
1130 kvm_granule_size(ctx->level));
1133 mm_ops->put_page(childp);
1138 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
1141 struct kvm_pgtable_walker walker = {
1142 .cb = stage2_unmap_walker,
1144 .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
1147 ret = kvm_pgtable_walk(pgt, addr, size, &walker);
1148 if (stage2_unmap_defer_tlb_flush(pgt))
1149 /* Perform the deferred TLB invalidations */
1150 kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
1155 struct stage2_attr_data {
1162 static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
1163 enum kvm_pgtable_walk_flags visit)
1165 kvm_pte_t pte = ctx->old;
1166 struct stage2_attr_data *data = ctx->arg;
1167 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
1169 if (!kvm_pte_valid(ctx->old))
1172 data->level = ctx->level;
1174 pte &= ~data->attr_clr;
1175 pte |= data->attr_set;
1178 * We may race with the CPU trying to set the access flag here,
1179 * but worst-case the access flag update gets lost and will be
1180 * set on the next access instead.
1182 if (data->pte != pte) {
1184 * Invalidate instruction cache before updating the guest
1185 * stage-2 PTE if we are going to add executable permission.
1187 if (mm_ops->icache_inval_pou &&
1188 stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old))
1189 mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
1190 kvm_granule_size(ctx->level));
1192 if (!stage2_try_set_pte(ctx, pte))
1199 static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
1200 u64 size, kvm_pte_t attr_set,
1201 kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
1202 s8 *level, enum kvm_pgtable_walk_flags flags)
1205 kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
1206 struct stage2_attr_data data = {
1207 .attr_set = attr_set & attr_mask,
1208 .attr_clr = attr_clr & attr_mask,
1210 struct kvm_pgtable_walker walker = {
1211 .cb = stage2_attr_walker,
1213 .flags = flags | KVM_PGTABLE_WALK_LEAF,
1216 ret = kvm_pgtable_walk(pgt, addr, size, &walker);
1221 *orig_pte = data.pte;
1224 *level = data.level;
1228 int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
1230 return stage2_update_leaf_attrs(pgt, addr, size, 0,
1231 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
1235 void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
1236 enum kvm_pgtable_walk_flags flags)
1240 ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
1246 struct stage2_age_data {
1251 static int stage2_age_walker(const struct kvm_pgtable_visit_ctx *ctx,
1252 enum kvm_pgtable_walk_flags visit)
1254 kvm_pte_t new = ctx->old & ~KVM_PTE_LEAF_ATTR_LO_S2_AF;
1255 struct stage2_age_data *data = ctx->arg;
1257 if (!kvm_pte_valid(ctx->old) || new == ctx->old)
1263 * stage2_age_walker() is always called while holding the MMU lock for
1264 * write, so this will always succeed. Nonetheless, this deliberately
1265 * follows the race detection pattern of the other stage-2 walkers in
1266 * case the locking mechanics of the MMU notifiers is ever changed.
1268 if (data->mkold && !stage2_try_set_pte(ctx, new))
1272 * "But where's the TLBI?!", you scream.
1273 * "Over in the core code", I sigh.
1275 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
1280 bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
1281 u64 size, bool mkold)
1283 struct stage2_age_data data = {
1286 struct kvm_pgtable_walker walker = {
1287 .cb = stage2_age_walker,
1289 .flags = KVM_PGTABLE_WALK_LEAF,
1292 WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
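
/*
 * Editor's illustrative sketch (not part of the original file): typical
 * callers of the attribute helpers above - write-protecting a range when
 * dirty logging is enabled, and aging a page from an MMU-notifier-style
 * path. The example_* names are hypothetical.
 */
static void __maybe_unused example_stage2_start_dirty_log(struct kvm_pgtable *pgt,
							  u64 ipa, u64 size)
{
	/* Subsequent write faults go through the permission-relax path */
	WARN_ON(kvm_pgtable_stage2_wrprotect(pgt, ipa, size));
}

static bool __maybe_unused example_stage2_age_page(struct kvm_pgtable *pgt,
						   u64 ipa, bool mkold)
{
	return kvm_pgtable_stage2_test_clear_young(pgt, ipa, PAGE_SIZE, mkold);
}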
1296 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
1297 enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags)
1301 kvm_pte_t set = 0, clr = 0;
1303 if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
1306 if (prot & KVM_PGTABLE_PROT_R)
1307 set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
1309 if (prot & KVM_PGTABLE_PROT_W)
1310 set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
1312 if (prot & KVM_PGTABLE_PROT_X)
1313 clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
1315 ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags);
1316 if (!ret || ret == -EAGAIN)
1317 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
1321 static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx,
1322 enum kvm_pgtable_walk_flags visit)
1324 struct kvm_pgtable *pgt = ctx->arg;
1325 struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
1327 if (!stage2_pte_cacheable(pgt, ctx->old))
1330 if (mm_ops->dcache_clean_inval_poc)
1331 mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
1332 kvm_granule_size(ctx->level));
1336 int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
1338 struct kvm_pgtable_walker walker = {
1339 .cb = stage2_flush_walker,
1340 .flags = KVM_PGTABLE_WALK_LEAF,
1344 if (stage2_has_fwb(pgt))
1347 return kvm_pgtable_walk(pgt, addr, size, &walker);
1350 kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
1352 enum kvm_pgtable_prot prot,
1353 void *mc, bool force_pte)
1355 struct stage2_map_data map_data = {
1359 .force_pte = force_pte,
1361 struct kvm_pgtable_walker walker = {
1362 .cb = stage2_map_walker,
1363 .flags = KVM_PGTABLE_WALK_LEAF |
1364 KVM_PGTABLE_WALK_SKIP_BBM_TLBI |
1365 KVM_PGTABLE_WALK_SKIP_CMO,
1369 * The input address (.addr) is irrelevant for walking an
1370 * unlinked table. Construct an arbitrary IA range to map
1371 * kvm_granule_size(level) worth of memory.
1373 struct kvm_pgtable_walk_data data = {
1376 .end = kvm_granule_size(level),
1378 struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
1382 if (!IS_ALIGNED(phys, kvm_granule_size(level)))
1383 return ERR_PTR(-EINVAL);
1385 ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
1387 return ERR_PTR(ret);
1389 pgtable = mm_ops->zalloc_page(mc);
1391 return ERR_PTR(-ENOMEM);
1393 ret = __kvm_pgtable_walk(&data, mm_ops, (kvm_pteref_t)pgtable,
1396 kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level);
1397 return ERR_PTR(ret);
1404 * Get the number of page-tables needed to replace a block with a
1405 * fully populated tree up to the PTE entries. Note that @level is
1406 * interpreted as in "level @level entry".
1408 static int stage2_block_get_nr_page_tables(s8 level)
1412 return PTRS_PER_PTE + 1;
1418 WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
1419 level > KVM_PGTABLE_LAST_LEVEL);
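
/*
 * Editor's worked example (illustrative, 4K granule assumed): splitting a
 * level-1 (1GiB) block down to PTEs needs one level-2 table plus
 * PTRS_PER_PTE level-3 tables, i.e. 512 + 1 = 513 pages; a level-2 (2MiB)
 * block needs a single level-3 table; a level-3 entry is already
 * page-granular and needs none.
 */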
1424 static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
1425 enum kvm_pgtable_walk_flags visit)
1427 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
1428 struct kvm_mmu_memory_cache *mc = ctx->arg;
1429 struct kvm_s2_mmu *mmu;
1430 kvm_pte_t pte = ctx->old, new, *childp;
1431 enum kvm_pgtable_prot prot;
1432 s8 level = ctx->level;
1437 /* No huge-pages exist at the last level */
1438 if (level == KVM_PGTABLE_LAST_LEVEL)
1441 /* We only split valid block mappings */
1442 if (!kvm_pte_valid(pte))
1445 nr_pages = stage2_block_get_nr_page_tables(level);
1449 if (mc->nobjs >= nr_pages) {
1450 /* Build a tree mapped down to the PTE granularity. */
1454 * Don't force PTEs, so create_unlinked() below does
1455 * not populate the tree up to the PTE level. The
1456 * consequence is that the call will require a single
1457 * page of level 2 entries at level 1, or a single
1458 * page of PTEs at level 2. If we are at level 1, the
1459 * PTEs will be created recursively.
1465 if (mc->nobjs < nr_pages)
1468 mmu = container_of(mc, struct kvm_s2_mmu, split_page_cache);
1469 phys = kvm_pte_to_phys(pte);
1470 prot = kvm_pgtable_stage2_pte_prot(pte);
1472 childp = kvm_pgtable_stage2_create_unlinked(mmu->pgt, phys,
1473 level, prot, mc, force_pte);
1475 return PTR_ERR(childp);
1477 if (!stage2_try_break_pte(ctx, mmu)) {
1478 kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
1483 * Note, the contents of the page table are guaranteed to be made
1484 * visible before the new PTE is assigned because stage2_make_pte()
1485 * writes the PTE using smp_store_release().
1487 new = kvm_init_table_pte(childp, mm_ops);
1488 stage2_make_pte(ctx, new);
1492 int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
1493 struct kvm_mmu_memory_cache *mc)
1495 struct kvm_pgtable_walker walker = {
1496 .cb = stage2_split_walker,
1497 .flags = KVM_PGTABLE_WALK_LEAF,
1502 ret = kvm_pgtable_walk(pgt, addr, size, &walker);
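
/*
 * Editor's illustrative sketch (not part of the original file): eagerly
 * splitting huge mappings ahead of dirty logging, using a pre-topped-up
 * split_page_cache as consumed by stage2_split_walker(). The example_* name
 * is hypothetical.
 */
static int __maybe_unused example_stage2_split_range(struct kvm_s2_mmu *mmu,
						     u64 ipa, u64 size)
{
	return kvm_pgtable_stage2_split(mmu->pgt, ipa, size,
					&mmu->split_page_cache);
}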
1507 int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
1508 struct kvm_pgtable_mm_ops *mm_ops,
1509 enum kvm_pgtable_stage2_flags flags,
1510 kvm_pgtable_force_pte_cb_t force_pte_cb)
1513 u64 vtcr = mmu->vtcr;
1514 u32 ia_bits = VTCR_EL2_IPA(vtcr);
1515 u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
1516 s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
1518 pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
1519 pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
1523 pgt->ia_bits = ia_bits;
1524 pgt->start_level = start_level;
1525 pgt->mm_ops = mm_ops;
1528 pgt->force_pte_cb = force_pte_cb;
1530 /* Ensure zeroed PGD pages are visible to the hardware walker */
1535 size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
1537 u32 ia_bits = VTCR_EL2_IPA(vtcr);
1538 u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
1539 s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
1541 return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
1544 static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
1545 enum kvm_pgtable_walk_flags visit)
1547 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
1549 if (!stage2_pte_is_counted(ctx->old))
1552 mm_ops->put_page(ctx->ptep);
1554 if (kvm_pte_table(ctx->old, ctx->level))
1555 mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops));
1560 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
1563 struct kvm_pgtable_walker walker = {
1564 .cb = stage2_free_walker,
1565 .flags = KVM_PGTABLE_WALK_LEAF |
1566 KVM_PGTABLE_WALK_TABLE_POST,
1569 WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
1570 pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
1571 pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);
1575 void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
1577 kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
1578 struct kvm_pgtable_walker walker = {
1579 .cb = stage2_free_walker,
1580 .flags = KVM_PGTABLE_WALK_LEAF |
1581 KVM_PGTABLE_WALK_TABLE_POST,
1583 struct kvm_pgtable_walk_data data = {
1587 * At this point the IPA really doesn't matter, as the page
1588 * table being traversed has already been removed from the stage
1589 * 2. Set an appropriate range to cover the entire page table.
1592 .end = kvm_granule_size(level),
1595 WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));
1597 WARN_ON(mm_ops->page_count(pgtable) != 1);
1598 mm_ops->put_page(pgtable);