1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (C) 2020-2023 Intel Corporation
6 #include <linux/bitfield.h>
7 #include <linux/highmem.h>
12 #include "ivpu_mmu_context.h"
/*
 * Bit fields of a VPU virtual address that select the entry at each of the
 * three table levels (PGD -> PMD -> PTE). Each index field is 9 bits wide;
 * bits 11:0 are the offset within a 4 KB page.
 */
14 #define IVPU_MMU_PGD_INDEX_MASK GENMASK(38, 30)
15 #define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21)
16 #define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12)
/* The low 12 bits of a table entry hold flags; the remaining bits hold an address. */
17 #define IVPU_MMU_ENTRY_FLAGS_MASK GENMASK(11, 0)
/*
 * Individual entry flag bits.
 * NOTE(review): NG/AF presumably stand for "not global"/"access flag" -
 * confirm against the MMU specification.
 */
18 #define IVPU_MMU_ENTRY_FLAG_NG BIT(11)
19 #define IVPU_MMU_ENTRY_FLAG_AF BIT(10)
20 #define IVPU_MMU_ENTRY_FLAG_USER BIT(6)
21 #define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2)
22 #define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1)
23 #define IVPU_MMU_ENTRY_FLAG_VALID BIT(0)
/*
 * Sizes:
 *  - PAGE_SIZE:    one mapped page (4 KB)
 *  - PTE_MAP_SIZE: address span covered by one full PTE table
 *  - PMD_MAP_SIZE: address span covered by one full PMD table
 *  - PGTABLE_SIZE: size in bytes of one table of u64 entries
 * IVPU_MMU_PGTABLE_ENTRIES is defined outside this listing.
 */
25 #define IVPU_MMU_PAGE_SIZE SZ_4K
26 #define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
27 #define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
28 #define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
/*
 * Composite entry values:
 *  - ENTRY_VALID:   flags OR-ed into entries that point to a next-level table
 *  - ENTRY_INVALID: dummy address with every flag bit cleared (VALID bit is 0);
 *                   written into a PTE slot when a page is unmapped
 *  - ENTRY_MAPPED:  base flag set used for PTEs that map a page
 */
30 #define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000
31 #define IVPU_MMU_ENTRY_VALID (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID)
32 #define IVPU_MMU_ENTRY_INVALID (IVPU_MMU_DUMMY_ADDRESS & ~IVPU_MMU_ENTRY_FLAGS_MASK)
33 #define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
34 IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
/*
 * ivpu_mmu_pgtable_init() - allocate the top-level (PGD) table of @pgtable.
 *
 * Allocates one IVPU_MMU_PGTABLE_SIZE table of write-combined DMA memory and
 * records its DMA address in @pgtable->pgd_dma.
 *
 * NOTE(review): this listing omits lines of the body (the embedded line
 * numbers skip); the allocation-failure check, the store of the CPU pointer
 * and the return value are not visible here.
 */
36 static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
41 pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL);
46 pgtable->pgd_dma = pgd_dma;
/*
 * ivpu_mmu_pgtable_free() - release every table reachable from @pgtable.
 *
 * Walks all PGD slots. For each one it frees the PTE tables recorded in
 * pgd_cpu_entries[pgd_index][], then the PMD table itself, and finally the
 * PGD table. The DMA address handed to dma_free_wc() is recovered from the
 * parent entry by masking off the flag bits.
 *
 * NOTE(review): this listing omits lines of the body; the handling of PGD
 * slots that were never populated and the release of the kzalloc'ed
 * pgd_cpu_entries[] array are not visible here - confirm in the full source.
 */
51 static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
53 int pgd_index, pmd_index;
55 for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) {
/* CPU pointers to the PTE tables under this PGD slot */
56 u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index];
/* CPU pointer to the PMD table; its entries hold PTE table DMA addresses + flags */
57 u64 *pmd = pgtable->pgd_entries[pgd_index];
62 for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) {
/* Only PTE tables that were actually allocated are freed */
63 if (pmd_entries[pmd_index])
64 dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE,
65 pmd_entries[pmd_index],
66 pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
/* Free the PMD table; its DMA address comes from the PGD entry */
70 dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index],
71 pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
/* Finally free the PGD table itself */
74 dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd,
75 pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK);
/*
 * ivpu_mmu_ensure_pmd() - return the PMD table for @pgd_index, creating it
 * on first use.
 *
 * If pgd_entries[pgd_index] is already set it is returned as is. Otherwise a
 * PMD table is allocated from write-combined DMA memory together with a
 * zeroed CPU-side array (pgd_cpu_entries[pgd_index]) that will hold pointers
 * to the PTE tables, and the PGD entry is written as
 * (pmd_dma | IVPU_MMU_ENTRY_VALID).
 *
 * NOTE(review): this listing omits lines of the body (return type, local
 * declarations, failure checks, returns). The dma_free_wc() at the end is
 * presumably the unwind path for a failed kzalloc() - confirm in the full
 * source. Callers in this file treat a false/NULL result as failure.
 */
79 ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index)
/* Fast path: table already exists */
85 if (pgtable->pgd_entries[pgd_index])
86 return pgtable->pgd_entries[pgd_index];
88 pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
92 pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
/* Publish the new table: CPU pointers first, then the entry holding the DMA address */
96 pgtable->pgd_entries[pgd_index] = pmd;
97 pgtable->pgd_cpu_entries[pgd_index] = pmd_entries;
98 pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID;
103 dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma);
/*
 * ivpu_mmu_ensure_pte() - return the PTE table for (@pgd_index, @pmd_index),
 * creating it on first use.
 *
 * If pgd_cpu_entries[pgd_index][pmd_index] is already set it is returned.
 * Otherwise a PTE table is allocated from write-combined DMA memory, its CPU
 * pointer is stored in pgd_cpu_entries[][] and the PMD entry is written as
 * (pte_dma | IVPU_MMU_ENTRY_VALID).
 *
 * Dereferences pgd_cpu_entries[pgd_index] and pgd_entries[pgd_index], so the
 * PMD level for @pgd_index must already exist when this is called.
 *
 * NOTE(review): this listing omits lines of the body; the allocation-failure
 * check and the final return are not visible here.
 */
108 ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
109 int pgd_index, int pmd_index)
/* Fast path: table already exists */
114 if (pgtable->pgd_cpu_entries[pgd_index][pmd_index])
115 return pgtable->pgd_cpu_entries[pgd_index][pmd_index];
117 pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
121 pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte;
122 pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID;
/*
 * ivpu_mmu_context_map_page() - map one page at @vpu_addr to @dma_addr.
 *
 * Splits @vpu_addr into PGD/PMD/PTE indices, makes sure the PMD and PTE
 * tables on that path exist, then writes the leaf entry as (@dma_addr | @prot).
 *
 * NOTE(review): this listing omits lines of the body; the values returned on
 * allocation failure and on success are not visible here.
 */
128 ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
129 u64 vpu_addr, dma_addr_t dma_addr, int prot)
132 int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
133 int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
134 int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
136 /* Allocate PMD - second level page table if needed */
137 if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index))
140 /* Allocate PTE - third level page table if needed */
141 pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index);
145 /* Update PTE - third level page table with DMA address */
146 pte[pte_index] = dma_addr | prot;
/*
 * ivpu_mmu_context_unmap_page() - invalidate the leaf entry for @vpu_addr.
 *
 * Overwrites the PTE slot selected by @vpu_addr with IVPU_MMU_ENTRY_INVALID.
 * No table is freed here.
 *
 * The visible code indexes pgd_cpu_entries[pgd_index][pmd_index][pte_index]
 * without checking the intermediate pointers, so the PMD and PTE tables for
 * @vpu_addr must already exist (i.e. the address was mapped earlier).
 */
151 static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr)
153 int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
154 int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
155 int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
157 /* Update PTE with dummy physical address and clear flags */
158 ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID;
/*
 * ivpu_mmu_context_flush_page_tables() - flush CPU cache lines of every
 * table that translates [@vpu_addr, @vpu_addr + @size).
 *
 * The start address is rounded down to a PTE-table boundary, then the range
 * is walked one PTE table at a time: each PTE table touched is flushed, the
 * PMD table of each PGD slot crossed is flushed, and the PGD table is
 * flushed once.
 *
 * The PTE and PMD pointers are passed to clflush_cache_range() without a
 * NULL check in the visible lines, so the range is expected to have been
 * populated beforehand.
 */
162 ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
/* Exclusive end of the range, computed before vpu_addr is aligned down */
164 u64 end_addr = vpu_addr + size;
165 u64 *pgd = ctx->pgtable.pgd;
167 /* Align to PMD entry (2 MB) */
168 vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
170 while (vpu_addr < end_addr) {
171 int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
/* First address past the span covered by this PGD slot's PMD table */
172 u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
173 u64 *pmd = ctx->pgtable.pgd_entries[pgd_index];
175 while (vpu_addr < end_addr && vpu_addr < pmd_end) {
176 int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
177 u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index];
179 clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE);
/* Step to the next PTE table */
180 vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
182 clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE);
184 clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE);
/*
 * ivpu_mmu_context_map_pages() - map a contiguous range page by page.
 *
 * Calls ivpu_mmu_context_map_page() for the current @vpu_addr/@dma_addr pair
 * and then advances both addresses, and shrinks @size, by one
 * IVPU_MMU_PAGE_SIZE.
 *
 * NOTE(review): this listing omits lines of the body; the loop condition,
 * the error check on ret and the return statements are not visible here.
 * @size is a size_t decremented in page steps, so presumably it is expected
 * to be a multiple of IVPU_MMU_PAGE_SIZE - confirm against the loop condition.
 */
188 ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
189 u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot)
192 int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
197 vpu_addr += IVPU_MMU_PAGE_SIZE;
198 dma_addr += IVPU_MMU_PAGE_SIZE;
199 size -= IVPU_MMU_PAGE_SIZE;
/*
 * ivpu_mmu_context_unmap_pages() - invalidate a contiguous range page by page.
 *
 * Calls ivpu_mmu_context_unmap_page() for the current @vpu_addr, then
 * advances @vpu_addr and shrinks @size by one IVPU_MMU_PAGE_SIZE.
 *
 * NOTE(review): this listing omits the loop header; the termination
 * condition is not visible here.
 */
205 static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
208 ivpu_mmu_context_unmap_page(ctx, vpu_addr);
209 vpu_addr += IVPU_MMU_PAGE_SIZE;
210 size -= IVPU_MMU_PAGE_SIZE;
/*
 * ivpu_mmu_context_map_sgt() - map a DMA-mapped scatter-gather table into
 * the VPU address space of @ctx, starting at @vpu_addr.
 *
 * @vpu_addr must be page aligned and is checked against the 2 GB..8 GB
 * window. Under ctx->lock, each DMA segment of @sgt is mapped with
 * ivpu_mmu_context_map_pages() and the affected page tables are flushed.
 * After the lock is dropped the TLB of the context is invalidated.
 *
 * The base protection is IVPU_MMU_ENTRY_MAPPED; IVPU_MMU_ENTRY_FLAG_LLC_COHERENT
 * may be added (the condition line is not in this listing - presumably
 * @llc_coherent).
 *
 * NOTE(review): this listing omits lines of the body (return statements, the
 * per-segment advance of vpu_addr, error checks).
 * NOTE(review): in the visible lines only the start address is range-checked;
 * the end of the mapping (vpu_addr + total length) is not validated here.
 */
215 ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
216 u64 vpu_addr, struct sg_table *sgt, bool llc_coherent)
218 struct scatterlist *sg;
223 if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
226 * VPU is only 32 bit, but DMA engine is 38 bit
227 * Ranges < 2 GB are reserved for VPU internal registers
228 * Limit range to 8 GB
230 if (vpu_addr < SZ_2G || vpu_addr > SZ_8G)
233 prot = IVPU_MMU_ENTRY_MAPPED;
235 prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT;
237 mutex_lock(&ctx->lock);
239 for_each_sgtable_dma_sg(sgt, sg, i) {
/* Start the mapping at the segment's DMA address minus sg->offset and extend its length by the same amount */
240 u64 dma_addr = sg_dma_address(sg) - sg->offset;
241 size_t size = sg_dma_len(sg) + sg->offset;
243 ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
245 ivpu_err(vdev, "Failed to map context pages\n");
/* Error path releases the lock before leaving */
246 mutex_unlock(&ctx->lock);
/* Flush the tables covering the segment just mapped */
249 ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
253 mutex_unlock(&ctx->lock);
/* TLB invalidation is issued after ctx->lock has been released */
255 ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
257 ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
/*
 * ivpu_mmu_context_unmap_sgt() - remove the mapping of @sgt from @ctx,
 * starting at @vpu_addr.
 *
 * An unaligned @vpu_addr only produces a warning. Under ctx->lock, the pages
 * of each DMA segment are invalidated and the affected page tables flushed.
 * After the lock is dropped the TLB of the context is invalidated; a failure
 * there is logged as a warning.
 *
 * NOTE(review): this listing omits lines of the body; the per-segment
 * advance of vpu_addr and the check on ret are not visible here.
 */
262 ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
263 u64 vpu_addr, struct sg_table *sgt)
265 struct scatterlist *sg;
269 if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
270 ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr);
272 mutex_lock(&ctx->lock);
274 for_each_sgtable_dma_sg(sgt, sg, i) {
/* Same length computation as on the map side: DMA length plus sg->offset */
275 size_t size = sg_dma_len(sg) + sg->offset;
277 ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
278 ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
282 mutex_unlock(&ctx->lock);
284 ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
286 ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
/*
 * ivpu_mmu_context_insert_node_locked() - reserve @size bytes of VPU address
 * space inside @range.
 *
 * Must be called with ctx->lock held (checked via lockdep). The node is
 * placed by drm_mm_insert_node_in_range() with IVPU_MMU_PAGE_SIZE alignment,
 * color 0, within [range->start, range->end] using DRM_MM_INSERT_BEST.
 *
 * Returns the result of drm_mm_insert_node_in_range().
 */
290 ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
291 const struct ivpu_addr_range *range,
292 u64 size, struct drm_mm_node *node)
294 lockdep_assert_held(&ctx->lock);
296 return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE,
297 0, range->start, range->end, DRM_MM_INSERT_BEST);
/*
 * ivpu_mmu_context_remove_node_locked() - release an address-space node.
 *
 * Must be called with ctx->lock held (checked via lockdep). Hands @node back
 * to the allocator with drm_mm_remove_node().
 */
301 ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
303 lockdep_assert_held(&ctx->lock);
305 drm_mm_remove_node(node);
/*
 * ivpu_mmu_context_init() - initialise an MMU context.
 *
 * Sets up the context lock and buffer-object list, allocates the top-level
 * page table, initialises the drm_mm address-space allocator over either the
 * global range (global_low.start .. global_high.end) or the user range
 * (user_low.start .. user_high.end), and stores @context_id in ctx->id.
 *
 * NOTE(review): this listing omits lines of the body; the condition that
 * selects between the global and user ranges (presumably based on
 * @context_id), the check on ret and the return statements are not visible
 * here.
 */
309 ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
314 mutex_init(&ctx->lock);
315 INIT_LIST_HEAD(&ctx->bo_list);
317 ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
/* Address window used for the global range */
322 start = vdev->hw->ranges.global_low.start;
323 end = vdev->hw->ranges.global_high.end;
/* Address window used for the user range */
325 start = vdev->hw->ranges.user_low.start;
326 end = vdev->hw->ranges.user_high.end;
/* drm_mm_init() takes (start, size), hence end - start */
329 drm_mm_init(&ctx->mm, start, end - start);
330 ctx->id = context_id;
/*
 * ivpu_mmu_context_fini() - tear down an MMU context.
 *
 * Warns if the context has no PGD, destroys the lock, frees all page tables
 * and takes down the drm_mm allocator.
 *
 * NOTE(review): in the visible lines the drm_WARN_ON() result is not used,
 * so teardown continues into ivpu_mmu_pgtable_free() even when pgd is NULL -
 * confirm whether an early return is intended.
 */
335 static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
337 drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd);
339 mutex_destroy(&ctx->lock);
340 ivpu_mmu_pgtable_free(vdev, &ctx->pgtable);
341 drm_mm_takedown(&ctx->mm);
/*
 * ivpu_mmu_global_context_init() - initialise the device-wide context
 * (vdev->gctx) with the IVPU_GLOBAL_CONTEXT_MMU_SSID identifier.
 *
 * Returns the result of ivpu_mmu_context_init().
 */
344 int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
346 return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
/*
 * ivpu_mmu_global_context_fini() - tear down the device-wide context
 * (vdev->gctx).
 *
 * NOTE(review): `return <void expression>;` in a function returning void is
 * accepted by GCC/Clang but is not valid ISO C; a plain call statement would
 * be the portable form.
 */
349 void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
351 return ivpu_mmu_context_fini(vdev, &vdev->gctx);
/*
 * ivpu_mmu_user_context_mark_invalid() - flag the file context registered
 * under @ssid as having MMU faults.
 *
 * Looks up the ivpu_file_priv stored in vdev->context_xa at index @ssid and
 * sets its has_mmu_faults field, all while holding the xarray lock.
 *
 * NOTE(review): this listing omits the line between the lookup and the
 * store; presumably it guards against xa_load() returning NULL - confirm in
 * the full source.
 */
354 void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
356 struct ivpu_file_priv *file_priv;
358 xa_lock(&vdev->context_xa);
360 file_priv = xa_load(&vdev->context_xa, ssid);
362 file_priv->has_mmu_faults = true;
364 xa_unlock(&vdev->context_xa);
/*
 * ivpu_mmu_user_context_init() - initialise a per-user MMU context and
 * register its page table with the MMU under @ctx_id.
 *
 * Warns if @ctx_id is 0. Initialises @ctx, then calls ivpu_mmu_set_pgtable();
 * if that fails the context is torn down again via the err_context_fini path.
 *
 * NOTE(review): this listing omits lines of the body; the checks on ret, the
 * err_context_fini label itself and the return statements are not visible
 * here.
 */
367 int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id)
371 drm_WARN_ON(&vdev->drm, !ctx_id);
373 ret = ivpu_mmu_context_init(vdev, ctx, ctx_id);
375 ivpu_err(vdev, "Failed to initialize context: %d\n", ret);
379 ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
381 ivpu_err(vdev, "Failed to set page table: %d\n", ret);
382 goto err_context_fini;
/* Unwind: undo ivpu_mmu_context_init() */
388 ivpu_mmu_context_fini(vdev, ctx);
/*
 * ivpu_mmu_user_context_fini() - tear down a per-user MMU context.
 *
 * Warns if ctx->id is 0. Calls ivpu_mmu_clear_pgtable() for ctx->id first,
 * and only then frees the context's tables and allocator via
 * ivpu_mmu_context_fini().
 */
392 void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
394 drm_WARN_ON(&vdev->drm, !ctx->id);
396 ivpu_mmu_clear_pgtable(vdev, ctx->id);
397 ivpu_mmu_context_fini(vdev, ctx);