// SPDX-License-Identifier: GPL-2.0-only
/**************************************************************************
 * Copyright (c) 2007, Intel Corporation.
 **************************************************************************/

#include <linux/highmem.h>
#include <linux/vmalloc.h>

#include "mmu.h"
#include "psb_drv.h"
#include "psb_reg.h"
/*
 * Code for the SGX MMU:
 */

/*
 * clflush on one processor only:
 * clflush should apparently flush the cache line on all processors in an
 * SMP system.
 */

/*
 * kmap atomic:
 * The usage of the slots must be completely encapsulated within a spinlock,
 * and no other functions that may be using the locks for other purposes may
 * be called from within the locked region.
 * Since the slots are per processor, this will guarantee that we are the only
 * user.
 */
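
/*
 * Illustrative sketch only (not part of the driver; the helper name
 * psb_mmu_example_map is made up): the locking rule described above, as it
 * is applied throughout this file. The driver spinlock is taken before
 * kmap_atomic() and released only after kunmap_atomic(), with nothing else
 * that could touch the mapping slots called in between.
 */
#if 0
static void psb_mmu_example_map(struct psb_mmu_driver *driver,
				struct page *page)
{
	uint32_t *v;

	spin_lock(&driver->lock);
	v = kmap_atomic(page);
	v[0] = PSB_PTE_VALID;		/* slot-local access happens here */
	kunmap_atomic(v);
	spin_unlock(&driver->lock);
}
#endif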
/*
 * TODO: Inserting ptes from an interrupt handler:
 * This may be desirable for some SGX functionality where the GPU can fault in
 * needed pages. For that, we need to make an atomic insert_pages function that
 * may fail.
 * If it fails, the caller needs to insert the page using a workqueue function,
 * but on average it should be fast.
 */
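
/*
 * A minimal sketch of the deferred-insert fallback the TODO above suggests;
 * it is not part of the driver, and psb_mmu_deferred_insert(),
 * psb_mmu_insert_work_fn() and struct psb_mmu_insert_work are hypothetical
 * names. An atomic-context caller queues the request and a worker performs
 * the (possibly sleeping) psb_mmu_insert_pages() call later.
 */
#if 0
struct psb_mmu_insert_work {
	struct work_struct work;
	struct psb_mmu_pd *pd;
	struct page **pages;
	unsigned long address;
	uint32_t num_pages;
	int type;
};

static void psb_mmu_insert_work_fn(struct work_struct *work)
{
	struct psb_mmu_insert_work *iw =
		container_of(work, struct psb_mmu_insert_work, work);

	/* Process context: taking the driver semaphore may sleep here. */
	psb_mmu_insert_pages(iw->pd, iw->pages, iw->address, iw->num_pages,
			     0, 0, iw->type);
	kfree(iw);
}

static int psb_mmu_deferred_insert(struct psb_mmu_pd *pd, struct page **pages,
				   unsigned long address, uint32_t num_pages,
				   int type)
{
	struct psb_mmu_insert_work *iw = kmalloc(sizeof(*iw), GFP_ATOMIC);

	if (!iw)
		return -ENOMEM;

	iw->pd = pd;
	iw->pages = pages;
	iw->address = address;
	iw->num_pages = num_pages;
	iw->type = type;
	INIT_WORK(&iw->work, psb_mmu_insert_work_fn);
	schedule_work(&iw->work);
	return 0;
}
#endif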
static inline uint32_t psb_mmu_pt_index(uint32_t offset)
{
	return (offset >> PSB_PTE_SHIFT) & 0x3FF;
}

static inline uint32_t psb_mmu_pd_index(uint32_t offset)
{
	return offset >> PSB_PDE_SHIFT;
}

static inline void psb_clflush(void *addr)
{
	__asm__ __volatile__("clflush (%0)\n" : : "r"(addr) : "memory");
}
static inline void psb_mmu_clflush(struct psb_mmu_driver *driver, void *addr)
{
	if (!driver->has_clflush)
		return;

	mb();
	psb_clflush(addr);
	mb();
}
static void psb_mmu_flush_pd_locked(struct psb_mmu_driver *driver, int force)
{
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);

	if (atomic_read(&driver->needs_tlbflush) || force) {
		uint32_t val = PSB_RSGX32(PSB_CR_BIF_CTRL);
		PSB_WSGX32(val | _PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);

		/* Make sure data cache is turned off before enabling it */
		wmb();
		PSB_WSGX32(val & ~_PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);
		(void)PSB_RSGX32(PSB_CR_BIF_CTRL);
		if (driver->msvdx_mmu_invaldc)
			atomic_set(driver->msvdx_mmu_invaldc, 1);
	}
	atomic_set(&driver->needs_tlbflush, 0);
}
static void psb_mmu_flush_pd(struct psb_mmu_driver *driver, int force)
{
	down_write(&driver->sem);
	psb_mmu_flush_pd_locked(driver, force);
	up_write(&driver->sem);
}
void psb_mmu_flush(struct psb_mmu_driver *driver)
{
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
	uint32_t val;

	down_write(&driver->sem);
	val = PSB_RSGX32(PSB_CR_BIF_CTRL);
	if (atomic_read(&driver->needs_tlbflush))
		PSB_WSGX32(val | _PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);
	else
		PSB_WSGX32(val | _PSB_CB_CTRL_FLUSH, PSB_CR_BIF_CTRL);

	/* Make sure data cache is turned off and MMU is flushed before
	   restoring bank interface control register */
	wmb();
	PSB_WSGX32(val & ~(_PSB_CB_CTRL_FLUSH | _PSB_CB_CTRL_INVALDC),
		   PSB_CR_BIF_CTRL);
	(void)PSB_RSGX32(PSB_CR_BIF_CTRL);

	atomic_set(&driver->needs_tlbflush, 0);
	if (driver->msvdx_mmu_invaldc)
		atomic_set(driver->msvdx_mmu_invaldc, 1);
	up_write(&driver->sem);
}
void psb_mmu_set_pd_context(struct psb_mmu_pd *pd, int hw_context)
{
	struct drm_device *dev = pd->driver->dev;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
	uint32_t offset = (hw_context == 0) ? PSB_CR_BIF_DIR_LIST_BASE0 :
			  PSB_CR_BIF_DIR_LIST_BASE1 + hw_context * 4;

	down_write(&pd->driver->sem);
	PSB_WSGX32(page_to_pfn(pd->p) << PAGE_SHIFT, offset);
	wmb();
	psb_mmu_flush_pd_locked(pd->driver, 1);
	pd->hw_context = hw_context;
	up_write(&pd->driver->sem);
}
static inline unsigned long psb_pd_addr_end(unsigned long addr,
					    unsigned long end)
{
	addr = (addr + PSB_PDE_MASK + 1) & ~PSB_PDE_MASK;
	return (addr < end) ? addr : end;
}
static inline uint32_t psb_mmu_mask_pte(uint32_t pfn, int type)
{
	uint32_t mask = PSB_PTE_VALID;

	if (type & PSB_MMU_CACHED_MEMORY)
		mask |= PSB_PTE_CACHED;
	if (type & PSB_MMU_RO_MEMORY)
		mask |= PSB_PTE_RO;
	if (type & PSB_MMU_WO_MEMORY)
		mask |= PSB_PTE_WO;

	return (pfn << PAGE_SHIFT) | mask;
}
struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver,
				    int trap_pagefaults, int invalid_type)
{
	struct psb_mmu_pd *pd = kmalloc(sizeof(*pd), GFP_KERNEL);
	uint32_t *v;
	int i;

	if (!pd)
		return NULL;

	pd->p = alloc_page(GFP_DMA32);
	if (!pd->p)
		goto out_err1;
	pd->dummy_pt = alloc_page(GFP_DMA32);
	if (!pd->dummy_pt)
		goto out_err2;
	pd->dummy_page = alloc_page(GFP_DMA32);
	if (!pd->dummy_page)
		goto out_err3;

	/* When not trapping pagefaults, point invalid entries at the dummy
	   page table / dummy page instead of faulting. */
	if (!trap_pagefaults) {
		pd->invalid_pde = psb_mmu_mask_pte(page_to_pfn(pd->dummy_pt),
						   invalid_type);
		pd->invalid_pte = psb_mmu_mask_pte(page_to_pfn(pd->dummy_page),
						   invalid_type);
	} else {
		pd->invalid_pde = 0;
		pd->invalid_pte = 0;
	}

	v = kmap_local_page(pd->dummy_pt);
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		v[i] = pd->invalid_pte;

	kunmap_local(v);

	v = kmap_local_page(pd->p);
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		v[i] = pd->invalid_pde;

	kunmap_local(v);

	clear_page(kmap(pd->dummy_page));
	kunmap(pd->dummy_page);

	pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024);
	if (!pd->tables)
		goto out_err4;

	pd->hw_context = -1;
	pd->pd_mask = PSB_PTE_VALID;
	pd->driver = driver;

	return pd;

out_err4:
	__free_page(pd->dummy_page);
out_err3:
	__free_page(pd->dummy_pt);
out_err2:
	__free_page(pd->p);
out_err1:
	kfree(pd);
	return NULL;
}
static void psb_mmu_free_pt(struct psb_mmu_pt *pt)
{
	__free_page(pt->p);
	kfree(pt);
}
void psb_mmu_free_pagedir(struct psb_mmu_pd *pd)
{
	struct psb_mmu_driver *driver = pd->driver;
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
	struct psb_mmu_pt *pt;
	int i;

	down_write(&driver->sem);
	if (pd->hw_context != -1) {
		PSB_WSGX32(0, PSB_CR_BIF_DIR_LIST_BASE0 + pd->hw_context * 4);
		psb_mmu_flush_pd_locked(driver, 1);
	}

	/* Should take the spinlock here, but we don't need to do that
	   since we have the semaphore in write mode. */

	for (i = 0; i < 1024; ++i) {
		pt = pd->tables[i];
		if (pt)
			psb_mmu_free_pt(pt);
	}

	vfree(pd->tables);
	__free_page(pd->dummy_page);
	__free_page(pd->dummy_pt);
	__free_page(pd->p);
	kfree(pd);
	up_write(&driver->sem);
}
static struct psb_mmu_pt *psb_mmu_alloc_pt(struct psb_mmu_pd *pd)
{
	struct psb_mmu_pt *pt = kmalloc(sizeof(*pt), GFP_KERNEL);
	void *v;
	uint32_t clflush_add = pd->driver->clflush_add >> PAGE_SHIFT;
	uint32_t clflush_count = PAGE_SIZE / clflush_add;
	spinlock_t *lock = &pd->driver->lock;
	uint8_t *clf;
	uint32_t *ptes;
	int i;

	if (!pt)
		return NULL;

	pt->p = alloc_page(GFP_DMA32);
	if (!pt->p) {
		kfree(pt);
		return NULL;
	}

	spin_lock(lock);

	v = kmap_atomic(pt->p);
	clf = (uint8_t *) v;
	ptes = (uint32_t *) v;
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		*ptes++ = pd->invalid_pte;

	if (pd->driver->has_clflush && pd->hw_context != -1) {
		mb();
		for (i = 0; i < clflush_count; ++i) {
			psb_clflush(clf);
			clf += clflush_add;
		}
		mb();
	}
	kunmap_atomic(v);
	spin_unlock(lock);

	pt->count = 0;
	pt->pd = pd;
	pt->index = 0;

	return pt;
}
static struct psb_mmu_pt *psb_mmu_pt_alloc_map_lock(struct psb_mmu_pd *pd,
						    unsigned long addr)
{
	uint32_t index = psb_mmu_pd_index(addr);
	struct psb_mmu_pt *pt;
	uint32_t *v;
	spinlock_t *lock = &pd->driver->lock;

	spin_lock(lock);
	pt = pd->tables[index];
	while (!pt) {
		spin_unlock(lock);
		pt = psb_mmu_alloc_pt(pd);
		if (!pt)
			return NULL;
		spin_lock(lock);

		/* Someone else installed a table while the lock was dropped
		   for the allocation: free ours and use theirs. */
		if (pd->tables[index]) {
			spin_unlock(lock);
			psb_mmu_free_pt(pt);
			spin_lock(lock);
			pt = pd->tables[index];
			continue;
		}

		v = kmap_atomic(pd->p);
		pd->tables[index] = pt;
		v[index] = (page_to_pfn(pt->p) << 12) | pd->pd_mask;
		pt->index = index;
		kunmap_atomic((void *) v);

		if (pd->hw_context != -1) {
			psb_mmu_clflush(pd->driver, (void *)&v[index]);
			atomic_set(&pd->driver->needs_tlbflush, 1);
		}
	}
	pt->v = kmap_atomic(pt->p);
	return pt;
}
static struct psb_mmu_pt *psb_mmu_pt_map_lock(struct psb_mmu_pd *pd,
					      unsigned long addr)
{
	uint32_t index = psb_mmu_pd_index(addr);
	struct psb_mmu_pt *pt;
	spinlock_t *lock = &pd->driver->lock;

	spin_lock(lock);
	pt = pd->tables[index];
	if (!pt) {
		spin_unlock(lock);
		return NULL;
	}
	pt->v = kmap_atomic(pt->p);
	return pt;
}
static void psb_mmu_pt_unmap_unlock(struct psb_mmu_pt *pt)
{
	struct psb_mmu_pd *pd = pt->pd;
	uint32_t *v;

	kunmap_atomic(pt->v);
	if (pt->count == 0) {
		v = kmap_atomic(pd->p);
		v[pt->index] = pd->invalid_pde;
		pd->tables[pt->index] = NULL;

		if (pd->hw_context != -1) {
			psb_mmu_clflush(pd->driver, (void *)&v[pt->index]);
			atomic_set(&pd->driver->needs_tlbflush, 1);
		}
		kunmap_atomic(v);
		spin_unlock(&pd->driver->lock);
		psb_mmu_free_pt(pt);
		return;
	}
	spin_unlock(&pd->driver->lock);
}
static inline void psb_mmu_set_pte(struct psb_mmu_pt *pt, unsigned long addr,
				   uint32_t pte)
{
	pt->v[psb_mmu_pt_index(addr)] = pte;
}

static inline void psb_mmu_invalidate_pte(struct psb_mmu_pt *pt,
					  unsigned long addr)
{
	pt->v[psb_mmu_pt_index(addr)] = pt->pd->invalid_pte;
}
struct psb_mmu_pd *psb_mmu_get_default_pd(struct psb_mmu_driver *driver)
{
	struct psb_mmu_pd *pd;

	down_read(&driver->sem);
	pd = driver->default_pd;
	up_read(&driver->sem);

	return pd;
}
void psb_mmu_driver_takedown(struct psb_mmu_driver *driver)
{
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);

	PSB_WSGX32(driver->bif_ctrl, PSB_CR_BIF_CTRL);
	psb_mmu_free_pagedir(driver->default_pd);
	kfree(driver);
}
struct psb_mmu_driver *psb_mmu_driver_init(struct drm_device *dev,
					   int trap_pagefaults,
					   int invalid_type,
					   atomic_t *msvdx_mmu_invaldc)
{
	struct psb_mmu_driver *driver;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);

	driver = kmalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return NULL;

	driver->dev = dev;
	driver->default_pd = psb_mmu_alloc_pd(driver, trap_pagefaults,
					      invalid_type);
	if (!driver->default_pd)
		goto out_err1;

	spin_lock_init(&driver->lock);
	init_rwsem(&driver->sem);
	down_write(&driver->sem);
	atomic_set(&driver->needs_tlbflush, 1);
	driver->msvdx_mmu_invaldc = msvdx_mmu_invaldc;

	driver->bif_ctrl = PSB_RSGX32(PSB_CR_BIF_CTRL);
	PSB_WSGX32(driver->bif_ctrl | _PSB_CB_CTRL_CLEAR_FAULT,
		   PSB_CR_BIF_CTRL);
	PSB_WSGX32(driver->bif_ctrl & ~_PSB_CB_CTRL_CLEAR_FAULT,
		   PSB_CR_BIF_CTRL);

	driver->has_clflush = 0;

	if (boot_cpu_has(X86_FEATURE_CLFLUSH)) {
		uint32_t tfms, misc, cap0, cap4, clflush_size;

		/*
		 * clflush size is determined at kernel setup for x86_64 but
		 * not for i386. We have to do it here.
		 */

		cpuid(0x00000001, &tfms, &misc, &cap0, &cap4);
		clflush_size = ((misc >> 8) & 0xff) * 8;
		driver->has_clflush = 1;
		driver->clflush_add =
		    PAGE_SIZE * clflush_size / sizeof(uint32_t);
		driver->clflush_mask = driver->clflush_add - 1;
		driver->clflush_mask = ~driver->clflush_mask;
	}

	up_write(&driver->sem);
	return driver;

out_err1:
	kfree(driver);
	return NULL;
}
static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd, unsigned long address,
			       uint32_t num_pages, uint32_t desired_tile_stride,
			       uint32_t hw_tile_stride)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long clflush_add = pd->driver->clflush_add;
	unsigned long clflush_mask = pd->driver->clflush_mask;

	if (!pd->driver->has_clflush)
		return;

	if (hw_tile_stride)
		rows = num_pages / desired_tile_stride;
	else
		desired_tile_stride = num_pages;

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;
	mb();
	for (i = 0; i < rows; ++i) {

		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_map_lock(pd, addr);
			if (!pt)
				continue;
			do {
				psb_clflush(&pt->v[psb_mmu_pt_index(addr)]);
			} while (addr += clflush_add,
				 (addr & clflush_mask) < next);

			psb_mmu_pt_unmap_unlock(pt);
		} while (addr = next, next != end);

		address += row_add;
	}
	mb();
}
void psb_mmu_remove_pfn_sequence(struct psb_mmu_pd *pd,
				 unsigned long address, uint32_t num_pages)
{
	struct psb_mmu_pt *pt;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long f_address = address;

	down_read(&pd->driver->sem);

	addr = address;
	end = addr + (num_pages << PAGE_SHIFT);

	do {
		next = psb_pd_addr_end(addr, end);
		pt = psb_mmu_pt_alloc_map_lock(pd, addr);
		if (!pt)
			goto out;
		do {
			psb_mmu_invalidate_pte(pt, addr);
			--pt->count;
		} while (addr += PAGE_SIZE, addr < next);
		psb_mmu_pt_unmap_unlock(pt);

	} while (addr = next, next != end);

out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);
}
void psb_mmu_remove_pages(struct psb_mmu_pd *pd, unsigned long address,
			  uint32_t num_pages, uint32_t desired_tile_stride,
			  uint32_t hw_tile_stride)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long f_address = address;

	if (hw_tile_stride)
		rows = num_pages / desired_tile_stride;
	else
		desired_tile_stride = num_pages;

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;

	down_read(&pd->driver->sem);

	/* Make sure we only need to flush this processor's cache */

	for (i = 0; i < rows; ++i) {

		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_map_lock(pd, addr);
			if (!pt)
				continue;
			do {
				psb_mmu_invalidate_pte(pt, addr);
				--pt->count;

			} while (addr += PAGE_SIZE, addr < next);
			psb_mmu_pt_unmap_unlock(pt);

		} while (addr = next, next != end);
		address += row_add;
	}
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages,
				   desired_tile_stride, hw_tile_stride);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);
}
int psb_mmu_insert_pfn_sequence(struct psb_mmu_pd *pd, uint32_t start_pfn,
				unsigned long address, uint32_t num_pages,
				int type)
{
	struct psb_mmu_pt *pt;
	uint32_t pte;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long f_address = address;
	int ret = -ENOMEM;

	down_read(&pd->driver->sem);

	addr = address;
	end = addr + (num_pages << PAGE_SHIFT);

	do {
		next = psb_pd_addr_end(addr, end);
		pt = psb_mmu_pt_alloc_map_lock(pd, addr);
		if (!pt) {
			ret = -ENOMEM;
			goto out;
		}
		do {
			pte = psb_mmu_mask_pte(start_pfn++, type);
			psb_mmu_set_pte(pt, addr, pte);
			pt->count++;
		} while (addr += PAGE_SIZE, addr < next);
		psb_mmu_pt_unmap_unlock(pt);

	} while (addr = next, next != end);
	ret = 0;

out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);

	return ret;
}
int psb_mmu_insert_pages(struct psb_mmu_pd *pd, struct page **pages,
			 unsigned long address, uint32_t num_pages,
			 uint32_t desired_tile_stride, uint32_t hw_tile_stride,
			 int type)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	uint32_t pte;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long f_address = address;
	int ret = -ENOMEM;

	if (hw_tile_stride) {
		if (num_pages % desired_tile_stride != 0)
			return -EINVAL;
		rows = num_pages / desired_tile_stride;
	} else {
		desired_tile_stride = num_pages;
	}

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;

	down_read(&pd->driver->sem);

	for (i = 0; i < rows; ++i) {

		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_alloc_map_lock(pd, addr);
			if (!pt)
				goto out;
			do {
				pte = psb_mmu_mask_pte(page_to_pfn(*pages++),
						       type);
				psb_mmu_set_pte(pt, addr, pte);
				pt->count++;
			} while (addr += PAGE_SIZE, addr < next);
			psb_mmu_pt_unmap_unlock(pt);

		} while (addr = next, next != end);

		address += row_add;
	}

	ret = 0;
out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages,
				   desired_tile_stride, hw_tile_stride);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);

	return ret;
}
int psb_mmu_virtual_to_pfn(struct psb_mmu_pd *pd, uint32_t virtual,
			   unsigned long *pfn)
{
	int ret;
	struct psb_mmu_pt *pt;
	uint32_t tmp;
	spinlock_t *lock = &pd->driver->lock;

	down_read(&pd->driver->sem);
	pt = psb_mmu_pt_map_lock(pd, virtual);
	if (!pt) {
		uint32_t *v;

		spin_lock(lock);
		v = kmap_atomic(pd->p);
		tmp = v[psb_mmu_pd_index(virtual)];
		kunmap_atomic(v);
		spin_unlock(lock);

		if (tmp != pd->invalid_pde || !(tmp & PSB_PTE_VALID) ||
		    !(pd->invalid_pte & PSB_PTE_VALID)) {
			ret = -EINVAL;
			goto out;
		}
		ret = 0;
		*pfn = pd->invalid_pte >> PAGE_SHIFT;
		goto out;
	}
	tmp = pt->v[psb_mmu_pt_index(virtual)];
	if (!(tmp & PSB_PTE_VALID)) {
		ret = -EINVAL;
	} else {
		ret = 0;
		*pfn = tmp >> PAGE_SHIFT;
	}
	psb_mmu_pt_unmap_unlock(pt);
out:
	up_read(&pd->driver->sem);
	return ret;
}