// SPDX-License-Identifier: GPL-2.0
/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2020
 * Author(s): Martin Schwidefsky <[email protected]>
 *	      David Hildenbrand <[email protected]>
 *	      Janosch Frank <[email protected]>
 */

#include <linux/kernel.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/page.h>
#include <asm/tlb.h>

/*
 * The address is saved in a radix tree directly; NULL would be ambiguous,
 * since 0 is a valid address, and NULL is returned when nothing was found.
 * The lower bits are ignored by all users of the macro, so it can be used
 * to distinguish a valid address 0 from a NULL.
 */
#define VALID_GADDR_FLAG 1
#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG)
#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG)

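/*
 * Illustrative sketch (compiled out, not part of this file): how the flag
 * bit lets a radix tree distinguish "guest address 0 is stored" from "no
 * entry". The tree and helper below are hypothetical stand-ins for
 * gmap->host_to_guest and its lookup helpers further down.
 */
#if 0
static bool example_lookup(struct radix_tree_root *tree, unsigned long vmaddr,
			   unsigned long *gaddr)
{
	unsigned long entry;

	/* Entries are stored as MAKE_VALID_GADDR(gaddr), never as plain 0. */
	entry = (unsigned long)radix_tree_lookup(tree, vmaddr >> PMD_SHIFT);
	if (!IS_GADDR_VALID(entry))
		return false;		/* nothing stored for this index */
	*gaddr = entry & HPAGE_MASK;	/* strip the flag bit again */
	return true;
}
#endif
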
#define GMAP_SHADOW_FAKE_TABLE 1ULL

static struct page *gmap_alloc_crst(void)
{
	struct page *page;

	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
	if (!page)
		return NULL;
	__arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
	return page;
}

/**
 * gmap_alloc - allocate and initialize a guest address space
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < _REGION3_SIZE) {
		limit = _REGION3_SIZE - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < _REGION2_SIZE) {
		limit = _REGION2_SIZE - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < _REGION1_SIZE) {
		limit = _REGION1_SIZE - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->children);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
	spin_lock_init(&gmap->guest_table_lock);
	spin_lock_init(&gmap->shadow_lock);
	refcount_set(&gmap->ref_count, 1);
	page = gmap_alloc_crst();
	if (!page)
		goto out_free;
	table = page_to_virt(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

/**
 * gmap_create - create a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	unsigned long gmap_asce;

	gmap = gmap_alloc(limit);
	if (!gmap)
		return NULL;
	gmap->mm = mm;
	spin_lock(&mm->context.lock);
	list_add_rcu(&gmap->list, &mm->context.gmap_list);
	if (list_is_singular(&mm->context.gmap_list))
		gmap_asce = gmap->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
	spin_unlock(&mm->context.lock);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_create);
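
/*
 * Illustrative sketch (compiled out, hypothetical caller): a KVM-like user
 * pairs gmap_create() with gmap_remove(). The 4 TB limit below is an
 * arbitrary example; gmap_alloc() rounds the limit up to the covering
 * table type, here a region-3 table ASCE.
 */
#if 0
static int example_setup_guest(struct mm_struct *mm)
{
	struct gmap *g;

	g = gmap_create(mm, (1UL << 42) - 1);	/* 4 TB: region-3 table */
	if (!g)
		return -ENOMEM;
	/* ... run the guest, map segments, handle faults ... */
	gmap_remove(g);		/* drops the initial reference */
	return 0;
}
#endif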

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte(gmap->asce);
	else
		__tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
{
	struct gmap_rmap *rmap, *rnext, *head;
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			head = radix_tree_delete(root, index);
			gmap_for_each_rmap_safe(rmap, rnext, head)
				kfree(rmap);
		}
	} while (nr > 0);
}

static void gmap_free_crst(unsigned long *table, bool free_ptes)
{
	bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
	int i;

	if (is_segment) {
		if (!free_ptes)
			goto out;
		for (i = 0; i < _CRST_ENTRIES; i++)
			if (!(table[i] & _SEGMENT_ENTRY_INVALID))
				page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
	} else {
		for (i = 0; i < _CRST_ENTRIES; i++)
			if (!(table[i] & _REGION_ENTRY_INVALID))
				gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
	}

out:
	free_pages((unsigned long)table, CRST_ALLOC_ORDER);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 *
 * No locks required. There are no references to this gmap anymore.
 */
void gmap_free(struct gmap *gmap)
{
	/* Flush tlb of all gmaps (if not already done for shadows) */
	if (!(gmap_is_shadow(gmap) && gmap->removed))
		gmap_flush_tlb(gmap);
	/* Free all segment & region tables. */
	gmap_free_crst(gmap->table, gmap_is_shadow(gmap));

	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);

	/* Free additional data for a shadow gmap */
	if (gmap_is_shadow(gmap)) {
		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
		/* Release reference to the parent */
		gmap_put(gmap->parent);
	}

	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_get - increase reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Returns the gmap pointer
 */
struct gmap *gmap_get(struct gmap *gmap)
{
	refcount_inc(&gmap->ref_count);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_get);

/**
 * gmap_put - decrease reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * If the reference counter reaches zero the guest address space is freed.
 */
void gmap_put(struct gmap *gmap)
{
	if (refcount_dec_and_test(&gmap->ref_count))
		gmap_free(gmap);
}
EXPORT_SYMBOL_GPL(gmap_put);
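
/*
 * Illustrative sketch (compiled out, hypothetical caller): gmap_get() and
 * gmap_put() follow the usual kernel refcounting pattern, so a temporary
 * user takes a reference before working on a gmap it does not own and
 * drops it again afterwards.
 */
#if 0
static void example_use_gmap(struct gmap *g)
{
	gmap_get(g);	/* ref_count: n -> n + 1 */
	/* ... safely dereference g while holding the reference ... */
	gmap_put(g);	/* ref_count: n + 1 -> n, frees g when it hits zero */
}
#endif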

/**
 * gmap_remove - remove a guest address space but do not free it yet
 * @gmap: pointer to the guest address space structure
 */
void gmap_remove(struct gmap *gmap)
{
	struct gmap *sg, *next;
	unsigned long gmap_asce;

	/* Remove all shadow gmaps linked to this gmap */
	if (!list_empty(&gmap->children)) {
		spin_lock(&gmap->shadow_lock);
		list_for_each_entry_safe(sg, next, &gmap->children, list) {
			list_del(&sg->list);
			gmap_put(sg);
		}
		spin_unlock(&gmap->shadow_lock);
	}
	/* Remove gmap from the per-mm list */
	spin_lock(&gmap->mm->context.lock);
	list_del_rcu(&gmap->list);
	if (list_empty(&gmap->mm->context.gmap_list))
		gmap_asce = 0;
	else if (list_is_singular(&gmap->mm->context.gmap_list))
		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
					     struct gmap, list)->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
	spin_unlock(&gmap->mm->context.lock);
	synchronize_rcu();
	/* Put reference */
	gmap_put(gmap);
}
EXPORT_SYMBOL_GPL(gmap_remove);

/*
 * gmap_alloc_table is assumed to be called with mmap_lock held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free() we can unlock */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	new = page_to_virt(page);
	crst_table_init(new, init);
	spin_lock(&gmap->guest_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		*table = __pa(new) | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page = NULL;
	}
	spin_unlock(&gmap->guest_table_lock);
	if (page)
		__free_pages(page, CRST_ALLOC_ORDER);
	return 0;
}

static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)
{
	return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
}

static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)
{
	return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
}

static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,
				       unsigned long *gaddr)
{
	*gaddr = host_to_guest_delete(gmap, vmaddr);
	if (IS_GADDR_VALID(*gaddr))
		return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1);
	return NULL;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long gaddr;
	int flush = 0;
	pmd_t *pmdp;

	BUG_ON(gmap_is_shadow(gmap));
	spin_lock(&gmap->guest_table_lock);

	pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
	if (pmdp) {
		flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY);
		*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
	}

	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the map succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
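
/*
 * Illustrative sketch (compiled out, hypothetical caller): both addresses
 * and the length must be segment (PMD_SIZE, 1 MB) aligned, otherwise
 * gmap_map_segment() returns -EINVAL.
 */
#if 0
static int example_map_one_segment(struct gmap *g, unsigned long host_area)
{
	/* Map guest address 0x100000 onto host_area (both 1 MB aligned). */
	return gmap_map_segment(g, host_area, 0x100000, PMD_SIZE);
}
#endif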

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	/* Note: guest_to_host is empty for a shadow gmap */
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);
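
/*
 * Illustrative sketch (compiled out, hypothetical caller): the lookup is
 * segment granular, so the low 20 bits of the guest address carry over
 * unchanged. The caller must hold gmap->mm's mmap_lock, as documented
 * above.
 */
#if 0
static unsigned long example_translate(struct gmap *g, unsigned long gaddr)
{
	unsigned long vmaddr;

	mmap_read_lock(g->mm);
	vmaddr = __gmap_translate(g, gaddr);	/* -EFAULT if unmapped */
	mmap_read_unlock(g->mm);
	return vmaddr;
}
#endif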

/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
		 unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
	rcu_read_unlock();
}

static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
			   unsigned long gaddr);

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	u64 unprot;
	int rc;

	BUG_ON(gmap_is_shadow(gmap));
	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & _REGION1_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & _REGION2_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & _REGION3_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	p4d = p4d_offset(pgd, vmaddr);
	VM_BUG_ON(p4d_none(*p4d));
	pud = pud_offset(p4d, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	/* large puds cannot yet be handled */
	if (pud_leaf(*pud))
		return -EFAULT;
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* Are we allowed to use huge pages? */
	if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
	rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_EMPTY) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT,
				       (void *)MAKE_VALID_GADDR(gaddr));
		if (!rc) {
			if (pmd_leaf(*pmd)) {
				*table = (pmd_val(*pmd) &
					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
					 | _SEGMENT_ENTRY_GMAP_UC
					 | _SEGMENT_ENTRY;
			} else
				*table = pmd_val(*pmd) &
					 _SEGMENT_ENTRY_HARDWARE_BITS;
		}
	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
		unprot = (u64)*table;
		unprot &= ~_SEGMENT_ENTRY_PROTECT;
		unprot |= _SEGMENT_ENTRY_GMAP_UC;
		gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
	}
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}
EXPORT_SYMBOL(__gmap_link);

/*
 * this function is assumed to be called with mmap_lock held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	struct vm_area_struct *vma;
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (vmaddr) {
		vmaddr |= gaddr & ~PMD_MASK;

		vma = vma_lookup(gmap->mm, vmaddr);
		if (!vma || is_vm_hugetlb_page(vma))
			return;

		/* Get pointer to the page table entry */
		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
		if (likely(ptep)) {
			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
			pte_unmap_unlock(ptep, ptl);
		}
	}
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
	unsigned long gaddr, vmaddr, size;
	struct vm_area_struct *vma;

	mmap_read_lock(gmap->mm);
	for (gaddr = from; gaddr < to;
	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
		/* Find the vm address for the guest address */
		vmaddr = (unsigned long)
			radix_tree_lookup(&gmap->guest_to_host,
					  gaddr >> PMD_SHIFT);
		if (!vmaddr)
			continue;
		vmaddr |= gaddr & ~PMD_MASK;
		/* Find vma in the parent mm */
		vma = find_vma(gmap->mm, vmaddr);
		if (!vma)
			continue;
		/*
		 * We do not discard pages that are backed by
		 * hugetlbfs, so we don't have to refault them.
		 */
		if (is_vm_hugetlb_page(vma))
			continue;
		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
		zap_page_range_single(vma, vmaddr, size, NULL);
	}
	mmap_read_unlock(gmap->mm);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_pte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add_rcu(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);

/**
 * gmap_unregister_pte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_rcu(&nb->list);
	spin_unlock(&gmap_notifier_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);
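
/*
 * Illustrative sketch (compiled out, hypothetical module code): KVM
 * registers such a notifier to learn when protected guest pages are
 * invalidated. The callback below only logs; a real user would kick the
 * affected vcpus.
 */
#if 0
static void example_notifier_call(struct gmap *gmap, unsigned long start,
				  unsigned long end)
{
	pr_debug("gmap %p: guest range %lx-%lx invalidated\n",
		 gmap, start, end);
}

static struct gmap_notifier example_notifier = {
	.notifier_call = example_notifier_call,
};

static void example_register(void)
{
	gmap_register_pte_notifier(&example_notifier);
	/* ... later ... */
	gmap_unregister_pte_notifier(&example_notifier);
}
#endif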

/**
 * gmap_call_notifier - call all registered invalidation callbacks
 * @gmap: pointer to guest mapping meta data structure
 * @start: start virtual address in the guest address space
 * @end: end virtual address in the guest address space
 */
static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
			       unsigned long end)
{
	struct gmap_notifier *nb;

	list_for_each_entry(nb, &gmap_notifier_list, list)
		nb->notifier_call(gmap, start, end);
}

/**
 * gmap_table_walk - walk the gmap page tables
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @level: page table level to stop at
 *
 * Returns a table entry pointer for the given guest address and @level
 * @level=0 : returns a pointer to a page table entry (or NULL)
 * @level=1 : returns a pointer to a segment table entry (or NULL)
 * @level=2 : returns a pointer to a region-3 table entry (or NULL)
 * @level=3 : returns a pointer to a region-2 table entry (or NULL)
 * @level=4 : returns a pointer to a region-1 table entry (or NULL)
 *
 * Returns NULL if the gmap page tables could not be walked to the
 * requested level.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level)
{
	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
	unsigned long *table = gmap->table;

	if (gmap_is_shadow(gmap) && gmap->removed)
		return NULL;

	if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
		return NULL;

	if (asce_type != _ASCE_TYPE_REGION1 &&
	    gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
		return NULL;

	switch (asce_type) {
	case _ASCE_TYPE_REGION1:
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if (level == 4)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION2:
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if (level == 3)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION3:
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if (level == 2)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_SEGMENT:
		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
		if (level == 1)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
		table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT;
	}
	return table;
}
EXPORT_SYMBOL(gmap_table_walk);
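
/*
 * Illustrative sketch (compiled out, hypothetical caller): stopping the
 * walk at level 1 yields the segment table entry for a guest address,
 * which is how the host_to_guest helpers above recover a pmd pointer
 * from a stored guest address.
 */
#if 0
static bool example_segment_is_valid(struct gmap *g, unsigned long gaddr)
{
	unsigned long *ste;

	ste = gmap_table_walk(g, gaddr, 1);	/* segment table entry */
	return ste && !(*ste & _SEGMENT_ENTRY_INVALID);
}
#endif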

/**
 * gmap_pte_op_walk - walk the gmap page table, get the page table lock
 *		      and return the pte pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @ptl: pointer to the spinlock pointer
 *
 * Returns a pointer to the locked pte for a guest address, or NULL
 */
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
			       spinlock_t **ptl)
{
	unsigned long *table;

	BUG_ON(gmap_is_shadow(gmap));
	/* Walk the gmap page table, lock and get pte pointer */
	table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
	if (!table || *table & _SEGMENT_ENTRY_INVALID)
		return NULL;
	return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
}

/**
 * gmap_pte_op_fixup - force a page in and connect the gmap page table
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @vmaddr: address in the host process address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if the caller can retry __gmap_translate (might fail again),
 * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
 * up or connecting the gmap page table.
 */
static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
			     unsigned long vmaddr, int prot)
{
	struct mm_struct *mm = gmap->mm;
	unsigned int fault_flags;
	bool unlocked = false;

	BUG_ON(gmap_is_shadow(gmap));
	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
		return -EFAULT;
	if (unlocked)
		/* lost mmap_lock, caller has to retry __gmap_translate */
		return 0;
	/* Connect the page tables */
	return __gmap_link(gmap, gaddr, vmaddr);
}

/**
 * gmap_pte_op_end - release the page table lock
 * @ptep: pointer to the locked pte
 * @ptl: pointer to the page table spinlock
 */
static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
{
	pte_unmap_unlock(ptep, ptl);
}

/**
 * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
 *		      and return the pmd pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 *
 * Returns a pointer to the pmd for a guest address, or NULL
 */
static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
{
	pmd_t *pmdp;

	BUG_ON(gmap_is_shadow(gmap));
	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
	if (!pmdp)
		return NULL;

	/* without huge pages, there is no need to take the table lock */
	if (!gmap->mm->context.allow_gmap_hpage_1m)
		return pmd_none(*pmdp) ? NULL : pmdp;

	spin_lock(&gmap->guest_table_lock);
	if (pmd_none(*pmdp)) {
		spin_unlock(&gmap->guest_table_lock);
		return NULL;
	}

	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
	if (!pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
	return pmdp;
}

/**
 * gmap_pmd_op_end - release the guest_table_lock if needed
 * @gmap: pointer to the guest mapping meta data structure
 * @pmdp: pointer to the pmd
 */
static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
{
	if (pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
}

/*
 * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
 * @pmdp: pointer to the pmd to be protected
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns:
 * 0 if successfully protected
 * -EAGAIN if a fixup is needed
 * -EINVAL if unsupported notifier bits have been specified
 *
 * Expected to be called with sg->mm->mmap_lock in read and
 * guest_table_lock held.
 */
static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
	int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
	pmd_t new = *pmdp;

	/* Fixup needed */
	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
		return -EAGAIN;

	if (prot == PROT_NONE && !pmd_i) {
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (prot == PROT_READ && !pmd_p) {
		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (bits & GMAP_NOTIFY_MPROT)
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));

	/* Shadow GMAP protection needs split PMDs */
	if (bits & GMAP_NOTIFY_SHADOW)
		return -EINVAL;

	return 0;
}

/*
 * gmap_protect_pte - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd associated with the pte
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EAGAIN if a fixup is needed.
 *
 * Expected to be called with sg->mm->mmap_lock in read
 */
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int rc;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long pbits = 0;

	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return -EAGAIN;

	ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
	if (!ptep)
		return -ENOMEM;

	pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
	/* Protect and unlock. */
	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
	gmap_pte_op_end(ptep, ptl);
	return rc;
}

/*
 * gmap_protect_one - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: pgste notification bits to set
 *
 * Returns:
 *   PAGE_SIZE if a small page was successfully protected;
 *   HPAGE_SIZE if a large page was successfully protected;
 *   -ENOMEM if out of memory;
 *   -EFAULT if gaddr is invalid (or mapping for shadows is missing);
 *   -EAGAIN if the guest mapping is missing and should be fixed by the caller.
 *
 * Context: Called with sg->mm->mmap_lock in read.
 */
int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits)
{
	pmd_t *pmdp;
	int rc = 0;

	BUG_ON(gmap_is_shadow(gmap));

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return -EAGAIN;

	if (!pmd_leaf(*pmdp)) {
		rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits);
		if (!rc)
			rc = PAGE_SIZE;
	} else {
		rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits);
		if (!rc)
			rc = HPAGE_SIZE;
	}
	gmap_pmd_op_end(gmap, pmdp);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_protect_one);
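
/*
 * Illustrative sketch (compiled out, hypothetical caller): -EAGAIN means
 * the host mapping must first be fixed up, giving the same
 * translate/fixup/retry loop that gmap_read_table() below uses.
 */
#if 0
static int example_protect(struct gmap *g, unsigned long gaddr, int prot)
{
	unsigned long vmaddr;
	int rc;

	do {
		rc = gmap_protect_one(g, gaddr, prot, GMAP_NOTIFY_MPROT);
		if (rc != -EAGAIN)
			break;
		vmaddr = __gmap_translate(g, gaddr);
		if (IS_ERR_VALUE(vmaddr))
			return vmaddr;
		rc = gmap_pte_op_fixup(g, gaddr, vmaddr, prot);
	} while (!rc);
	/* PAGE_SIZE/HPAGE_SIZE signal success; map both to 0 here. */
	return rc > 0 ? 0 : rc;
}
#endif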

/**
 * gmap_read_table - get an unsigned long value from a guest page table using
 *		     absolute addressing, without marking the page referenced.
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @val: pointer to the unsigned long value to return
 *
 * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
 * if reading using the virtual address failed. -EINVAL if called on a gmap
 * shadow.
 *
 * Called with gmap->mm->mmap_lock in read.
 */
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
{
	unsigned long address, vmaddr;
	spinlock_t *ptl;
	pte_t *ptep, pte;
	int rc;

	if (gmap_is_shadow(gmap))
		return -EINVAL;

	while (1) {
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
		if (ptep) {
			pte = *ptep;
			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
				address = pte_val(pte) & PAGE_MASK;
				address += gaddr & ~PAGE_MASK;
				*val = *(unsigned long *)__va(address);
				set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
				/* Do *NOT* clear the _PAGE_INVALID bit! */
				rc = 0;
			}
			gmap_pte_op_end(ptep, ptl);
		}
		if (!rc)
			break;
		vmaddr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
		if (rc)
			break;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_read_table);

/**
 * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
 * @sg: pointer to the shadow guest address space structure
 * @vmaddr: vm address associated with the rmap
 * @rmap: pointer to the rmap structure
 *
 * Called with the sg->guest_table_lock
 */
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
				    struct gmap_rmap *rmap)
{
	struct gmap_rmap *temp;
	void __rcu **slot;

	BUG_ON(!gmap_is_shadow(sg));
	slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	if (slot) {
		rmap->next = radix_tree_deref_slot_protected(slot,
							     &sg->guest_table_lock);
		for (temp = rmap->next; temp; temp = temp->next) {
			if (temp->raddr == rmap->raddr) {
				kfree(rmap);
				return;
			}
		}
		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
	} else {
		rmap->next = NULL;
		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
				  rmap);
	}
}

/**
 * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow gmap
 * @paddr: address in the parent guest address space
 * @len: length of the memory area to protect
 *
 * Returns 0 if successfully protected and the rmap was created, -ENOMEM
 * if out of memory and -EFAULT if paddr is invalid.
 */
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
			     unsigned long paddr, unsigned long len)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	while (len) {
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr))
			return vmaddr;
		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
		if (!rmap)
			return -ENOMEM;
		rmap->raddr = raddr;
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc) {
			kfree(rmap);
			return rc;
		}
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (ptep) {
			spin_lock(&sg->guest_table_lock);
			rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
					     PGSTE_VSIE_BIT);
			if (!rc)
				gmap_insert_rmap(sg, vmaddr, rmap);
			spin_unlock(&sg->guest_table_lock);
			gmap_pte_op_end(ptep, ptl);
		}
		radix_tree_preload_end();
		if (rc) {
			kfree(rmap);
			rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
			if (rc)
				return rc;
			continue;
		}
		paddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	return 0;
}

#define _SHADOW_RMAP_MASK	0x7
#define _SHADOW_RMAP_REGION1	0x5
#define _SHADOW_RMAP_REGION2	0x4
#define _SHADOW_RMAP_REGION3	0x3
#define _SHADOW_RMAP_SEGMENT	0x2
#define _SHADOW_RMAP_PGTABLE	0x1

/**
 * gmap_idte_one - invalidate a single region or segment table entry
 * @asce: region or segment table *origin* + table-type bits
 * @vaddr: virtual address to identify the table entry to flush
 *
 * The invalid bit of a single region or segment table entry is set
 * and the associated TLB entries depending on the entry are flushed.
 * The table-type of the @asce identifies the portion of the @vaddr
 * that is used as the invalidation index.
 */
static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
{
	asm volatile(
		"	idte	%0,0,%1"
		: : "a" (asce), "a" (vaddr) : "cc", "memory");
}

/**
 * gmap_unshadow_page - remove a page from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
	if (!table || *table & _PAGE_INVALID)
		return;
	gmap_call_notifier(sg, raddr, raddr + PAGE_SIZE - 1);
	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}

/**
 * __gmap_unshadow_pgt - remove all entries from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @pgt: pointer to the start of a shadow page table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
				unsigned long *pgt)
{
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += PAGE_SIZE)
		pgt[i] = _PAGE_INVALID;
}

/**
 * gmap_unshadow_pgt - remove a shadow page table from a segment entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long *ste;
	phys_addr_t sto, pgt;
	struct ptdesc *ptdesc;

	BUG_ON(!gmap_is_shadow(sg));
	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
	pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
	*ste = _SEGMENT_ENTRY_EMPTY;
	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
	/* Free page table */
	ptdesc = page_ptdesc(phys_to_page(pgt));
	page_table_free_pgste(ptdesc);
}

/**
 * __gmap_unshadow_sgt - remove all entries from a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @sgt: pointer to the start of a shadow segment table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
				unsigned long *sgt)
{
	struct ptdesc *ptdesc;
	phys_addr_t pgt;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
			continue;
		pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
		sgt[i] = _SEGMENT_ENTRY_EMPTY;
		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
		/* Free page table */
		ptdesc = page_ptdesc(phys_to_page(pgt));
		page_table_free_pgste(ptdesc);
	}
}

/**
 * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the shadow->guest_table_lock
 */
static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long r3o, *r3e;
	phys_addr_t sgt;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
	gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
	sgt = *r3e & _REGION_ENTRY_ORIGIN;
	*r3e = _REGION3_ENTRY_EMPTY;
	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
	/* Free segment table */
	page = phys_to_page(sgt);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 * @r3t: pointer to the start of a shadow region-3 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
				unsigned long *r3t)
{
	struct page *page;
	phys_addr_t sgt;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
		r3t[i] = _REGION3_ENTRY_EMPTY;
		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
		/* Free segment table */
		page = phys_to_page(sgt);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r2o, *r2e;
	phys_addr_t r3t;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
	gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
	r3t = *r2e & _REGION_ENTRY_ORIGIN;
	*r2e = _REGION2_ENTRY_EMPTY;
	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
	/* Free region 3 table */
	page = phys_to_page(r3t);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r2t: pointer to the start of a shadow region-2 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
				unsigned long *r2t)
{
	phys_addr_t r3t;
	struct page *page;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
		r2t[i] = _REGION2_ENTRY_EMPTY;
		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
		/* Free region 3 table */
		page = phys_to_page(r3t);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r1o, *r1e;
	struct page *page;
	phys_addr_t r2t;

	BUG_ON(!gmap_is_shadow(sg));
	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
	gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
	r2t = *r1e & _REGION_ENTRY_ORIGIN;
	*r1e = _REGION1_ENTRY_EMPTY;
	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
	/* Free region 2 table */
	page = phys_to_page(r2t);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r1t: pointer to the start of a shadow region-1 table
 *
 * Called with the shadow->guest_table_lock
 */
static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
				unsigned long *r1t)
{
	unsigned long asce;
	struct page *page;
	phys_addr_t r2t;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	asce = __pa(r1t) | _ASCE_TYPE_REGION1;
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
		__gmap_unshadow_r2t(sg, raddr, __va(r2t));
		/* Clear entry and flush translation r1t -> r2t */
		gmap_idte_one(asce, raddr);
		r1t[i] = _REGION1_ENTRY_EMPTY;
		/* Free region 2 table */
		page = phys_to_page(r2t);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow - remove a shadow page table completely
 * @sg: pointer to the shadow guest address space structure
 *
 * Called with sg->guest_table_lock
 */
void gmap_unshadow(struct gmap *sg)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	if (sg->removed)
		return;
	sg->removed = 1;
	gmap_call_notifier(sg, 0, -1UL);
	gmap_flush_tlb(sg);
	table = __va(sg->asce & _ASCE_ORIGIN);
	switch (sg->asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		__gmap_unshadow_r1t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION2:
		__gmap_unshadow_r2t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION3:
		__gmap_unshadow_r3t(sg, 0, table);
		break;
	case _ASCE_TYPE_SEGMENT:
		__gmap_unshadow_sgt(sg, 0, table);
		break;
	}
}
EXPORT_SYMBOL(gmap_unshadow);

/**
 * gmap_shadow_r2t - create an empty shadow region 2 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r2t: parent gmap address of the region 2 table to get shadowed
 * @fake: r2t references contiguous guest memory block, not a r2t
 *
 * The r2t parameter specifies the address of the source table. The
 * four pages of the source table are made read-only in the parent gmap
 * address space. A write to the source table area @r2t will automatically
 * remove the shadow r2 table and all of its descendants.
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r2t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region second table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_r2t = page_to_phys(page);
	/* Install shadow region second table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_r2t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r2t & _REGION_ENTRY_PROTECT);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r2t read-only in parent gmap page table */
	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
	origin = r2t & _REGION_ENTRY_ORIGIN;
	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 4);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
			rc = -EAGAIN;	/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r2t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);

/**
 * gmap_shadow_r3t - create a shadow region 3 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r3t: parent gmap address of the region 3 table to get shadowed
 * @fake: r3t references contiguous guest memory block, not a r3t
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r3t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
1599	/* Allocate a shadow region third table */
1954da4a 1600 page = gmap_alloc_crst();
4be130a0
MS
1601 if (!page)
1602 return -ENOMEM;
079f0c21 1603 s_r3t = page_to_phys(page);
4be130a0
MS
1604	/* Install shadow region third table */
1605 spin_lock(&sg->guest_table_lock);
1606 table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
1607 if (!table) {
1608 rc = -EAGAIN; /* Race with unshadow */
1609 goto out_free;
1610 }
1611 if (!(*table & _REGION_ENTRY_INVALID)) {
1612 rc = 0; /* Already established */
1613 goto out_free;
998f637c
DH
1614 } else if (*table & _REGION_ENTRY_ORIGIN) {
1615 rc = -EAGAIN; /* Race with shadow */
1493e0f9 1616 goto out_free;
4be130a0 1617 }
079f0c21 1618 crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
998f637c 1619 /* mark as invalid as long as the parent table is not protected */
079f0c21 1620 *table = s_r3t | _REGION_ENTRY_LENGTH |
998f637c 1621 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
fd8d4e3a
DH
1622 if (sg->edat_level >= 1)
1623 *table |= (r3t & _REGION_ENTRY_PROTECT);
3218f709
DH
1624 if (fake) {
1625 /* nothing to protect for fake tables */
1626 *table &= ~_REGION_ENTRY_INVALID;
1627 spin_unlock(&sg->guest_table_lock);
1628 return 0;
1629 }
4be130a0
MS
1630 spin_unlock(&sg->guest_table_lock);
1631 /* Make r3t read-only in parent gmap page table */
f1c1174f 1632 raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
4be130a0 1633 origin = r3t & _REGION_ENTRY_ORIGIN;
f1c1174f
HC
1634 offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
1635 len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
5c528db0 1636 rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
998f637c
DH
1637 spin_lock(&sg->guest_table_lock);
1638 if (!rc) {
1639 table = gmap_table_walk(sg, saddr, 3);
079f0c21 1640 if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
998f637c
DH
1641 rc = -EAGAIN; /* Race with unshadow */
1642 else
1643 *table &= ~_REGION_ENTRY_INVALID;
1644 } else {
4be130a0 1645 gmap_unshadow_r3t(sg, raddr);
4be130a0 1646 }
998f637c 1647 spin_unlock(&sg->guest_table_lock);
4be130a0
MS
1648 return rc;
1649out_free:
1650 spin_unlock(&sg->guest_table_lock);
f1c1174f 1651 __free_pages(page, CRST_ALLOC_ORDER);
4be130a0
MS
1652 return rc;
1653}
1654EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
1655
1656/**
1657 * gmap_shadow_sgt - create a shadow segment table
1658 * @sg: pointer to the shadow guest address space structure
1659 * @saddr: faulting address in the shadow gmap
1660 * @sgt: parent gmap address of the segment table to get shadowed
18b89809 1661 * @fake: sgt references contiguous guest memory block, not a sgt
4be130a0
MS
1662 *
1663 * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
1664 * shadow table structure is incomplete, -ENOMEM if out of memory and
1665 * -EFAULT if an address in the parent gmap could not be resolved.
1666 *
c1e8d7c6 1667 * Called with sg->mm->mmap_lock in read.
4be130a0 1668 */
18b89809
DH
1669int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
1670 int fake)
4be130a0
MS
1671{
1672 unsigned long raddr, origin, offset, len;
079f0c21
NB
1673 unsigned long *table;
1674 phys_addr_t s_sgt;
4be130a0
MS
1675 struct page *page;
1676 int rc;
1677
18b89809 1678 BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
4be130a0 1679 /* Allocate a shadow segment table */
1954da4a 1680 page = gmap_alloc_crst();
4be130a0
MS
1681 if (!page)
1682 return -ENOMEM;
079f0c21 1683 s_sgt = page_to_phys(page);
4be130a0
MS
1684	/* Install shadow segment table */
1685 spin_lock(&sg->guest_table_lock);
1686 table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
1687 if (!table) {
1688 rc = -EAGAIN; /* Race with unshadow */
1689 goto out_free;
1690 }
1691 if (!(*table & _REGION_ENTRY_INVALID)) {
1692 rc = 0; /* Already established */
1693 goto out_free;
998f637c
DH
1694 } else if (*table & _REGION_ENTRY_ORIGIN) {
1695 rc = -EAGAIN; /* Race with shadow */
1696 goto out_free;
4be130a0 1697 }
079f0c21 1698 crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
998f637c 1699 /* mark as invalid as long as the parent table is not protected */
079f0c21 1700 *table = s_sgt | _REGION_ENTRY_LENGTH |
998f637c 1701 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
fd8d4e3a
DH
1702 if (sg->edat_level >= 1)
1703 *table |= sgt & _REGION_ENTRY_PROTECT;
18b89809
DH
1704 if (fake) {
1705 /* nothing to protect for fake tables */
1706 *table &= ~_REGION_ENTRY_INVALID;
1707 spin_unlock(&sg->guest_table_lock);
1708 return 0;
1709 }
4be130a0
MS
1710 spin_unlock(&sg->guest_table_lock);
1711 /* Make sgt read-only in parent gmap page table */
f1c1174f 1712 raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
4be130a0 1713 origin = sgt & _REGION_ENTRY_ORIGIN;
f1c1174f
HC
1714 offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
1715 len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
5c528db0 1716 rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
998f637c
DH
1717 spin_lock(&sg->guest_table_lock);
1718 if (!rc) {
1719 table = gmap_table_walk(sg, saddr, 2);
079f0c21 1720 if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
998f637c
DH
1721 rc = -EAGAIN; /* Race with unshadow */
1722 else
1723 *table &= ~_REGION_ENTRY_INVALID;
1724 } else {
4be130a0 1725 gmap_unshadow_sgt(sg, raddr);
4be130a0 1726 }
998f637c 1727 spin_unlock(&sg->guest_table_lock);
4be130a0
MS
1728 return rc;
1729out_free:
1730 spin_unlock(&sg->guest_table_lock);
f1c1174f 1731 __free_pages(page, CRST_ALLOC_ORDER);
4be130a0
MS
1732 return rc;
1733}
1734EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
1735
84b73876
CI
1736static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)
1737{
1738 unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));
1739
1740 pgstes += _PAGE_ENTRIES;
1741
1742 pgstes[0] &= ~PGSTE_ST2_MASK;
1743 pgstes[1] &= ~PGSTE_ST2_MASK;
1744 pgstes[2] &= ~PGSTE_ST2_MASK;
1745 pgstes[3] &= ~PGSTE_ST2_MASK;
1746
1747 pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;
1748 pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;
1749 pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;
1750 pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;
1751}
1752
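/*
 * gmap_pgste_set_pgt_addr() above spreads the 64-bit parent table address
 * across the 16-bit ST2 fields of the first four PGSTEs, most significant
 * quarter first, so it can be reassembled later. For example (assuming a
 * contiguous 16-bit ST2 field), pgt_addr == 0x0123456789abc000 stores the
 * quarters 0x0123, 0x4567, 0x89ab and 0xc000 in pgstes[0..3].
 */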
4be130a0
MS
1753/**
1754 * gmap_shadow_pgt - instantiate a shadow page table
1755 * @sg: pointer to the shadow guest address space structure
1756 * @saddr: faulting address in the shadow gmap
1757 * @pgt: parent gmap address of the page table to get shadowed
fd8d4e3a 1758 * @fake: pgt references contiguous guest memory block, not a pgtable
4be130a0
MS
1759 *
1760 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
1761 * shadow table structure is incomplete, -ENOMEM if out of memory and
1762 * -EFAULT if an address in the parent gmap could not be resolved.
1763 *
c1e8d7c6 1764 * Called with sg->mm->mmap_lock in read.
4be130a0 1765 */
fd8d4e3a
DH
1766int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
1767 int fake)
4be130a0
MS
1768{
1769 unsigned long raddr, origin;
079f0c21 1770 unsigned long *table;
57b77b75 1771 struct ptdesc *ptdesc;
079f0c21 1772 phys_addr_t s_pgt;
4be130a0
MS
1773 int rc;
1774
fd8d4e3a 1775 BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
4be130a0 1776 /* Allocate a shadow page table */
57b77b75
QZ
1777 ptdesc = page_table_alloc_pgste(sg->mm);
1778 if (!ptdesc)
4be130a0 1779 return -ENOMEM;
84b73876 1780 origin = pgt & _SEGMENT_ENTRY_ORIGIN;
fd8d4e3a 1781 if (fake)
84b73876
CI
1782 origin |= GMAP_SHADOW_FAKE_TABLE;
1783 gmap_pgste_set_pgt_addr(ptdesc, origin);
57b77b75 1784 s_pgt = page_to_phys(ptdesc_page(ptdesc));
4be130a0
MS
1785 /* Install shadow page table */
1786 spin_lock(&sg->guest_table_lock);
1787 table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
1788 if (!table) {
1789 rc = -EAGAIN; /* Race with unshadow */
1790 goto out_free;
1791 }
1792 if (!(*table & _SEGMENT_ENTRY_INVALID)) {
1793 rc = 0; /* Already established */
1794 goto out_free;
998f637c
DH
1795 } else if (*table & _SEGMENT_ENTRY_ORIGIN) {
1796 rc = -EAGAIN; /* Race with shadow */
1797 goto out_free;
4be130a0 1798 }
998f637c 1799 /* mark as invalid as long as the parent table is not protected */
4be130a0 1800 *table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
998f637c 1801 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
fd8d4e3a
DH
1802 if (fake) {
1803 /* nothing to protect for fake tables */
1804 *table &= ~_SEGMENT_ENTRY_INVALID;
1805 spin_unlock(&sg->guest_table_lock);
1806 return 0;
1807 }
4be130a0
MS
1808 spin_unlock(&sg->guest_table_lock);
1809 /* Make pgt read-only in parent gmap page table (not the pgste) */
f1c1174f 1810 raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
4be130a0 1811 origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
5c528db0 1812 rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
998f637c
DH
1813 spin_lock(&sg->guest_table_lock);
1814 if (!rc) {
1815 table = gmap_table_walk(sg, saddr, 1);
079f0c21 1816 if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
998f637c
DH
1817 rc = -EAGAIN; /* Race with unshadow */
1818 else
1819 *table &= ~_SEGMENT_ENTRY_INVALID;
1820 } else {
4be130a0 1821 gmap_unshadow_pgt(sg, raddr);
4be130a0 1822 }
998f637c 1823 spin_unlock(&sg->guest_table_lock);
4be130a0
MS
1824 return rc;
1825out_free:
1826 spin_unlock(&sg->guest_table_lock);
57b77b75 1827 page_table_free_pgste(ptdesc);
4be130a0
MS
1828 return rc;
1830}
1831EXPORT_SYMBOL_GPL(gmap_shadow_pgt);
1832
1833/**
1834 * gmap_shadow_page - create a shadow page mapping
1835 * @sg: pointer to the shadow guest address space structure
1836 * @saddr: faulting address in the shadow gmap
a9d23e71 1837 * @pte: pte in parent gmap address space to get shadowed
4be130a0
MS
1838 *
1839 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
1840 * shadow table structure is incomplete, -ENOMEM if out of memory and
1841 * -EFAULT if an address in the parent gmap could not be resolved.
1842 *
c1e8d7c6 1843 * Called with sg->mm->mmap_lock in read.
4be130a0 1844 */
a9d23e71 1845int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
4be130a0
MS
1846{
1847 struct gmap *parent;
1848 struct gmap_rmap *rmap;
a9d23e71 1849 unsigned long vmaddr, paddr;
4be130a0
MS
1850 spinlock_t *ptl;
1851 pte_t *sptep, *tptep;
01f71917 1852 int prot;
4be130a0
MS
1853 int rc;
1854
1855 BUG_ON(!gmap_is_shadow(sg));
1856 parent = sg->parent;
01f71917 1857 prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;
4be130a0 1858
0cd2a787 1859 rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
4be130a0
MS
1860 if (!rmap)
1861 return -ENOMEM;
1862 rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;
1863
1864 while (1) {
a9d23e71 1865 paddr = pte_val(pte) & PAGE_MASK;
4be130a0
MS
1866 vmaddr = __gmap_translate(parent, paddr);
1867 if (IS_ERR_VALUE(vmaddr)) {
1868 rc = vmaddr;
1869 break;
1870 }
0cd2a787 1871 rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
4be130a0
MS
1872 if (rc)
1873 break;
1874 rc = -EAGAIN;
1875 sptep = gmap_pte_op_walk(parent, paddr, &ptl);
1876 if (sptep) {
1877 spin_lock(&sg->guest_table_lock);
1878 /* Get page table pointer */
1879 tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
1880 if (!tptep) {
1881 spin_unlock(&sg->guest_table_lock);
b2f58941 1882 gmap_pte_op_end(sptep, ptl);
4be130a0
MS
1883 radix_tree_preload_end();
1884 break;
1885 }
a9d23e71 1886 rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
4be130a0
MS
1887 if (rc > 0) {
1888 /* Success and a new mapping */
1889 gmap_insert_rmap(sg, vmaddr, rmap);
1890 rmap = NULL;
1891 rc = 0;
1892 }
b2f58941 1893 gmap_pte_op_end(sptep, ptl);
4be130a0
MS
1894 spin_unlock(&sg->guest_table_lock);
1895 }
1896 radix_tree_preload_end();
1897 if (!rc)
1898 break;
01f71917 1899 rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
4be130a0
MS
1900 if (rc)
1901 break;
1902 }
1903 kfree(rmap);
1904 return rc;
1905}
1906EXPORT_SYMBOL_GPL(gmap_shadow_page);
1907
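/*
 * gmap_shadow_page() above follows the common gmap retry pattern: resolve
 * the parent address, map the parent and shadow ptes under their locks,
 * and, if the parent pte is not usable yet, call gmap_pte_op_fixup() to
 * fault the page in with the required protection before trying again.
 */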
2e827528 1908/*
4be130a0
MS
1909 * gmap_shadow_notify - handle notifications for shadow gmap
1910 *
1911 * Called with sg->parent->shadow_lock.
1912 */
1913static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
c0b4bd21 1914 unsigned long gaddr)
4be130a0
MS
1915{
1916 struct gmap_rmap *rmap, *rnext, *head;
2fa5ed7d 1917 unsigned long start, end, bits, raddr;
4be130a0
MS
1918
1919 BUG_ON(!gmap_is_shadow(sg));
4be130a0
MS
1920
1921 spin_lock(&sg->guest_table_lock);
1922 if (sg->removed) {
1923 spin_unlock(&sg->guest_table_lock);
1924 return;
1925 }
1926 /* Check for top level table */
1927 start = sg->orig_asce & _ASCE_ORIGIN;
f1c1174f 1928 end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
3218f709
DH
1929 if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
1930 gaddr < end) {
4be130a0
MS
1931 /* The complete shadow table has to go */
1932 gmap_unshadow(sg);
1933 spin_unlock(&sg->guest_table_lock);
1934 list_del(&sg->list);
1935 gmap_put(sg);
1936 return;
1937 }
1938	/* Remove the page table tree for one specific entry */
f1c1174f 1939 head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
4be130a0
MS
1940 gmap_for_each_rmap_safe(rmap, rnext, head) {
1941 bits = rmap->raddr & _SHADOW_RMAP_MASK;
1942 raddr = rmap->raddr ^ bits;
1943 switch (bits) {
1944 case _SHADOW_RMAP_REGION1:
1945 gmap_unshadow_r2t(sg, raddr);
1946 break;
1947 case _SHADOW_RMAP_REGION2:
1948 gmap_unshadow_r3t(sg, raddr);
1949 break;
1950 case _SHADOW_RMAP_REGION3:
1951 gmap_unshadow_sgt(sg, raddr);
1952 break;
1953 case _SHADOW_RMAP_SEGMENT:
1954 gmap_unshadow_pgt(sg, raddr);
1955 break;
1956 case _SHADOW_RMAP_PGTABLE:
1957 gmap_unshadow_page(sg, raddr);
1958 break;
1959 }
1960 kfree(rmap);
1961 }
1962 spin_unlock(&sg->guest_table_lock);
1963}
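/*
 * The rmap entries consumed above encode an address and a level tag in one
 * value: their creators store, e.g., (saddr & _REGION1_MASK) |
 * _SHADOW_RMAP_REGION1, so masking with _SHADOW_RMAP_MASK recovers the
 * table level and the remaining bits give the shadow address to unshadow.
 */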
1e133ab2
MS
1964
1965/**
1966 * ptep_notify - call all invalidation callbacks for a specific pte.
1967 * @mm: pointer to the process mm_struct
2e827528 1968 * @vmaddr: virtual address in the process address space
1e133ab2 1969 * @pte: pointer to the page table entry
4be130a0 1970 * @bits: bits from the pgste that caused the notify call
1e133ab2
MS
1971 *
1972 * This function is assumed to be called with the page table lock held
1973 * for the pte to notify.
1974 */
4be130a0
MS
1975void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
1976 pte_t *pte, unsigned long bits)
1e133ab2 1977{
2fa5ed7d 1978 unsigned long offset, gaddr = 0;
4be130a0 1979 struct gmap *gmap, *sg, *next;
1e133ab2
MS
1980
1981 offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
f1c1174f 1982 offset = offset * (PAGE_SIZE / sizeof(pte_t));
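	/*
	 * A 2K s390 page table holds 256 ptes, so the masked pte pointer
	 * is the byte offset of the entry (index * sizeof(pte_t)); scaling
	 * it by PAGE_SIZE / sizeof(pte_t) yields index * PAGE_SIZE, the
	 * offset of the page within the mapped segment.
	 */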
8ecb1a59
MS
1983 rcu_read_lock();
1984 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
1985 spin_lock(&gmap->guest_table_lock);
37d1b5d8 1986 gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
8ecb1a59 1987 spin_unlock(&gmap->guest_table_lock);
37d1b5d8 1988 if (!IS_GADDR_VALID(gaddr))
2fa5ed7d
JF
1989 continue;
1990
1991 if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
1992 spin_lock(&gmap->shadow_lock);
1993 list_for_each_entry_safe(sg, next,
1994 &gmap->children, list)
c0b4bd21 1995 gmap_shadow_notify(sg, vmaddr, gaddr);
2fa5ed7d
JF
1996 spin_unlock(&gmap->shadow_lock);
1997 }
1998 if (bits & PGSTE_IN_BIT)
8ecb1a59 1999 gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
1e133ab2 2000 }
8ecb1a59 2001 rcu_read_unlock();
1e133ab2
MS
2002}
2003EXPORT_SYMBOL_GPL(ptep_notify);
2004
6a376277
JF
2005static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
2006 unsigned long gaddr)
2007{
b8e3b379 2008 set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
6a376277
JF
2009 gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
2010}
2011
0959e168
JF
2012/**
2013 * gmap_pmdp_xchg - exchange a gmap pmd with another
2014 * @gmap: pointer to the guest address space structure
2015 * @pmdp: pointer to the pmd entry
2016 * @new: replacement entry
2017 * @gaddr: the affected guest address
2018 *
2019 * This function is assumed to be called with the guest_table_lock
2020 * held.
2021 */
2022static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
2023 unsigned long gaddr)
2024{
2025 gaddr &= HPAGE_MASK;
2026 pmdp_notify_gmap(gmap, pmdp, gaddr);
e1fc74ff 2027 new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
0959e168
JF
2028 if (MACHINE_HAS_TLB_GUEST)
2029 __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
2030 IDTE_GLOBAL);
2031 else if (MACHINE_HAS_IDTE)
2032 __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
2033 else
2034 __pmdp_csp(pmdp);
b8e3b379 2035 set_pmd(pmdp, new);
0959e168
JF
2036}
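/*
 * The flush in gmap_pmdp_xchg() picks the strongest facility available:
 * a guest-ASCE scoped IDTE with the TLB-guest facility, a plain IDTE when
 * the IDTE instruction exists, and CSP (compare and swap and purge) as
 * the fallback.
 */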
2037
6a376277
JF
2038static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
2039 int purge)
2040{
2041 pmd_t *pmdp;
2042 struct gmap *gmap;
2043 unsigned long gaddr;
2044
2045 rcu_read_lock();
2046 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
2047 spin_lock(&gmap->guest_table_lock);
37d1b5d8 2048 pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
6a376277 2049 if (pmdp) {
6a376277 2050 pmdp_notify_gmap(gmap, pmdp, gaddr);
0959e168 2051 WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
03e6db16
GS
2052 _SEGMENT_ENTRY_GMAP_UC |
2053 _SEGMENT_ENTRY));
6a376277
JF
2054 if (purge)
2055 __pmdp_csp(pmdp);
b8e3b379 2056 set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
6a376277
JF
2057 }
2058 spin_unlock(&gmap->guest_table_lock);
2059 }
2060 rcu_read_unlock();
2061}
2062
2063/**
2064 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
2065 * flushing
2066 * @mm: pointer to the process mm_struct
2067 * @vmaddr: virtual address in the process address space
2068 */
2069void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
2070{
2071 gmap_pmdp_clear(mm, vmaddr, 0);
2072}
2073EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);
2074
2075/**
2076 * gmap_pmdp_csp - csp all affected guest pmd entries
2077 * @mm: pointer to the process mm_struct
2078 * @vmaddr: virtual address in the process address space
2079 */
2080void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
2081{
2082 gmap_pmdp_clear(mm, vmaddr, 1);
2083}
2084EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
2085
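/*
 * The gmap_pmdp_* entry points around here differ only in how the TLB is
 * flushed: _invalidate clears the entries without flushing, _csp purges
 * with compare-and-swap-and-purge, and _idte_local/_idte_global use IDTE
 * scoped to the local CPU or to all CPUs, respectively.
 */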
2086/**
2087 * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
2088 * @mm: pointer to the process mm_struct
2089 * @vmaddr: virtual address in the process address space
2090 */
2091void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
2092{
37d1b5d8 2093 unsigned long gaddr;
6a376277
JF
2094 struct gmap *gmap;
2095 pmd_t *pmdp;
2096
2097 rcu_read_lock();
2098 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
2099 spin_lock(&gmap->guest_table_lock);
37d1b5d8
CI
2100 pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
2101 if (pmdp) {
6a376277 2102 pmdp_notify_gmap(gmap, pmdp, gaddr);
37d1b5d8
CI
2103 WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2104 _SEGMENT_ENTRY_GMAP_UC |
2105 _SEGMENT_ENTRY));
6a376277
JF
2106 if (MACHINE_HAS_TLB_GUEST)
2107 __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
2108 gmap->asce, IDTE_LOCAL);
2109 else if (MACHINE_HAS_IDTE)
2110 __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
37d1b5d8 2111 *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
6a376277
JF
2112 }
2113 spin_unlock(&gmap->guest_table_lock);
2114 }
2115 rcu_read_unlock();
2116}
2117EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
2118
2119/**
2120 * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
2121 * @mm: pointer to the process mm_struct
2122 * @vmaddr: virtual address in the process address space
2123 */
2124void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
2125{
37d1b5d8 2126 unsigned long gaddr;
6a376277
JF
2127 struct gmap *gmap;
2128 pmd_t *pmdp;
2129
2130 rcu_read_lock();
2131 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
2132 spin_lock(&gmap->guest_table_lock);
37d1b5d8
CI
2133 pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
2134 if (pmdp) {
6a376277 2135 pmdp_notify_gmap(gmap, pmdp, gaddr);
37d1b5d8
CI
2136 WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
2137 _SEGMENT_ENTRY_GMAP_UC |
2138 _SEGMENT_ENTRY));
6a376277
JF
2139 if (MACHINE_HAS_TLB_GUEST)
2140 __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
2141 gmap->asce, IDTE_GLOBAL);
2142 else if (MACHINE_HAS_IDTE)
2143 __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
2144 else
2145 __pmdp_csp(pmdp);
37d1b5d8 2146 *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
6a376277
JF
2147 }
2148 spin_unlock(&gmap->guest_table_lock);
2149 }
2150 rcu_read_unlock();
2151}
2152EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
2153
0959e168
JF
2154/**
2155 * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
2156 * @gmap: pointer to guest address space
2157 * @pmdp: pointer to the pmd to be tested
2158 * @gaddr: virtual address in the guest address space
2159 *
2160 * This function is assumed to be called with the guest_table_lock
2161 * held.
2162 */
ffbd2685
VG
2163static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
2164 unsigned long gaddr)
0959e168
JF
2165{
2166 if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
2167 return false;
2168
2169	/* Already protected memory that did not change is clean */
2170 if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
2171 !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
2172 return false;
2173
2174 /* Clear UC indication and reset protection */
b8e3b379 2175 set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
0959e168
JF
2176 gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
2177 return true;
2178}
2179
2180/**
2181 * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
2182 * @gmap: pointer to guest address space
2183 * @bitmap: dirty bitmap for this pmd
2184 * @gaddr: virtual address in the guest address space
2185 * @vmaddr: virtual address in the host address space
2186 *
2187 * This function is assumed to be called with the guest_table_lock
2188 * held.
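 *
 * The bitmap covers exactly one segment: _PAGE_ENTRIES (256) bits, i.e.
 * four unsigned longs, one bit per 4K page of the 1M segment.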
2189 */
2190void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
2191 unsigned long gaddr, unsigned long vmaddr)
2192{
2193 int i;
2194 pmd_t *pmdp;
2195 pte_t *ptep;
2196 spinlock_t *ptl;
2197
2198 pmdp = gmap_pmd_op_walk(gmap, gaddr);
2199 if (!pmdp)
2200 return;
2201
2f709f7b 2202 if (pmd_leaf(*pmdp)) {
0959e168
JF
2203 if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
2204 bitmap_fill(bitmap, _PAGE_ENTRIES);
2205 } else {
2206 for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
2207 ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
2208 if (!ptep)
2209 continue;
2210 if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
2211 set_bit(i, bitmap);
b2f58941 2212 pte_unmap_unlock(ptep, ptl);
0959e168
JF
2213 }
2214 }
2215 gmap_pmd_op_end(gmap, pmdp);
2216}
2217EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
2218
ba925fa3
GS
2219#ifdef CONFIG_TRANSPARENT_HUGEPAGE
2220static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
2221 unsigned long end, struct mm_walk *walk)
2222{
2223 struct vm_area_struct *vma = walk->vma;
2224
2225 split_huge_pmd(vma, pmd, addr);
2226 return 0;
2227}
2228
2229static const struct mm_walk_ops thp_split_walk_ops = {
2230 .pmd_entry = thp_split_walk_pmd_entry,
49b06385 2231 .walk_lock = PGWALK_WRLOCK_VERIFY,
ba925fa3
GS
2232};
2233
1e133ab2
MS
2234static inline void thp_split_mm(struct mm_struct *mm)
2235{
1e133ab2 2236 struct vm_area_struct *vma;
e7b6b990 2237 VMA_ITERATOR(vmi, mm, 0);
1e133ab2 2238
e7b6b990 2239 for_each_vma(vmi, vma) {
1c71222e 2240 vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
ba925fa3 2241 walk_page_vma(vma, &thp_split_walk_ops, NULL);
1e133ab2
MS
2242 }
2243 mm->def_flags |= VM_NOHUGEPAGE;
1e133ab2 2244}
ba925fa3
GS
2245#else
2246static inline void thp_split_mm(struct mm_struct *mm)
2247{
2248}
2249#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1e133ab2
MS
2250
2251/*
2252 * Switch on pgstes for the current userspace process (used by KVM).
2253 */
2254int s390_enable_sie(void)
2255{
2256 struct mm_struct *mm = current->mm;
2257
2258	/* Do we have pgstes? If yes, we are done. */
2259 if (mm_has_pgste(mm))
2260 return 0;
2261 /* Fail if the page tables are 2K */
2262 if (!mm_alloc_pgste(mm))
2263 return -EINVAL;
d8ed45c5 2264 mmap_write_lock(mm);
1e133ab2
MS
2265 mm->context.has_pgste = 1;
2266 /* split thp mappings and disable thp for future mappings */
2267 thp_split_mm(mm);
d8ed45c5 2268 mmap_write_unlock(mm);
1e133ab2
MS
2269 return 0;
2270}
2271EXPORT_SYMBOL_GPL(s390_enable_sie);
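/*
 * Usage sketch (illustrative; KVM performs the equivalent steps in its
 * VM-init path):
 *
 *	if (s390_enable_sie())
 *		return -EINVAL;
 *	gmap = gmap_create(current->mm, limit);
 *	if (!gmap)
 *		return -ENOMEM;
 */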
2272
06201e00
DH
2273static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
2274 unsigned long end, struct mm_walk *walk)
fa0c5eab 2275{
06201e00
DH
2276 unsigned long *found_addr = walk->private;
2277
2278	/* Return 1 if the page is a zeropage. */
2279 if (is_zero_pfn(pte_pfn(*pte))) {
2280 /*
2281 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
2282 * right thing and likely don't care: FAULT_FLAG_UNSHARE
2283 * currently only works in COW mappings, which is also where
2284 * mm_forbids_zeropage() is checked.
2285 */
2286 if (!is_cow_mapping(walk->vma->vm_flags))
2287 return -EFAULT;
2288
2289 *found_addr = addr;
2290 return 1;
2291 }
2292 return 0;
2293}
2294
2295static const struct mm_walk_ops find_zeropage_ops = {
2296 .pte_entry = find_zeropage_pte_entry,
2297 .walk_lock = PGWALK_WRLOCK,
2298};
2299
2300/*
2301 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
2302 * we cannot simply zap all shared zeropages, because this could later
2303 * trigger unexpected userfaultfd missing events.
2304 *
2305 * This must be called after mm->context.allow_cow_sharing was
2306 * set to 0, to avoid future mappings of shared zeropages.
2307 *
2308 * The core mm makes a contract with s390: even if mm were to remove a
2309 * page table (making a racing walk_page_range_vma() call to
2310 * pte_offset_map_lock() fail), it will never insert a page table
2311 * containing empty zero pages once mm_forbids_zeropage(mm), i.e.
2312 * mm->context.allow_cow_sharing, is set to 0.
2313 */
2314static int __s390_unshare_zeropages(struct mm_struct *mm)
2315{
2316 struct vm_area_struct *vma;
2317 VMA_ITERATOR(vmi, mm, 0);
2318 unsigned long addr;
2319 vm_fault_t fault;
2320 int rc;
2321
2322 for_each_vma(vmi, vma) {
2323 /*
2324 * We could only look at COW mappings, but it's more future
2325 * proof to catch unexpected zeropages in other mappings and
2326 * fail.
2327 */
2328 if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
2329 continue;
2330 addr = vma->vm_start;
2331
2332retry:
2333 rc = walk_page_range_vma(vma, addr, vma->vm_end,
2334 &find_zeropage_ops, &addr);
2335 if (rc < 0)
2336 return rc;
2337 else if (!rc)
2338 continue;
2339
2340 /* addr was updated by find_zeropage_pte_entry() */
2341 fault = handle_mm_fault(vma, addr,
2342 FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
2343 NULL);
2344 if (fault & VM_FAULT_OOM)
2345 return -ENOMEM;
2346 /*
2347 * See break_ksm(): even after handle_mm_fault() returned 0, we
2348 * must start the lookup from the current address, because
2349 * handle_mm_fault() may back out if there's any difficulty.
2350 *
2351 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
2352 * maybe they could trigger in the future on concurrent
2353 * truncation. In that case, the shared zeropage would be gone
2354 * and we can simply retry and make progress.
2355 */
2356 cond_resched();
2357 goto retry;
2358 }
2359
2360 return 0;
2361}
2362
2363static int __s390_disable_cow_sharing(struct mm_struct *mm)
2364{
2365 int rc;
2366
2367 if (!mm->context.allow_cow_sharing)
2368 return 0;
2369
2370 mm->context.allow_cow_sharing = 0;
2371
2372 /* Replace all shared zeropages by anonymous pages. */
2373 rc = __s390_unshare_zeropages(mm);
d7597f59
SR
2374 /*
2375 * Make sure to disable KSM (if enabled for the whole process or
2376 * individual VMAs). Note that nothing currently hinders user space
2377 * from re-enabling it.
2378 */
06201e00
DH
2379 if (!rc)
2380 rc = ksm_disable(mm);
2381 if (rc)
2382 mm->context.allow_cow_sharing = 1;
2383 return rc;
2384}
2385
2386/*
2387 * Disable most COW-sharing of memory pages for the whole process:
2388 * (1) Disable KSM and unmerge/unshare any KSM pages.
2389 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
2390 *
2391 * Note that we currently don't bother with COW-shared pages that are shared
2392 * with parent/child processes due to fork().
2393 */
2394int s390_disable_cow_sharing(void)
2395{
2396 int rc;
2397
2398 mmap_write_lock(current->mm);
2399 rc = __s390_disable_cow_sharing(current->mm);
2400 mmap_write_unlock(current->mm);
2401 return rc;
fa0c5eab 2402}
06201e00 2403EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
fa0c5eab 2404
1e133ab2
MS
2405/*
2406 * Enable storage key handling from now on and initialize the storage
2407 * keys with the default key.
2408 */
964c2c05
DD
2409static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
2410 unsigned long next, struct mm_walk *walk)
1e133ab2 2411{
1e133ab2
MS
2412 /* Clear storage key */
2413 ptep_zap_key(walk->mm, addr, pte);
2414 return 0;
2415}
2416
6d594627
CB
2417/*
2418 * Give a chance to schedule after setting keys for 256 pages.
2419 * We only hold the mm lock, which is an rwsem, and the kvm srcu.
2420 * Both can sleep.
2421 */
2422static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
2423 unsigned long next, struct mm_walk *walk)
2424{
2425 cond_resched();
2426 return 0;
2427}
2428
964c2c05
DD
2429static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
2430 unsigned long hmask, unsigned long next,
2431 struct mm_walk *walk)
2432{
2433 pmd_t *pmd = (pmd_t *)pte;
2434 unsigned long start, end;
1433b36e 2435 struct folio *folio = page_folio(pmd_page(*pmd));
964c2c05
DD
2436
2437 /*
2438 * The write check makes sure we do not set a key on shared
2439 * memory. This is needed as the walker does not differentiate
2440 * between actual guest memory and the process executable or
2441 * shared libraries.
2442 */
2443 if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
2444 !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
2445 return 0;
2446
2447 start = pmd_val(*pmd) & HPAGE_MASK;
843c3280 2448 end = start + HPAGE_SIZE;
964c2c05 2449 __storage_key_init_range(start, end);
1433b36e 2450 set_bit(PG_arch_1, &folio->flags);
6d594627 2451 cond_resched();
964c2c05
DD
2452 return 0;
2453}
2454
7b86ac33
CH
2455static const struct mm_walk_ops enable_skey_walk_ops = {
2456 .hugetlb_entry = __s390_enable_skey_hugetlb,
2457 .pte_entry = __s390_enable_skey_pte,
6d594627 2458 .pmd_entry = __s390_enable_skey_pmd,
49b06385 2459 .walk_lock = PGWALK_WRLOCK,
7b86ac33
CH
2460};
2461
1e133ab2
MS
2462int s390_enable_skey(void)
2463{
1e133ab2 2464 struct mm_struct *mm = current->mm;
1e133ab2
MS
2465 int rc = 0;
2466
d8ed45c5 2467 mmap_write_lock(mm);
55531b74 2468 if (mm_uses_skeys(mm))
1e133ab2
MS
2469 goto out_up;
2470
55531b74 2471 mm->context.uses_skeys = 1;
06201e00 2472 rc = __s390_disable_cow_sharing(mm);
fa0c5eab
JF
2473 if (rc) {
2474 mm->context.uses_skeys = 0;
2475 goto out_up;
1e133ab2 2476 }
7b86ac33 2477 walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
1e133ab2
MS
2478
2479out_up:
d8ed45c5 2480 mmap_write_unlock(mm);
1e133ab2
MS
2481 return rc;
2482}
2483EXPORT_SYMBOL_GPL(s390_enable_skey);
2484
2485/*
2486 * Reset CMMA state, make all pages stable again.
2487 */
2488static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
2489 unsigned long next, struct mm_walk *walk)
2490{
2491 ptep_zap_unused(walk->mm, addr, pte, 1);
2492 return 0;
2493}
2494
7b86ac33
CH
2495static const struct mm_walk_ops reset_cmma_walk_ops = {
2496 .pte_entry = __s390_reset_cmma,
49b06385 2497 .walk_lock = PGWALK_WRLOCK,
7b86ac33
CH
2498};
2499
1e133ab2
MS
2500void s390_reset_cmma(struct mm_struct *mm)
2501{
d8ed45c5 2502 mmap_write_lock(mm);
7b86ac33 2503 walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
d8ed45c5 2504 mmap_write_unlock(mm);
1e133ab2
MS
2505}
2506EXPORT_SYMBOL_GPL(s390_reset_cmma);
12748007 2507
6f73517d
CI
2508#define GATHER_GET_PAGES 32
2509
2510struct reset_walk_state {
2511 unsigned long next;
2512 unsigned long count;
2513 unsigned long pfns[GATHER_GET_PAGES];
2514};
2515
2516static int s390_gather_pages(pte_t *ptep, unsigned long addr,
2517 unsigned long next, struct mm_walk *walk)
12748007 2518{
6f73517d 2519 struct reset_walk_state *p = walk->private;
12748007
CB
2520 pte_t pte = READ_ONCE(*ptep);
2521
6f73517d
CI
2522 if (pte_present(pte)) {
2523 /* we have a reference from the mapping, take an extra one */
2524 get_page(phys_to_page(pte_val(pte)));
2525 p->pfns[p->count] = phys_to_pfn(pte_val(pte));
2526 p->next = next;
2527 p->count++;
2528 }
2529 return p->count >= GATHER_GET_PAGES;
12748007
CB
2530}
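/*
 * Returning non-zero from a pte_entry callback terminates walk_page_range()
 * and makes it return that value, so the walk pauses once GATHER_GET_PAGES
 * pages are gathered; __s390_uv_destroy_range() below then drains the batch
 * with the mmap lock dropped and resumes the walk at p->next.
 */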
2531
6f73517d
CI
2532static const struct mm_walk_ops gather_pages_ops = {
2533 .pte_entry = s390_gather_pages,
49b06385 2534 .walk_lock = PGWALK_RDLOCK,
12748007
CB
2535};
2536
6f73517d
CI
2537/*
2538 * Call the Destroy secure page UVC on each page in the given array of PFNs.
2539 * Each page needs to have an extra reference, which will be released here.
2540 */
2541void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
12748007 2542{
70631506 2543 struct folio *folio;
6f73517d
CI
2544 unsigned long i;
2545
2546 for (i = 0; i < count; i++) {
70631506 2547 folio = pfn_folio(pfns[i]);
6f73517d 2548 /* we always have an extra reference */
70631506 2549 uv_destroy_folio(folio);
6f73517d 2550 /* get rid of the extra reference */
70631506 2551 folio_put(folio);
6f73517d
CI
2552 cond_resched();
2553 }
2554}
2555EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
2556
2557/**
2558 * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
2559 * in the given range of the given address space.
2560 * @mm: the mm to operate on
2561 * @start: the start of the range
2562 * @end: the end of the range
2563 * @interruptible: if not 0, stop when a fatal signal is received
2564 *
2565 * Walk the given range of the given address space and call the destroy
2566 * secure page UVC on each page. Optionally exit early if a fatal signal is
2567 * pending.
2568 *
2569 * Return: 0 on success, -EINTR if the function stopped before completing
2570 */
2571int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
2572 unsigned long end, bool interruptible)
2573{
2574 struct reset_walk_state state = { .next = start };
2575 int r = 1;
2576
2577 while (r > 0) {
2578 state.count = 0;
2579 mmap_read_lock(mm);
2580 r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
2581 mmap_read_unlock(mm);
2582 cond_resched();
2583 s390_uv_destroy_pfns(state.count, state.pfns);
2584 if (interruptible && fatal_signal_pending(current))
2585 return -EINTR;
2586 }
2587 return 0;
12748007 2588}
6f73517d 2589EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
faa2f72c 2590
faa2f72c
CI
2591/**
2592 * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
2593 * @gmap: the gmap whose ASCE needs to be replaced
2594 *
292a7d6f
CI
2595 * If the ASCE is of SEGMENT type, this function returns -EINVAL; replacing
2596 * such an ASCE would leave the pointers in the host_to_guest radix tree
2597 * pointing to the wrong pages, causing use-after-free and memory corruption.
faa2f72c
CI
2598 * If the allocation of the new top level page table fails, the ASCE is not
2599 * replaced.
2600 * In any case, the old ASCE is always removed from the gmap CRST list.
2601 * Therefore the caller has to make sure to save a pointer to it
2602 * beforehand, unless a leak is actually intended.
2603 */
2604int s390_replace_asce(struct gmap *gmap)
2605{
2606 unsigned long asce;
2607 struct page *page;
2608 void *table;
2609
292a7d6f
CI
2610 /* Replacing segment type ASCEs would cause serious issues */
2611 if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
2612 return -EINVAL;
2613
1954da4a 2614 page = gmap_alloc_crst();
faa2f72c
CI
2615 if (!page)
2616 return -ENOMEM;
2617 table = page_to_virt(page);
2618 memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
2619
faa2f72c
CI
2620 /* Set new table origin while preserving existing ASCE control bits */
2621 asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
2622 WRITE_ONCE(gmap->asce, asce);
2623 WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
2624 WRITE_ONCE(gmap->table, table);
2625
2626 return 0;
2627}
2628EXPORT_SYMBOL_GPL(s390_replace_asce);
5cbe2435
CI
2629
2630/**
2631 * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split
2632 * @mm: the mm containing the folio to work on
2633 * @folio: the folio
2634 * @split: whether to split a large folio
2635 *
2636 * Context: Must be called while holding an extra reference to the folio;
2637 * the mm lock should not be held.
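 *
 * Return: 0 if a requested split succeeded, -EAGAIN if the caller should
 * retry (always when @split is false), or another negative error code
 * from split_folio().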
2638 */
2639int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
2640{
2641 int rc;
2642
2643 lockdep_assert_not_held(&mm->mmap_lock);
2644 folio_wait_writeback(folio);
2645 lru_add_drain_all();
2646 if (split) {
2647 folio_lock(folio);
2648 rc = split_folio(folio);
2649 folio_unlock(folio);
2650
2651 if (rc != -EBUSY)
2652 return rc;
2653 }
2654 return -EAGAIN;
2655}
2656EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio);