// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999
 *    Author(s): Hartmut Penner ([email protected])
 *               Ulrich Weigand ([email protected])
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1995  Linus Torvalds
 */

#include <linux/kernel_stat.h>
#include <linux/mmu_context.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/compat.h>
#include <linux/smp.h>
#include <linux/kdebug.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/extable.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <linux/kfence.h>
#include <linux/pagewalk.h>
#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/fault.h>
#include <asm/diag.h>
#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/facility.h>
#include <asm/uv.h>
#include "../kernel/entry.h"

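/*
 * Facility 75 is the access-exception fetch/store indication facility:
 * when it is installed, the translation-exception identification (TEID)
 * reports whether the faulting access was a fetch or a store. Its
 * availability is cached in a static key since fault_is_write() is
 * evaluated on every fault.
 */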
static DEFINE_STATIC_KEY_FALSE(have_store_indication);

static int __init fault_init(void)
{
        if (test_facility(75))
                static_branch_enable(&have_store_indication);
        return 0;
}
early_initcall(fault_init);

/*
 * Find out which address space caused the exception.
 */
static bool is_kernel_fault(struct pt_regs *regs)
{
        union teid teid = { .val = regs->int_parm_long };

        if (user_mode(regs))
                return false;
        if (teid.as == PSW_BITS_AS_SECONDARY)
                return false;
        return true;
}

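/* The TEID holds the failing address with the page offset bits removed. */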
static unsigned long get_fault_address(struct pt_regs *regs)
{
        union teid teid = { .val = regs->int_parm_long };

        return teid.addr * PAGE_SIZE;
}

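/*
 * Without the fetch/store indication facility there is no reliable way
 * to tell a store from a fetch, so conservatively report a read access.
 */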
static __always_inline bool fault_is_write(struct pt_regs *regs)
{
        union teid teid = { .val = regs->int_parm_long };

        if (static_branch_likely(&have_store_indication))
                return teid.fsi == TEID_FSI_STORE;
        return false;
}

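/*
 * Walk the page tables for the given address, starting from the given
 * ASCE, and print every table entry on the way down. The walk stops
 * early at an invalid entry or at a large (huge) mapping.
 */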
static void dump_pagetable(unsigned long asce, unsigned long address)
{
        unsigned long entry, *table = __va(asce & _ASCE_ORIGIN);

        pr_alert("AS:%016lx ", asce);
        switch (asce & _ASCE_TYPE_MASK) {
        case _ASCE_TYPE_REGION1:
                table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
                if (get_kernel_nofault(entry, table))
                        goto bad;
                pr_cont("R1:%016lx ", entry);
                if (entry & _REGION_ENTRY_INVALID)
                        goto out;
                table = __va(entry & _REGION_ENTRY_ORIGIN);
                fallthrough;
        case _ASCE_TYPE_REGION2:
                table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
                if (get_kernel_nofault(entry, table))
                        goto bad;
                pr_cont("R2:%016lx ", entry);
                if (entry & _REGION_ENTRY_INVALID)
                        goto out;
                table = __va(entry & _REGION_ENTRY_ORIGIN);
                fallthrough;
        case _ASCE_TYPE_REGION3:
                table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
                if (get_kernel_nofault(entry, table))
                        goto bad;
                pr_cont("R3:%016lx ", entry);
                if (entry & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
                        goto out;
                table = __va(entry & _REGION_ENTRY_ORIGIN);
                fallthrough;
        case _ASCE_TYPE_SEGMENT:
                table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
                if (get_kernel_nofault(entry, table))
                        goto bad;
                pr_cont("S:%016lx ", entry);
                if (entry & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
                        goto out;
                table = __va(entry & _SEGMENT_ENTRY_ORIGIN);
        }
        table += (address & _PAGE_INDEX) >> PAGE_SHIFT;
        if (get_kernel_nofault(entry, table))
                goto bad;
        pr_cont("P:%016lx ", entry);
out:
        pr_cont("\n");
        return;
bad:
        pr_cont("BAD\n");
}

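/*
 * Print the failing address, the TEID, the affected address space and
 * the page-table walk for the ASCE the fault happened in.
 */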
static void dump_fault_info(struct pt_regs *regs)
{
        union teid teid = { .val = regs->int_parm_long };
        unsigned long asce;

        pr_alert("Failing address: %016lx TEID: %016lx\n",
                 get_fault_address(regs), teid.val);
        pr_alert("Fault in ");
        switch (teid.as) {
        case PSW_BITS_AS_HOME:
                pr_cont("home space ");
                break;
        case PSW_BITS_AS_SECONDARY:
                pr_cont("secondary space ");
                break;
        case PSW_BITS_AS_ACCREG:
                pr_cont("access register ");
                break;
        case PSW_BITS_AS_PRIMARY:
                pr_cont("primary space ");
                break;
        }
        pr_cont("mode while using ");
        if (is_kernel_fault(regs)) {
                asce = get_lowcore()->kernel_asce.val;
                pr_cont("kernel ");
        } else {
                asce = get_lowcore()->user_asce.val;
                pr_cont("user ");
        }
        pr_cont("ASCE.\n");
        dump_pagetable(asce, get_fault_address(regs));
}

int show_unhandled_signals = 1;

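/*
 * Print a rate-limited report about an unhandled fault of a user
 * process, including the fault info and the registers.
 */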
void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
        static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);

        if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
                return;
        if (!unhandled_signal(current, signr))
                return;
        if (!__ratelimit(&rs))
                return;
        pr_alert("User process fault: interruption code %04x ilc:%d ",
                 regs->int_code & 0xffff, regs->int_code >> 17);
        print_vma_addr(KERN_CONT "in ", regs->psw.addr);
        pr_cont("\n");
        if (is_mm_fault)
                dump_fault_info(regs);
        show_regs(regs);
}

static void do_sigsegv(struct pt_regs *regs, int si_code)
{
        report_user_fault(regs, SIGSEGV, 1);
        force_sig_fault(SIGSEGV, si_code, (void __user *)get_fault_address(regs));
}

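/*
 * Handle a fault that cannot (or need not) be resolved, without the
 * mmap lock held: user mode gets a SIGSEGV, kernel mode tries an
 * exception fixup, then KFENCE, and finally dies with an oops.
 */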
static void handle_fault_error_nolock(struct pt_regs *regs, int si_code)
{
        unsigned long address;
        bool is_write;

        if (user_mode(regs)) {
                if (WARN_ON_ONCE(!si_code))
                        si_code = SEGV_MAPERR;
                return do_sigsegv(regs, si_code);
        }
        if (fixup_exception(regs))
                return;
        if (is_kernel_fault(regs)) {
                address = get_fault_address(regs);
                is_write = fault_is_write(regs);
                if (kfence_handle_page_fault(address, is_write, regs))
                        return;
                pr_alert("Unable to handle kernel pointer dereference in virtual kernel address space\n");
        } else {
                pr_alert("Unable to handle kernel paging request in virtual user address space\n");
        }
        dump_fault_info(regs);
        die(regs, "Oops");
}

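/* Same as handle_fault_error_nolock(), but drops the mmap read lock first. */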
static void handle_fault_error(struct pt_regs *regs, int si_code)
{
        struct mm_struct *mm = current->mm;

        mmap_read_unlock(mm);
        handle_fault_error_nolock(regs, si_code);
}

static void do_sigbus(struct pt_regs *regs)
{
        force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)get_fault_address(regs));
}

/*
 * This routine handles page faults. It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 *
 * interruption code (int_code):
 *   04       Protection           ->  Write-Protection  (suppression)
 *   10       Segment translation  ->  Not present       (nullification)
 *   11       Page translation     ->  Not present       (nullification)
 *   3b       Region third trans.  ->  Not present       (nullification)
 */
static void do_exception(struct pt_regs *regs, int access)
{
        struct vm_area_struct *vma;
        unsigned long address;
        struct mm_struct *mm;
        unsigned int flags;
        vm_fault_t fault;
        bool is_write;

        /*
         * The instruction that caused the program check has
         * been nullified. Don't signal single step via SIGTRAP.
         */
        clear_thread_flag(TIF_PER_TRAP);
        if (kprobe_page_fault(regs, 14))
                return;
        mm = current->mm;
        address = get_fault_address(regs);
        is_write = fault_is_write(regs);
        if (is_kernel_fault(regs) || faulthandler_disabled() || !mm)
                return handle_fault_error_nolock(regs, 0);
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
        flags = FAULT_FLAG_DEFAULT;
        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;
        if (is_write)
                access = VM_WRITE;
        if (access == VM_WRITE)
                flags |= FAULT_FLAG_WRITE;
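        /*
         * Try the lockless per-VMA fast path first; only user-space
         * faults may take it. Fall back to the mmap_lock based slow
         * path below if the VMA cannot be locked or the fault needs
         * to be retried.
         */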
        if (!(flags & FAULT_FLAG_USER))
                goto lock_mmap;
        vma = lock_vma_under_rcu(mm, address);
        if (!vma)
                goto lock_mmap;
        if (!(vma->vm_flags & access)) {
                vma_end_read(vma);
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
                return handle_fault_error_nolock(regs, SEGV_ACCERR);
        }
        fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
        if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
                vma_end_read(vma);
        if (!(fault & VM_FAULT_RETRY)) {
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
                goto done;
        }
        count_vm_vma_lock_event(VMA_LOCK_RETRY);
        if (fault & VM_FAULT_MAJOR)
                flags |= FAULT_FLAG_TRIED;
        /* Quick path to respond to signals */
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
                        handle_fault_error_nolock(regs, 0);
                return;
        }
lock_mmap:
retry:
        vma = lock_mm_and_find_vma(mm, address, regs);
        if (!vma)
                return handle_fault_error_nolock(regs, SEGV_MAPERR);
        if (unlikely(!(vma->vm_flags & access)))
                return handle_fault_error(regs, SEGV_ACCERR);
        fault = handle_mm_fault(vma, address, flags, regs);
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
                        handle_fault_error_nolock(regs, 0);
                return;
        }
        /* The fault is fully completed (including releasing mmap lock) */
        if (fault & VM_FAULT_COMPLETED)
                return;
        if (fault & VM_FAULT_RETRY) {
                flags |= FAULT_FLAG_TRIED;
                goto retry;
        }
        mmap_read_unlock(mm);
done:
        if (!(fault & VM_FAULT_ERROR))
                return;
        if (fault & VM_FAULT_OOM) {
                if (!user_mode(regs))
                        handle_fault_error_nolock(regs, 0);
                else
                        pagefault_out_of_memory();
        } else if (fault & VM_FAULT_SIGSEGV) {
                if (!user_mode(regs))
                        handle_fault_error_nolock(regs, 0);
                else
                        do_sigsegv(regs, SEGV_MAPERR);
        } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON |
                            VM_FAULT_HWPOISON_LARGE)) {
                if (!user_mode(regs))
                        handle_fault_error_nolock(regs, 0);
                else
                        do_sigbus(regs);
        } else {
                pr_emerg("Unexpected fault flags: %08x\n", fault);
                BUG();
        }
}

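/*
 * Entry point for protection exceptions (program check 0x04): handles
 * low-address protection, instruction-execution protection (if the NX
 * facility is installed) and ordinary write-protection faults.
 */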
void do_protection_exception(struct pt_regs *regs)
{
        union teid teid = { .val = regs->int_parm_long };

        /*
         * Protection exceptions are suppressing, decrement psw address.
         * The exceptions to this rule are aborted transactions; for
         * these the PSW already points to the correct location.
         */
        if (!(regs->int_code & 0x200))
                regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
        /*
         * Check for low-address protection.  This needs to be treated
         * as a special case because the translation exception code
         * field is not guaranteed to contain valid data in this case.
         */
        if (unlikely(!teid.b61)) {
                if (user_mode(regs)) {
                        /* Low-address protection in user mode: cannot happen */
                        die(regs, "Low-address protection");
                }
                /*
                 * Low-address protection in kernel mode means
                 * NULL pointer write access in kernel mode.
                 */
                return handle_fault_error_nolock(regs, 0);
        }
        if (unlikely(MACHINE_HAS_NX && teid.b56)) {
                regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK);
                return handle_fault_error_nolock(regs, SEGV_ACCERR);
        }
        do_exception(regs, VM_WRITE);
}
NOKPROBE_SYMBOL(do_protection_exception);

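/*
 * Entry point for segment-, page- and region-third-translation
 * exceptions (program checks 0x10, 0x11 and 0x3b): the mapping is not
 * present, and any of VM_READ, VM_WRITE or VM_EXEC satisfies the
 * access check.
 */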
void do_dat_exception(struct pt_regs *regs)
{
        do_exception(regs, VM_ACCESS_FLAGS);
}
NOKPROBE_SYMBOL(do_dat_exception);

#if IS_ENABLED(CONFIG_PGSTE)

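/*
 * Handle the secure storage access exception (program check 0x3d):
 * a page belonging to a protected guest was touched and must first be
 * made accessible to the host via arch_make_folio_accessible().
 */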
void do_secure_storage_access(struct pt_regs *regs)
{
        union teid teid = { .val = regs->int_parm_long };
        unsigned long addr = get_fault_address(regs);
        struct vm_area_struct *vma;
        struct folio_walk fw;
        struct mm_struct *mm;
        struct folio *folio;
        int rc;

        /*
         * Bit 61 indicates whether the address is valid; if it is not,
         * the kernel should be stopped or SIGSEGV should be sent to
         * the process. Bit 61 is not reliable without the misc UV
         * feature, therefore that feature needs to be checked too.
         */
        if (uv_has_feature(BIT_UV_FEAT_MISC) && !teid.b61) {
                /*
                 * When this happens, userspace did something that it
                 * was not supposed to do, e.g. branching into secure
                 * memory. Trigger a segmentation fault.
                 */
                if (user_mode(regs)) {
                        send_sig(SIGSEGV, current, 0);
                        return;
                }
                /*
                 * The kernel should never run into this case and
                 * there is no way out of this situation.
                 */
                panic("Unexpected PGM 0x3d with TEID bit 61=0");
        }
        if (is_kernel_fault(regs)) {
                folio = phys_to_folio(addr);
                if (unlikely(!folio_try_get(folio)))
                        return;
                rc = arch_make_folio_accessible(folio);
                folio_put(folio);
                if (rc)
                        BUG();
        } else {
                mm = current->mm;
                mmap_read_lock(mm);
                vma = find_vma(mm, addr);
                if (!vma)
                        return handle_fault_error(regs, SEGV_MAPERR);
                folio = folio_walk_start(&fw, vma, addr, 0);
                if (!folio) {
                        mmap_read_unlock(mm);
                        return;
                }
                /* arch_make_folio_accessible() needs a raised refcount. */
                folio_get(folio);
                rc = arch_make_folio_accessible(folio);
                folio_put(folio);
                folio_walk_end(&fw, vma);
                if (rc)
                        send_sig(SIGSEGV, current, 0);
                mmap_read_unlock(mm);
        }
}
NOKPROBE_SYMBOL(do_secure_storage_access);

#endif /* CONFIG_PGSTE */