arch/powerpc/lib/code-patching.c
powerpc/code-patching: Use temporary mm for Radix MMU
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright 2008 Michael Ellerman, IBM Corporation.
 */

#include <linux/kprobes.h>
#include <linux/mmu_context.h>
#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/cpuhotplug.h>
#include <linux/uaccess.h>
#include <linux/jump_label.h>

#include <asm/debug.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/code-patching.h>
#include <asm/inst.h>

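/*
 * Write @instr through @patch_addr (which may be a writable alias of
 * @exec_addr), then flush the data cache and invalidate the icache for
 * @exec_addr so the new instruction is visible to instruction fetch.
 * Returns -EPERM if the store faults.
 */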
static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
{
        if (!ppc_inst_prefixed(instr)) {
                u32 val = ppc_inst_val(instr);

                __put_kernel_nofault(patch_addr, &val, u32, failed);
        } else {
                u64 val = ppc_inst_as_ulong(instr);

                __put_kernel_nofault(patch_addr, &val, u64, failed);
        }

        asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
                                                            "r" (exec_addr));

        return 0;

failed:
        return -EPERM;
}

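/*
 * Patch @addr in place, with no remapping. Only safe when the text is
 * writable, e.g. before strict kernel RWX protection takes effect.
 */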
int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
{
        return __patch_instruction(addr, instr, addr);
}

#ifdef CONFIG_STRICT_KERNEL_RWX

static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm);
static DEFINE_PER_CPU(unsigned long, cpu_patching_addr);
static DEFINE_PER_CPU(pte_t *, cpu_patching_pte);

static int map_patch_area(void *addr, unsigned long text_poke_addr);
static void unmap_patch_area(unsigned long addr);

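/*
 * The temporary-mm patching scheme is only used on SMP with the Radix MMU;
 * see the comment below for why the Hash MMU is not supported.
 */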
static bool mm_patch_enabled(void)
{
        return IS_ENABLED(CONFIG_SMP) && radix_enabled();
}

/*
 * The following applies for Radix MMU. Hash MMU has different requirements,
 * and so is not supported.
 *
 * Changing mm requires context synchronising instructions on both sides of
 * the context switch, as well as a hwsync between the last instruction for
 * which the address of an associated storage access was translated using
 * the current context.
 *
 * switch_mm_irqs_off() performs an isync after the context switch. It is
 * the responsibility of the caller to perform the CSI and hwsync before
 * starting/stopping the temp mm.
 */
static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
{
        struct mm_struct *orig_mm = current->active_mm;

        lockdep_assert_irqs_disabled();
        switch_mm_irqs_off(orig_mm, temp_mm, current);

        WARN_ON(!mm_is_thread_local(temp_mm));

        suspend_breakpoints();
        return orig_mm;
}

static void stop_using_temp_mm(struct mm_struct *temp_mm,
                               struct mm_struct *orig_mm)
{
        lockdep_assert_irqs_disabled();
        switch_mm_irqs_off(temp_mm, orig_mm, current);
        restore_breakpoints();
}

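/*
 * Per-CPU setup for the vmalloc-area patching scheme: reserve a single page
 * of vmalloc space and map/unmap it once so its page tables are allocated
 * up front, because patching itself runs with IRQs disabled and cannot
 * allocate memory.
 */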
static int text_area_cpu_up(unsigned int cpu)
{
        struct vm_struct *area;
        unsigned long addr;
        int err;

        area = get_vm_area(PAGE_SIZE, VM_ALLOC);
        if (!area) {
                WARN_ONCE(1, "Failed to create text area for cpu %d\n",
                        cpu);
                return -1;
        }

        // Map/unmap the area to ensure all page tables are pre-allocated
        addr = (unsigned long)area->addr;
        err = map_patch_area(empty_zero_page, addr);
        if (err)
                return err;

        unmap_patch_area(addr);

        this_cpu_write(text_poke_area, area);

        return 0;
}

static int text_area_cpu_down(unsigned int cpu)
{
        free_vm_area(this_cpu_read(text_poke_area));
        return 0;
}

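/*
 * Tear down a patching mm: free the page tables covering the patching
 * address, then drop the mm reference.
 */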
static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
{
        struct mmu_gather tlb;

        tlb_gather_mmu(&tlb, mm);
        free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
        mmput(mm);
}

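/*
 * Per-CPU setup for the temporary-mm patching scheme: allocate an mm, pick
 * a random userspace address for the patch mapping and pre-allocate its PTE,
 * since the allocation cannot be done later with IRQs disabled.
 */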
static int text_area_cpu_up_mm(unsigned int cpu)
{
        struct mm_struct *mm;
        unsigned long addr;
        pte_t *pte;
        spinlock_t *ptl;

        mm = mm_alloc();
        if (WARN_ON(!mm))
                goto fail_no_mm;

        /*
         * Choose a random page-aligned address from the interval
         * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
         * The lower address bound is PAGE_SIZE to avoid the zero-page.
         */
        addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;

        /*
         * PTE allocation uses GFP_KERNEL which means we need to
         * pre-allocate the PTE here because we cannot do the
         * allocation during patching when IRQs are disabled.
         *
         * Using get_locked_pte() to avoid open coding, the lock
         * is unnecessary.
         */
        pte = get_locked_pte(mm, addr, &ptl);
        if (!pte)
                goto fail_no_pte;
        pte_unmap_unlock(pte, ptl);

        this_cpu_write(cpu_patching_mm, mm);
        this_cpu_write(cpu_patching_addr, addr);
        this_cpu_write(cpu_patching_pte, pte);

        return 0;

fail_no_pte:
        put_patching_mm(mm, addr);
fail_no_mm:
        return -ENOMEM;
}

static int text_area_cpu_down_mm(unsigned int cpu)
{
        put_patching_mm(this_cpu_read(cpu_patching_mm),
                        this_cpu_read(cpu_patching_addr));

        this_cpu_write(cpu_patching_mm, NULL);
        this_cpu_write(cpu_patching_addr, 0);
        this_cpu_write(cpu_patching_pte, NULL);

        return 0;
}

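/*
 * Set once poking_init() has registered the per-CPU patching state; until
 * then do_patch_instruction() falls back to raw_patch_instruction().
 */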
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);

void __init poking_init(void)
{
        int ret;

        if (mm_patch_enabled())
                ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                                        "powerpc/text_poke_mm:online",
                                        text_area_cpu_up_mm,
                                        text_area_cpu_down_mm);
        else
                ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                                        "powerpc/text_poke:online",
                                        text_area_cpu_up,
                                        text_area_cpu_down);

        /* cpuhp_setup_state returns >= 0 on success */
        if (WARN_ON(ret < 0))
                return;

        static_branch_enable(&poking_init_done);
}

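/* Resolve the pfn backing @addr, which may be kernel text or module/vmalloc space. */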
static unsigned long get_patch_pfn(void *addr)
{
        if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
                return vmalloc_to_pfn(addr);
        else
                return __pa_symbol(addr) >> PAGE_SHIFT;
}

/*
 * This can be called for kernel text or a module.
 */
static int map_patch_area(void *addr, unsigned long text_poke_addr)
{
        unsigned long pfn = get_patch_pfn(addr);

        return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
}

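/*
 * Walk the kernel page tables down to the PTE for @addr, clear it and
 * flush the TLB for that page.
 */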
static void unmap_patch_area(unsigned long addr)
{
        pte_t *ptep;
        pmd_t *pmdp;
        pud_t *pudp;
        p4d_t *p4dp;
        pgd_t *pgdp;

        pgdp = pgd_offset_k(addr);
        if (WARN_ON(pgd_none(*pgdp)))
                return;

        p4dp = p4d_offset(pgdp, addr);
        if (WARN_ON(p4d_none(*p4dp)))
                return;

        pudp = pud_offset(p4dp, addr);
        if (WARN_ON(pud_none(*pudp)))
                return;

        pmdp = pmd_offset(pudp, addr);
        if (WARN_ON(pmd_none(*pmdp)))
                return;

        ptep = pte_offset_kernel(pmdp, addr);
        if (WARN_ON(pte_none(*ptep)))
                return;

        /*
         * On hash, pte_clear flushes the TLB; on radix, we have to do it
         * explicitly.
         */
        pte_clear(&init_mm, addr, ptep);
        flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}

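/*
 * Patch using the temporary mm: map the target page at this CPU's patching
 * address, switch to the patching mm around the write, then tear the
 * mapping down again. Runs with IRQs disabled.
 */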
static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
{
        int err;
        u32 *patch_addr;
        unsigned long text_poke_addr;
        pte_t *pte;
        unsigned long pfn = get_patch_pfn(addr);
        struct mm_struct *patching_mm;
        struct mm_struct *orig_mm;

        patching_mm = __this_cpu_read(cpu_patching_mm);
        pte = __this_cpu_read(cpu_patching_pte);
        text_poke_addr = __this_cpu_read(cpu_patching_addr);
        patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

        __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);

        /* order PTE update before use, also serves as the hwsync */
        asm volatile("ptesync": : :"memory");

        /* order context switch after arbitrary prior code */
        isync();

        orig_mm = start_using_temp_mm(patching_mm);

        err = __patch_instruction(addr, instr, patch_addr);

        /* hwsync performed by __patch_instruction (sync) if successful */
        if (err)
                mb();  /* sync */

        /* context synchronisation performed by __patch_instruction (isync or exception) */
        stop_using_temp_mm(patching_mm, orig_mm);

        pte_clear(patching_mm, text_poke_addr, pte);
        /*
         * ptesync to order PTE update before TLB invalidation done
         * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
         */
        local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);

        return err;
}

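/*
 * Patch via this CPU's reserved vmalloc page: map the target page there,
 * write the instruction through the alias, then clear the PTE and flush.
 */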
static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
        int err;
        u32 *patch_addr;
        unsigned long text_poke_addr;
        pte_t *pte;
        unsigned long pfn = get_patch_pfn(addr);

        text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
        patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

        pte = virt_to_kpte(text_poke_addr);
        __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
        /* See ptesync comment in radix__set_pte_at() */
        if (radix_enabled())
                asm volatile("ptesync": : :"memory");

        err = __patch_instruction(addr, instr, patch_addr);

        pte_clear(&init_mm, text_poke_addr, pte);
        flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);

        return err;
}

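/*
 * Dispatch to the appropriate patching scheme, or patch directly while the
 * per-CPU patching state is not yet set up.
 */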
static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
        int err;
        unsigned long flags;

        /*
         * During very early boot patch_instruction is called
         * when text_poke_area is not ready, but we still need
         * to allow patching. We just do the plain old patching.
         */
        if (!static_branch_likely(&poking_init_done))
                return raw_patch_instruction(addr, instr);

        local_irq_save(flags);
        if (mm_patch_enabled())
                err = __do_patch_instruction_mm(addr, instr);
        else
                err = __do_patch_instruction(addr, instr);
        local_irq_restore(flags);

        return err;
}
#else /* !CONFIG_STRICT_KERNEL_RWX */

static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
        return raw_patch_instruction(addr, instr);
}

#endif /* CONFIG_STRICT_KERNEL_RWX */

__ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free);

int patch_instruction(u32 *addr, ppc_inst_t instr)
{
        /* Make sure we aren't patching a freed init section */
        if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4))
                return 0;

        return do_patch_instruction(addr, instr);
}
NOKPROBE_SYMBOL(patch_instruction);

int patch_branch(u32 *addr, unsigned long target, int flags)
{
        ppc_inst_t instr;

        if (create_branch(&instr, addr, target, flags))
                return -ERANGE;

        return patch_instruction(addr, instr);
}

/*
 * Helper to check if a given instruction is a conditional branch
 * Derived from the conditional checks in analyse_instr()
 */
bool is_conditional_branch(ppc_inst_t instr)
{
        unsigned int opcode = ppc_inst_primary_opcode(instr);

        if (opcode == 16)       /* bc, bca, bcl, bcla */
                return true;
        if (opcode == 19) {
                switch ((ppc_inst_val(instr) >> 1) & 0x3ff) {
                case 16:        /* bclr, bclrl */
                case 528:       /* bcctr, bcctrl */
                case 560:       /* bctar, bctarl */
                        return true;
                }
        }
        return false;
}
NOKPROBE_SYMBOL(is_conditional_branch);

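/*
 * Build a B-form conditional branch (primary opcode 16, 0x40000000): @flags
 * supplies the BO/BI fields and the AA/LK bits, and the displacement is
 * taken from the low 16 bits of the offset (word aligned), which is
 * PC-relative unless BRANCH_ABSOLUTE is set.
 */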
int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
                       unsigned long target, int flags)
{
        long offset;

        offset = target;
        if (!(flags & BRANCH_ABSOLUTE))
                offset = offset - (unsigned long)addr;

        /* Check we can represent the target in the instruction format */
        if (!is_offset_in_cond_branch_range(offset))
                return 1;

        /* Mask out the flags and target, so they don't step on each other. */
        *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC));

        return 0;
}

int instr_is_relative_branch(ppc_inst_t instr)
{
        if (ppc_inst_val(instr) & BRANCH_ABSOLUTE)
                return 0;

        return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
}

int instr_is_relative_link_branch(ppc_inst_t instr)
{
        return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
}

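/*
 * Decode the signed 26-bit displacement of an I-form branch (LI field,
 * word aligned) and, for a relative branch, add the instruction's own
 * address to obtain the target.
 */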
static unsigned long branch_iform_target(const u32 *instr)
{
        signed long imm;

        imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC;

        /* If the top bit of the immediate value is set this is negative */
        if (imm & 0x2000000)
                imm -= 0x4000000;

        if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
                imm += (unsigned long)instr;

        return (unsigned long)imm;
}

static unsigned long branch_bform_target(const u32 *instr)
{
        signed long imm;

        imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC;

        /* If the top bit of the immediate value is set this is negative */
        if (imm & 0x8000)
                imm -= 0x10000;

        if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
                imm += (unsigned long)instr;

        return (unsigned long)imm;
}

unsigned long branch_target(const u32 *instr)
{
        if (instr_is_branch_iform(ppc_inst_read(instr)))
                return branch_iform_target(instr);
        else if (instr_is_branch_bform(ppc_inst_read(instr)))
                return branch_bform_target(instr);

        return 0;
}

int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
{
        unsigned long target;
        target = branch_target(src);

        if (instr_is_branch_iform(ppc_inst_read(src)))
                return create_branch(instr, dest, target,
                                     ppc_inst_val(ppc_inst_read(src)));
        else if (instr_is_branch_bform(ppc_inst_read(src)))
                return create_cond_branch(instr, dest, target,
                                          ppc_inst_val(ppc_inst_read(src)));

        return 1;
}