]> Git Repo - linux.git/blame - kernel/events/uprobes.c
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux.git] / kernel / events / uprobes.c
CommitLineData
2b144498 1/*
7b2d81d4 2 * User-space Probes (UProbes)
2b144498
SD
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
35aa621b 18 * Copyright (C) IBM Corporation, 2008-2012
2b144498
SD
19 * Authors:
20 * Srikar Dronamraju
21 * Jim Keniston
90eec103 22 * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
2b144498
SD
23 */
24
25#include <linux/kernel.h>
26#include <linux/highmem.h>
27#include <linux/pagemap.h> /* read_mapping_page */
28#include <linux/slab.h>
29#include <linux/sched.h>
6e84f315 30#include <linux/sched/mm.h>
f7ccbae4 31#include <linux/sched/coredump.h>
e8440c14 32#include <linux/export.h>
2b144498
SD
33#include <linux/rmap.h> /* anon_vma_prepare */
34#include <linux/mmu_notifier.h> /* set_pte_at_notify */
35#include <linux/swap.h> /* try_to_free_swap */
0326f5a9
SD
36#include <linux/ptrace.h> /* user_enable_single_step */
37#include <linux/kdebug.h> /* notifier mechanism */
194f8dcb 38#include "../../mm/internal.h" /* munlock_vma_page */
32cdba1e 39#include <linux/percpu-rwsem.h>
aa59c53f 40#include <linux/task_work.h>
40814f68 41#include <linux/shmem_fs.h>
7b2d81d4 42
2b144498
SD
43#include <linux/uprobes.h>
44
d4b3b638
SD
45#define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
46#define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE
47
2b144498 48static struct rb_root uprobes_tree = RB_ROOT;
441f1eb7
ON
49/*
50 * allows us to skip the uprobe_mmap if there are no uprobe events active
51 * at this time. Probably a fine grained per inode count is better?
52 */
53#define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree)
7b2d81d4 54
2b144498
SD
55static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
56
57#define UPROBES_HASH_SZ 13
2b144498
SD
58/* serialize uprobe->pending_list */
59static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
7b2d81d4 60#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
2b144498 61
32cdba1e
ON
62static struct percpu_rw_semaphore dup_mmap_sem;
63
cb9a19fe 64/* Have a copy of original instruction */
71434f2f 65#define UPROBE_COPY_INSN 0
cb9a19fe 66
3ff54efd
SD
67struct uprobe {
68 struct rb_node rb_node; /* node in the rb tree */
69 atomic_t ref;
e591c8d7 70 struct rw_semaphore register_rwsem;
3ff54efd
SD
71 struct rw_semaphore consumer_rwsem;
72 struct list_head pending_list;
73 struct uprobe_consumer *consumers;
74 struct inode *inode; /* Also hold a ref to inode */
75 loff_t offset;
1cc33161 76 loff_t ref_ctr_offset;
71434f2f 77 unsigned long flags;
ad439356
ON
78
79 /*
80 * The generic code assumes that it has two members of unknown type
81 * owned by the arch-specific code:
82 *
83 * insn - copy_insn() saves the original instruction here for
84 * arch_uprobe_analyze_insn().
85 *
86 * ixol - potentially modified instruction to execute out of
87 * line, copied to xol_area by xol_get_insn_slot().
88 */
3ff54efd
SD
89 struct arch_uprobe arch;
90};
91
1cc33161
RB
92struct delayed_uprobe {
93 struct list_head list;
94 struct uprobe *uprobe;
95 struct mm_struct *mm;
96};
97
98static DEFINE_MUTEX(delayed_uprobe_lock);
99static LIST_HEAD(delayed_uprobe_list);
100
c912dae6 101/*
ad439356
ON
102 * Execute out of line area: anonymous executable mapping installed
103 * by the probed task to execute the copy of the original instruction
104 * mangled by set_swbp().
105 *
c912dae6
ON
106 * On a breakpoint hit, thread contests for a slot. It frees the
107 * slot after singlestep. Currently a fixed number of slots are
108 * allocated.
109 */
110struct xol_area {
704bde3c
ON
111 wait_queue_head_t wq; /* if all slots are busy */
112 atomic_t slot_count; /* number of in-use slots */
113 unsigned long *bitmap; /* 0 = free slot */
c912dae6 114
704bde3c
ON
115 struct vm_special_mapping xol_mapping;
116 struct page *pages[2];
c912dae6
ON
117 /*
118 * We keep the vma's vm_start rather than a pointer to the vma
119 * itself. The probed process or a naughty kernel module could make
120 * the vma go away, and we must handle that reasonably gracefully.
121 */
704bde3c 122 unsigned long vaddr; /* Page(s) of instruction slots */
c912dae6
ON
123};
124
2b144498
SD
125/*
126 * valid_vma: Verify if the specified vma is an executable vma
127 * Relax restrictions while unregistering: vm_flags might have
128 * changed after breakpoint was inserted.
129 * - is_register: indicates if we are in register context.
130 * - Return 1 if the specified virtual address is in an
131 * executable vma.
132 */
133static bool valid_vma(struct vm_area_struct *vma, bool is_register)
134{
13f59c5e 135 vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;
2b144498 136
e40cfce6
ON
137 if (is_register)
138 flags |= VM_WRITE;
2b144498 139
e40cfce6 140 return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC;
2b144498
SD
141}
142
57683f72 143static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
2b144498 144{
57683f72 145 return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
2b144498
SD
146}
147
cb113b47
ON
148static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
149{
150 return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start);
151}
152
2b144498
SD
153/**
154 * __replace_page - replace page in vma by new page.
155 * based on replace_page in mm/ksm.c
156 *
157 * @vma: vma that holds the pte pointing to page
c517ee74 158 * @addr: address the old @page is mapped at
2b144498
SD
159 * @page: the cowed page we are replacing by kpage
160 * @kpage: the modified page we replace page by
161 *
162 * Returns 0 on success, -EFAULT on failure.
163 */
c517ee74 164static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
bdfaa2ee 165 struct page *old_page, struct page *new_page)
2b144498
SD
166{
167 struct mm_struct *mm = vma->vm_mm;
14fa2daa
KS
168 struct page_vma_mapped_walk pvmw = {
169 .page = old_page,
170 .vma = vma,
171 .address = addr,
172 };
9f92448c 173 int err;
6bdb913f
HE
174 /* For mmu_notifiers */
175 const unsigned long mmun_start = addr;
176 const unsigned long mmun_end = addr + PAGE_SIZE;
00501b53
JW
177 struct mem_cgroup *memcg;
178
14fa2daa
KS
179 VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
180
bdfaa2ee 181 err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
f627c2f5 182 false);
00501b53
JW
183 if (err)
184 return err;
2b144498 185
194f8dcb 186 /* For try_to_free_swap() and munlock_vma_page() below */
bdfaa2ee 187 lock_page(old_page);
9f92448c 188
6bdb913f 189 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
9f92448c 190 err = -EAGAIN;
14fa2daa 191 if (!page_vma_mapped_walk(&pvmw)) {
bdfaa2ee 192 mem_cgroup_cancel_charge(new_page, memcg, false);
9f92448c 193 goto unlock;
6c4687cc 194 }
14fa2daa 195 VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
2b144498 196
bdfaa2ee
ON
197 get_page(new_page);
198 page_add_new_anon_rmap(new_page, vma, addr, false);
199 mem_cgroup_commit_charge(new_page, memcg, false, false);
200 lru_cache_add_active_or_unevictable(new_page, vma);
2b144498 201
bdfaa2ee
ON
202 if (!PageAnon(old_page)) {
203 dec_mm_counter(mm, mm_counter_file(old_page));
7396fa81
SD
204 inc_mm_counter(mm, MM_ANONPAGES);
205 }
206
14fa2daa
KS
207 flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
208 ptep_clear_flush_notify(vma, addr, pvmw.pte);
209 set_pte_at_notify(mm, addr, pvmw.pte,
210 mk_pte(new_page, vma->vm_page_prot));
2b144498 211
bdfaa2ee
ON
212 page_remove_rmap(old_page, false);
213 if (!page_mapped(old_page))
214 try_to_free_swap(old_page);
14fa2daa 215 page_vma_mapped_walk_done(&pvmw);
2b144498 216
194f8dcb 217 if (vma->vm_flags & VM_LOCKED)
bdfaa2ee
ON
218 munlock_vma_page(old_page);
219 put_page(old_page);
194f8dcb 220
9f92448c
ON
221 err = 0;
222 unlock:
6bdb913f 223 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
bdfaa2ee 224 unlock_page(old_page);
9f92448c 225 return err;
2b144498
SD
226}
227
228/**
5cb4ac3a 229 * is_swbp_insn - check if instruction is breakpoint instruction.
2b144498 230 * @insn: instruction to be checked.
5cb4ac3a 231 * Default implementation of is_swbp_insn
2b144498
SD
232 * Returns true if @insn is a breakpoint instruction.
233 */
5cb4ac3a 234bool __weak is_swbp_insn(uprobe_opcode_t *insn)
2b144498 235{
5cb4ac3a 236 return *insn == UPROBE_SWBP_INSN;
2b144498
SD
237}
238
0908ad6e
AM
239/**
240 * is_trap_insn - check if instruction is breakpoint instruction.
241 * @insn: instruction to be checked.
242 * Default implementation of is_trap_insn
243 * Returns true if @insn is a breakpoint instruction.
244 *
245 * This function is needed for the case where an architecture has multiple
246 * trap instructions (like powerpc).
247 */
248bool __weak is_trap_insn(uprobe_opcode_t *insn)
249{
250 return is_swbp_insn(insn);
251}
252
ab0d805c 253static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len)
cceb55aa
ON
254{
255 void *kaddr = kmap_atomic(page);
ab0d805c 256 memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len);
cceb55aa
ON
257 kunmap_atomic(kaddr);
258}
259
5669ccee
ON
260static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len)
261{
262 void *kaddr = kmap_atomic(page);
263 memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
264 kunmap_atomic(kaddr);
265}
266
ed6f6a50
ON
267static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode)
268{
269 uprobe_opcode_t old_opcode;
270 bool is_swbp;
271
0908ad6e
AM
272 /*
273 * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here.
274 * We do not check if it is any other 'trap variant' which could
275 * be conditional trap instruction such as the one powerpc supports.
276 *
277 * The logic is that we do not care if the underlying instruction
278 * is a trap variant; uprobes always wins over any other (gdb)
279 * breakpoint.
280 */
ab0d805c 281 copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE);
ed6f6a50
ON
282 is_swbp = is_swbp_insn(&old_opcode);
283
284 if (is_swbp_insn(new_opcode)) {
285 if (is_swbp) /* register: already installed? */
286 return 0;
287 } else {
288 if (!is_swbp) /* unregister: was it changed by us? */
076a365b 289 return 0;
ed6f6a50
ON
290 }
291
292 return 1;
293}
294
1cc33161
RB
295static struct delayed_uprobe *
296delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
297{
298 struct delayed_uprobe *du;
299
300 list_for_each_entry(du, &delayed_uprobe_list, list)
301 if (du->uprobe == uprobe && du->mm == mm)
302 return du;
303 return NULL;
304}
305
306static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
307{
308 struct delayed_uprobe *du;
309
310 if (delayed_uprobe_check(uprobe, mm))
311 return 0;
312
313 du = kzalloc(sizeof(*du), GFP_KERNEL);
314 if (!du)
315 return -ENOMEM;
316
317 du->uprobe = uprobe;
318 du->mm = mm;
319 list_add(&du->list, &delayed_uprobe_list);
320 return 0;
321}
322
323static void delayed_uprobe_delete(struct delayed_uprobe *du)
324{
325 if (WARN_ON(!du))
326 return;
327 list_del(&du->list);
328 kfree(du);
329}
330
331static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
332{
333 struct list_head *pos, *q;
334 struct delayed_uprobe *du;
335
336 if (!uprobe && !mm)
337 return;
338
339 list_for_each_safe(pos, q, &delayed_uprobe_list) {
340 du = list_entry(pos, struct delayed_uprobe, list);
341
342 if (uprobe && du->uprobe != uprobe)
343 continue;
344 if (mm && du->mm != mm)
345 continue;
346
347 delayed_uprobe_delete(du);
348 }
349}
350
351static bool valid_ref_ctr_vma(struct uprobe *uprobe,
352 struct vm_area_struct *vma)
353{
354 unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset);
355
356 return uprobe->ref_ctr_offset &&
357 vma->vm_file &&
358 file_inode(vma->vm_file) == uprobe->inode &&
359 (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
360 vma->vm_start <= vaddr &&
361 vma->vm_end > vaddr;
362}
363
364static struct vm_area_struct *
365find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm)
366{
367 struct vm_area_struct *tmp;
368
369 for (tmp = mm->mmap; tmp; tmp = tmp->vm_next)
370 if (valid_ref_ctr_vma(uprobe, tmp))
371 return tmp;
372
373 return NULL;
374}
375
376static int
377__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
378{
379 void *kaddr;
380 struct page *page;
381 struct vm_area_struct *vma;
382 int ret;
383 short *ptr;
384
385 if (!vaddr || !d)
386 return -EINVAL;
387
388 ret = get_user_pages_remote(NULL, mm, vaddr, 1,
389 FOLL_WRITE, &page, &vma, NULL);
390 if (unlikely(ret <= 0)) {
391 /*
392 * We are asking for 1 page. If get_user_pages_remote() fails,
393 * it may return 0, in that case we have to return error.
394 */
395 return ret == 0 ? -EBUSY : ret;
396 }
397
398 kaddr = kmap_atomic(page);
399 ptr = kaddr + (vaddr & ~PAGE_MASK);
400
401 if (unlikely(*ptr + d < 0)) {
402 pr_warn("ref_ctr going negative. vaddr: 0x%lx, "
403 "curr val: %d, delta: %d\n", vaddr, *ptr, d);
404 ret = -EINVAL;
405 goto out;
406 }
407
408 *ptr += d;
409 ret = 0;
410out:
411 kunmap_atomic(kaddr);
412 put_page(page);
413 return ret;
414}
415
416static void update_ref_ctr_warn(struct uprobe *uprobe,
417 struct mm_struct *mm, short d)
418{
419 pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
420 "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n",
421 d > 0 ? "increment" : "decrement", uprobe->inode->i_ino,
422 (unsigned long long) uprobe->offset,
423 (unsigned long long) uprobe->ref_ctr_offset, mm);
424}
425
426static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
427 short d)
428{
429 struct vm_area_struct *rc_vma;
430 unsigned long rc_vaddr;
431 int ret = 0;
432
433 rc_vma = find_ref_ctr_vma(uprobe, mm);
434
435 if (rc_vma) {
436 rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset);
437 ret = __update_ref_ctr(mm, rc_vaddr, d);
438 if (ret)
439 update_ref_ctr_warn(uprobe, mm, d);
440
441 if (d > 0)
442 return ret;
443 }
444
445 mutex_lock(&delayed_uprobe_lock);
446 if (d > 0)
447 ret = delayed_uprobe_add(uprobe, mm);
448 else
449 delayed_uprobe_remove(uprobe, mm);
450 mutex_unlock(&delayed_uprobe_lock);
451
452 return ret;
453}
454
2b144498
SD
455/*
456 * NOTE:
457 * Expect the breakpoint instruction to be the smallest size instruction for
458 * the architecture. If an arch has variable length instruction and the
459 * breakpoint instruction is not of the smallest length instruction
0908ad6e 460 * supported by that architecture then we need to modify is_trap_at_addr and
f72d41fa
ON
461 * uprobe_write_opcode accordingly. This would never be a problem for archs
462 * that have fixed length instructions.
29dedee0 463 *
f72d41fa 464 * uprobe_write_opcode - write the opcode at a given virtual address.
2b144498 465 * @mm: the probed process address space.
2b144498
SD
466 * @vaddr: the virtual address to store the opcode.
467 * @opcode: opcode to be written at @vaddr.
468 *
29dedee0 469 * Called with mm->mmap_sem held for write.
2b144498
SD
470 * Return 0 (success) or a negative errno.
471 */
6d43743e
RB
472int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
473 unsigned long vaddr, uprobe_opcode_t opcode)
2b144498 474{
1cc33161 475 struct uprobe *uprobe;
2b144498 476 struct page *old_page, *new_page;
2b144498 477 struct vm_area_struct *vma;
1cc33161
RB
478 int ret, is_register, ref_ctr_updated = 0;
479
480 is_register = is_swbp_insn(&opcode);
481 uprobe = container_of(auprobe, struct uprobe, arch);
f403072c 482
5323ce71 483retry:
2b144498 484 /* Read the page with vaddr into memory */
c8394812
KS
485 ret = get_user_pages_remote(NULL, mm, vaddr, 1,
486 FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
2b144498
SD
487 if (ret <= 0)
488 return ret;
7b2d81d4 489
ed6f6a50
ON
490 ret = verify_opcode(old_page, vaddr, &opcode);
491 if (ret <= 0)
492 goto put_old;
493
1cc33161
RB
494 /* We are going to replace instruction, update ref_ctr. */
495 if (!ref_ctr_updated && uprobe->ref_ctr_offset) {
496 ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1);
497 if (ret)
498 goto put_old;
499
500 ref_ctr_updated = 1;
501 }
502
29dedee0
ON
503 ret = anon_vma_prepare(vma);
504 if (ret)
505 goto put_old;
506
2b144498
SD
507 ret = -ENOMEM;
508 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
509 if (!new_page)
9f92448c 510 goto put_old;
2b144498 511
29dedee0 512 __SetPageUptodate(new_page);
3f47107c
ON
513 copy_highpage(new_page, old_page);
514 copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
2b144498 515
c517ee74 516 ret = __replace_page(vma, vaddr, old_page, new_page);
09cbfeaf 517 put_page(new_page);
9f92448c 518put_old:
7b2d81d4
IM
519 put_page(old_page);
520
5323ce71
ON
521 if (unlikely(ret == -EAGAIN))
522 goto retry;
1cc33161
RB
523
524 /* Revert back reference counter if instruction update failed. */
525 if (ret && is_register && ref_ctr_updated)
526 update_ref_ctr(uprobe, mm, -1);
527
2b144498
SD
528 return ret;
529}
530
2b144498 531/**
5cb4ac3a 532 * set_swbp - store breakpoint at a given address.
e3343e6a 533 * @auprobe: arch specific probepoint information.
2b144498 534 * @mm: the probed process address space.
2b144498
SD
535 * @vaddr: the virtual address to insert the opcode.
536 *
537 * For mm @mm, store the breakpoint instruction at @vaddr.
538 * Return 0 (success) or a negative errno.
539 */
5cb4ac3a 540int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
2b144498 541{
6d43743e 542 return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
2b144498
SD
543}
544
545/**
546 * set_orig_insn - Restore the original instruction.
547 * @mm: the probed process address space.
e3343e6a 548 * @auprobe: arch specific probepoint information.
2b144498 549 * @vaddr: the virtual address to insert the opcode.
2b144498
SD
550 *
551 * For mm @mm, restore the original opcode (opcode) at @vaddr.
552 * Return 0 (success) or a negative errno.
553 */
7b2d81d4 554int __weak
ded86e7c 555set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
2b144498 556{
6d43743e
RB
557 return uprobe_write_opcode(auprobe, mm, vaddr,
558 *(uprobe_opcode_t *)&auprobe->insn);
2b144498
SD
559}
560
f231722a
ON
561static struct uprobe *get_uprobe(struct uprobe *uprobe)
562{
563 atomic_inc(&uprobe->ref);
564 return uprobe;
565}
566
567static void put_uprobe(struct uprobe *uprobe)
568{
1cc33161
RB
569 if (atomic_dec_and_test(&uprobe->ref)) {
570 /*
571 * If application munmap(exec_vma) before uprobe_unregister()
572 * gets called, we don't get a chance to remove uprobe from
573 * delayed_uprobe_list from remove_breakpoint(). Do it here.
574 */
575 delayed_uprobe_remove(uprobe, NULL);
f231722a 576 kfree(uprobe);
1cc33161 577 }
f231722a
ON
578}
579
2b144498
SD
580static int match_uprobe(struct uprobe *l, struct uprobe *r)
581{
582 if (l->inode < r->inode)
583 return -1;
7b2d81d4 584
2b144498
SD
585 if (l->inode > r->inode)
586 return 1;
2b144498 587
7b2d81d4
IM
588 if (l->offset < r->offset)
589 return -1;
590
591 if (l->offset > r->offset)
592 return 1;
2b144498
SD
593
594 return 0;
595}
596
597static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
598{
599 struct uprobe u = { .inode = inode, .offset = offset };
600 struct rb_node *n = uprobes_tree.rb_node;
601 struct uprobe *uprobe;
602 int match;
603
604 while (n) {
605 uprobe = rb_entry(n, struct uprobe, rb_node);
606 match = match_uprobe(&u, uprobe);
f231722a
ON
607 if (!match)
608 return get_uprobe(uprobe);
7b2d81d4 609
2b144498
SD
610 if (match < 0)
611 n = n->rb_left;
612 else
613 n = n->rb_right;
614 }
615 return NULL;
616}
617
618/*
619 * Find a uprobe corresponding to a given inode:offset
620 * Acquires uprobes_treelock
621 */
622static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
623{
624 struct uprobe *uprobe;
2b144498 625
6f47caa0 626 spin_lock(&uprobes_treelock);
2b144498 627 uprobe = __find_uprobe(inode, offset);
6f47caa0 628 spin_unlock(&uprobes_treelock);
7b2d81d4 629
2b144498
SD
630 return uprobe;
631}
632
633static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
634{
635 struct rb_node **p = &uprobes_tree.rb_node;
636 struct rb_node *parent = NULL;
637 struct uprobe *u;
638 int match;
639
640 while (*p) {
641 parent = *p;
642 u = rb_entry(parent, struct uprobe, rb_node);
643 match = match_uprobe(uprobe, u);
f231722a
ON
644 if (!match)
645 return get_uprobe(u);
2b144498
SD
646
647 if (match < 0)
648 p = &parent->rb_left;
649 else
650 p = &parent->rb_right;
651
652 }
7b2d81d4 653
2b144498
SD
654 u = NULL;
655 rb_link_node(&uprobe->rb_node, parent, p);
656 rb_insert_color(&uprobe->rb_node, &uprobes_tree);
657 /* get access + creation ref */
658 atomic_set(&uprobe->ref, 2);
7b2d81d4 659
2b144498
SD
660 return u;
661}
662
663/*
7b2d81d4 664 * Acquire uprobes_treelock.
2b144498
SD
665 * Matching uprobe already exists in rbtree;
666 * increment (access refcount) and return the matching uprobe.
667 *
668 * No matching uprobe; insert the uprobe in rb_tree;
669 * get a double refcount (access + creation) and return NULL.
670 */
671static struct uprobe *insert_uprobe(struct uprobe *uprobe)
672{
2b144498
SD
673 struct uprobe *u;
674
6f47caa0 675 spin_lock(&uprobes_treelock);
2b144498 676 u = __insert_uprobe(uprobe);
6f47caa0 677 spin_unlock(&uprobes_treelock);
7b2d81d4 678
2b144498
SD
679 return u;
680}
681
22bad382
RB
682static void
683ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe)
684{
685 pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx "
686 "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n",
687 uprobe->inode->i_ino, (unsigned long long) uprobe->offset,
688 (unsigned long long) cur_uprobe->ref_ctr_offset,
689 (unsigned long long) uprobe->ref_ctr_offset);
690}
691
1cc33161
RB
692static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
693 loff_t ref_ctr_offset)
2b144498
SD
694{
695 struct uprobe *uprobe, *cur_uprobe;
696
697 uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL);
698 if (!uprobe)
699 return NULL;
700
61f94203 701 uprobe->inode = inode;
2b144498 702 uprobe->offset = offset;
1cc33161 703 uprobe->ref_ctr_offset = ref_ctr_offset;
e591c8d7 704 init_rwsem(&uprobe->register_rwsem);
2b144498 705 init_rwsem(&uprobe->consumer_rwsem);
2b144498
SD
706
707 /* add to uprobes_tree, sorted on inode:offset */
708 cur_uprobe = insert_uprobe(uprobe);
2b144498
SD
709 /* a uprobe exists for this inode:offset combination */
710 if (cur_uprobe) {
22bad382
RB
711 if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) {
712 ref_ctr_mismatch_warn(cur_uprobe, uprobe);
713 put_uprobe(cur_uprobe);
714 kfree(uprobe);
715 return ERR_PTR(-EINVAL);
716 }
2b144498
SD
717 kfree(uprobe);
718 uprobe = cur_uprobe;
7b2d81d4
IM
719 }
720
2b144498
SD
721 return uprobe;
722}
723
9a98e03c 724static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
2b144498
SD
725{
726 down_write(&uprobe->consumer_rwsem);
e3343e6a
SD
727 uc->next = uprobe->consumers;
728 uprobe->consumers = uc;
2b144498 729 up_write(&uprobe->consumer_rwsem);
2b144498
SD
730}
731
732/*
e3343e6a
SD
733 * For uprobe @uprobe, delete the consumer @uc.
734 * Return true if the @uc is deleted successfully
2b144498
SD
735 * or return false.
736 */
e3343e6a 737static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
2b144498
SD
738{
739 struct uprobe_consumer **con;
740 bool ret = false;
741
742 down_write(&uprobe->consumer_rwsem);
743 for (con = &uprobe->consumers; *con; con = &(*con)->next) {
e3343e6a
SD
744 if (*con == uc) {
745 *con = uc->next;
2b144498
SD
746 ret = true;
747 break;
748 }
749 }
750 up_write(&uprobe->consumer_rwsem);
7b2d81d4 751
2b144498
SD
752 return ret;
753}
754
2ded0980
ON
755static int __copy_insn(struct address_space *mapping, struct file *filp,
756 void *insn, int nbytes, loff_t offset)
2b144498 757{
2b144498 758 struct page *page;
2b144498 759 /*
40814f68
ON
760 * Ensure that the page that has the original instruction is populated
761 * and in page-cache. If ->readpage == NULL it must be shmem_mapping(),
762 * see uprobe_register().
2b144498 763 */
40814f68 764 if (mapping->a_ops->readpage)
09cbfeaf 765 page = read_mapping_page(mapping, offset >> PAGE_SHIFT, filp);
40814f68 766 else
09cbfeaf 767 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
2b144498
SD
768 if (IS_ERR(page))
769 return PTR_ERR(page);
770
2edb7b55 771 copy_from_page(page, offset, insn, nbytes);
09cbfeaf 772 put_page(page);
7b2d81d4 773
2b144498
SD
774 return 0;
775}
776
d436615e 777static int copy_insn(struct uprobe *uprobe, struct file *filp)
2b144498 778{
2ded0980
ON
779 struct address_space *mapping = uprobe->inode->i_mapping;
780 loff_t offs = uprobe->offset;
803200e2
ON
781 void *insn = &uprobe->arch.insn;
782 int size = sizeof(uprobe->arch.insn);
2ded0980
ON
783 int len, err = -EIO;
784
785 /* Copy only available bytes, -EIO if nothing was read */
786 do {
787 if (offs >= i_size_read(uprobe->inode))
788 break;
789
790 len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK));
791 err = __copy_insn(mapping, filp, insn, len, offs);
fc36f595 792 if (err)
2ded0980
ON
793 break;
794
795 insn += len;
796 offs += len;
797 size -= len;
798 } while (size);
799
800 return err;
2b144498
SD
801}
802
cb9a19fe
ON
803static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
804 struct mm_struct *mm, unsigned long vaddr)
805{
806 int ret = 0;
807
71434f2f 808 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
cb9a19fe
ON
809 return ret;
810
d4d3ccc6
ON
811 /* TODO: move this into _register, until then we abuse this sem. */
812 down_write(&uprobe->consumer_rwsem);
71434f2f 813 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
4710f05f
ON
814 goto out;
815
cb9a19fe
ON
816 ret = copy_insn(uprobe, file);
817 if (ret)
818 goto out;
819
820 ret = -ENOTSUPP;
803200e2 821 if (is_trap_insn((uprobe_opcode_t *)&uprobe->arch.insn))
cb9a19fe
ON
822 goto out;
823
824 ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
825 if (ret)
826 goto out;
827
f72d41fa 828 /* uprobe_write_opcode() assumes we don't cross page boundary */
cb9a19fe
ON
829 BUG_ON((uprobe->offset & ~PAGE_MASK) +
830 UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
831
09d3f015 832 smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */
71434f2f 833 set_bit(UPROBE_COPY_INSN, &uprobe->flags);
cb9a19fe
ON
834
835 out:
d4d3ccc6 836 up_write(&uprobe->consumer_rwsem);
4710f05f 837
cb9a19fe
ON
838 return ret;
839}
840
8a7f2fa0
ON
841static inline bool consumer_filter(struct uprobe_consumer *uc,
842 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
806a98bd 843{
8a7f2fa0 844 return !uc->filter || uc->filter(uc, ctx, mm);
806a98bd
ON
845}
846
8a7f2fa0
ON
847static bool filter_chain(struct uprobe *uprobe,
848 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
63633cbf 849{
1ff6fee5
ON
850 struct uprobe_consumer *uc;
851 bool ret = false;
852
853 down_read(&uprobe->consumer_rwsem);
854 for (uc = uprobe->consumers; uc; uc = uc->next) {
8a7f2fa0 855 ret = consumer_filter(uc, ctx, mm);
1ff6fee5
ON
856 if (ret)
857 break;
858 }
859 up_read(&uprobe->consumer_rwsem);
860
861 return ret;
63633cbf
ON
862}
863
e3343e6a
SD
864static int
865install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
816c03fb 866 struct vm_area_struct *vma, unsigned long vaddr)
2b144498 867{
f8ac4ec9 868 bool first_uprobe;
2b144498
SD
869 int ret;
870
cb9a19fe
ON
871 ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
872 if (ret)
873 return ret;
682968e0 874
f8ac4ec9
ON
875 /*
876 * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(),
877 * the task can hit this breakpoint right after __replace_page().
878 */
879 first_uprobe = !test_bit(MMF_HAS_UPROBES, &mm->flags);
880 if (first_uprobe)
881 set_bit(MMF_HAS_UPROBES, &mm->flags);
882
816c03fb 883 ret = set_swbp(&uprobe->arch, mm, vaddr);
9f68f672
ON
884 if (!ret)
885 clear_bit(MMF_RECALC_UPROBES, &mm->flags);
886 else if (first_uprobe)
f8ac4ec9 887 clear_bit(MMF_HAS_UPROBES, &mm->flags);
2b144498
SD
888
889 return ret;
890}
891
076a365b 892static int
816c03fb 893remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
2b144498 894{
9f68f672 895 set_bit(MMF_RECALC_UPROBES, &mm->flags);
076a365b 896 return set_orig_insn(&uprobe->arch, mm, vaddr);
2b144498
SD
897}
898
06b7bcd8
ON
899static inline bool uprobe_is_active(struct uprobe *uprobe)
900{
901 return !RB_EMPTY_NODE(&uprobe->rb_node);
902}
0326f5a9 903/*
778b032d
ON
904 * There could be threads that have already hit the breakpoint. They
905 * will recheck the current insn and restart if find_uprobe() fails.
906 * See find_active_uprobe().
0326f5a9 907 */
2b144498
SD
908static void delete_uprobe(struct uprobe *uprobe)
909{
06b7bcd8
ON
910 if (WARN_ON(!uprobe_is_active(uprobe)))
911 return;
912
6f47caa0 913 spin_lock(&uprobes_treelock);
2b144498 914 rb_erase(&uprobe->rb_node, &uprobes_tree);
6f47caa0 915 spin_unlock(&uprobes_treelock);
06b7bcd8 916 RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
2b144498 917 put_uprobe(uprobe);
2b144498
SD
918}
919
26872090
ON
920struct map_info {
921 struct map_info *next;
922 struct mm_struct *mm;
816c03fb 923 unsigned long vaddr;
26872090
ON
924};
925
926static inline struct map_info *free_map_info(struct map_info *info)
2b144498 927{
26872090
ON
928 struct map_info *next = info->next;
929 kfree(info);
930 return next;
931}
932
933static struct map_info *
934build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
935{
936 unsigned long pgoff = offset >> PAGE_SHIFT;
2b144498 937 struct vm_area_struct *vma;
26872090
ON
938 struct map_info *curr = NULL;
939 struct map_info *prev = NULL;
940 struct map_info *info;
941 int more = 0;
2b144498 942
26872090 943 again:
4a23717a 944 i_mmap_lock_read(mapping);
6b2dbba8 945 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
2b144498
SD
946 if (!valid_vma(vma, is_register))
947 continue;
948
7a5bfb66
ON
949 if (!prev && !more) {
950 /*
c8c06efa 951 * Needs GFP_NOWAIT to avoid i_mmap_rwsem recursion through
7a5bfb66
ON
952 * reclaim. This is optimistic, no harm done if it fails.
953 */
954 prev = kmalloc(sizeof(struct map_info),
955 GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
956 if (prev)
957 prev->next = NULL;
958 }
26872090
ON
959 if (!prev) {
960 more++;
961 continue;
2b144498 962 }
2b144498 963
388f7934 964 if (!mmget_not_zero(vma->vm_mm))
26872090 965 continue;
7b2d81d4 966
26872090
ON
967 info = prev;
968 prev = prev->next;
969 info->next = curr;
970 curr = info;
2b144498 971
26872090 972 info->mm = vma->vm_mm;
57683f72 973 info->vaddr = offset_to_vaddr(vma, offset);
26872090 974 }
4a23717a 975 i_mmap_unlock_read(mapping);
2b144498 976
26872090
ON
977 if (!more)
978 goto out;
979
980 prev = curr;
981 while (curr) {
982 mmput(curr->mm);
983 curr = curr->next;
984 }
7b2d81d4 985
26872090
ON
986 do {
987 info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
988 if (!info) {
989 curr = ERR_PTR(-ENOMEM);
990 goto out;
991 }
992 info->next = prev;
993 prev = info;
994 } while (--more);
995
996 goto again;
997 out:
998 while (prev)
999 prev = free_map_info(prev);
1000 return curr;
2b144498
SD
1001}
1002
bdf8647c
ON
1003static int
1004register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
2b144498 1005{
bdf8647c 1006 bool is_register = !!new;
26872090
ON
1007 struct map_info *info;
1008 int err = 0;
2b144498 1009
32cdba1e 1010 percpu_down_write(&dup_mmap_sem);
26872090
ON
1011 info = build_map_info(uprobe->inode->i_mapping,
1012 uprobe->offset, is_register);
32cdba1e
ON
1013 if (IS_ERR(info)) {
1014 err = PTR_ERR(info);
1015 goto out;
1016 }
7b2d81d4 1017
26872090
ON
1018 while (info) {
1019 struct mm_struct *mm = info->mm;
1020 struct vm_area_struct *vma;
7b2d81d4 1021
076a365b 1022 if (err && is_register)
26872090 1023 goto free;
7b2d81d4 1024
77fc4af1 1025 down_write(&mm->mmap_sem);
f4d6dfe5
ON
1026 vma = find_vma(mm, info->vaddr);
1027 if (!vma || !valid_vma(vma, is_register) ||
f281769e 1028 file_inode(vma->vm_file) != uprobe->inode)
26872090
ON
1029 goto unlock;
1030
f4d6dfe5
ON
1031 if (vma->vm_start > info->vaddr ||
1032 vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
26872090 1033 goto unlock;
2b144498 1034
806a98bd
ON
1035 if (is_register) {
1036 /* consult only the "caller", new consumer. */
bdf8647c 1037 if (consumer_filter(new,
8a7f2fa0 1038 UPROBE_FILTER_REGISTER, mm))
806a98bd
ON
1039 err = install_breakpoint(uprobe, mm, vma, info->vaddr);
1040 } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
8a7f2fa0
ON
1041 if (!filter_chain(uprobe,
1042 UPROBE_FILTER_UNREGISTER, mm))
806a98bd
ON
1043 err |= remove_breakpoint(uprobe, mm, info->vaddr);
1044 }
78f74116 1045
26872090
ON
1046 unlock:
1047 up_write(&mm->mmap_sem);
1048 free:
1049 mmput(mm);
1050 info = free_map_info(info);
2b144498 1051 }
32cdba1e
ON
1052 out:
1053 percpu_up_write(&dup_mmap_sem);
26872090 1054 return err;
2b144498
SD
1055}
1056
38e967ae
RB
1057static void
1058__uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
2b144498 1059{
04aab9b2
ON
1060 int err;
1061
06d07139 1062 if (WARN_ON(!consumer_del(uprobe, uc)))
04aab9b2 1063 return;
2b144498 1064
bdf8647c 1065 err = register_for_each_vma(uprobe, NULL);
bb929284
ON
1066 /* TODO : cant unregister? schedule a worker thread */
1067 if (!uprobe->consumers && !err)
1068 delete_uprobe(uprobe);
2b144498
SD
1069}
1070
1071/*
7140ad38 1072 * uprobe_unregister - unregister an already registered probe.
38e967ae
RB
1073 * @inode: the file in which the probe has to be removed.
1074 * @offset: offset from the start of the file.
1075 * @uc: identify which probe if multiple probes are colocated.
1076 */
1077void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
1078{
1079 struct uprobe *uprobe;
1080
1081 uprobe = find_uprobe(inode, offset);
1082 if (WARN_ON(!uprobe))
1083 return;
1084
1085 down_write(&uprobe->register_rwsem);
1086 __uprobe_unregister(uprobe, uc);
1087 up_write(&uprobe->register_rwsem);
1088 put_uprobe(uprobe);
1089}
1090EXPORT_SYMBOL_GPL(uprobe_unregister);
1091
1092/*
1093 * __uprobe_register - register a probe
2b144498
SD
1094 * @inode: the file in which the probe has to be placed.
1095 * @offset: offset from the start of the file.
e3343e6a 1096 * @uc: information on howto handle the probe..
2b144498 1097 *
38e967ae 1098 * Apart from the access refcount, __uprobe_register() takes a creation
2b144498
SD
1099 * refcount (thro alloc_uprobe) if and only if this @uprobe is getting
1100 * inserted into the rbtree (i.e first consumer for a @inode:@offset
7b2d81d4 1101 * tuple). Creation refcount stops uprobe_unregister from freeing the
2b144498 1102 * @uprobe even before the register operation is complete. Creation
e3343e6a 1103 * refcount is released when the last @uc for the @uprobe
38e967ae 1104 * unregisters. Caller of __uprobe_register() is required to keep @inode
61f94203 1105 * (and the containing mount) referenced.
2b144498
SD
1106 *
1107 * Return errno if it cannot successully install probes
1108 * else return 0 (success)
1109 */
38e967ae 1110static int __uprobe_register(struct inode *inode, loff_t offset,
1cc33161 1111 loff_t ref_ctr_offset, struct uprobe_consumer *uc)
2b144498
SD
1112{
1113 struct uprobe *uprobe;
7b2d81d4 1114 int ret;
2b144498 1115
ea024870
AA
1116 /* Uprobe must have at least one set consumer */
1117 if (!uc->handler && !uc->ret_handler)
1118 return -EINVAL;
1119
40814f68
ON
1120 /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
1121 if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping))
41ccba02 1122 return -EIO;
f0744af7 1123 /* Racy, just to catch the obvious mistakes */
2b144498 1124 if (offset > i_size_read(inode))
7b2d81d4 1125 return -EINVAL;
2b144498 1126
66d06dff 1127 retry:
1cc33161 1128 uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
66d06dff
ON
1129 if (!uprobe)
1130 return -ENOMEM;
22bad382
RB
1131 if (IS_ERR(uprobe))
1132 return PTR_ERR(uprobe);
1133
66d06dff
ON
1134 /*
1135 * We can race with uprobe_unregister()->delete_uprobe().
1136 * Check uprobe_is_active() and retry if it is false.
1137 */
1138 down_write(&uprobe->register_rwsem);
1139 ret = -EAGAIN;
1140 if (likely(uprobe_is_active(uprobe))) {
38e967ae
RB
1141 consumer_add(uprobe, uc);
1142 ret = register_for_each_vma(uprobe, uc);
9a98e03c 1143 if (ret)
04aab9b2 1144 __uprobe_unregister(uprobe, uc);
2b144498 1145 }
66d06dff
ON
1146 up_write(&uprobe->register_rwsem);
1147 put_uprobe(uprobe);
2b144498 1148
66d06dff
ON
1149 if (unlikely(ret == -EAGAIN))
1150 goto retry;
2b144498
SD
1151 return ret;
1152}
38e967ae
RB
1153
1154int uprobe_register(struct inode *inode, loff_t offset,
1155 struct uprobe_consumer *uc)
1156{
1cc33161 1157 return __uprobe_register(inode, offset, 0, uc);
38e967ae 1158}
e8440c14 1159EXPORT_SYMBOL_GPL(uprobe_register);
2b144498 1160
1cc33161
RB
1161int uprobe_register_refctr(struct inode *inode, loff_t offset,
1162 loff_t ref_ctr_offset, struct uprobe_consumer *uc)
1163{
1164 return __uprobe_register(inode, offset, ref_ctr_offset, uc);
1165}
1166EXPORT_SYMBOL_GPL(uprobe_register_refctr);
1167
bdf8647c 1168/*
788faab7 1169 * uprobe_apply - unregister an already registered probe.
bdf8647c
ON
1170 * @inode: the file in which the probe has to be removed.
1171 * @offset: offset from the start of the file.
1172 * @uc: consumer which wants to add more or remove some breakpoints
1173 * @add: add or remove the breakpoints
1174 */
1175int uprobe_apply(struct inode *inode, loff_t offset,
1176 struct uprobe_consumer *uc, bool add)
1177{
1178 struct uprobe *uprobe;
1179 struct uprobe_consumer *con;
1180 int ret = -ENOENT;
1181
1182 uprobe = find_uprobe(inode, offset);
06d07139 1183 if (WARN_ON(!uprobe))
bdf8647c
ON
1184 return ret;
1185
1186 down_write(&uprobe->register_rwsem);
1187 for (con = uprobe->consumers; con && con != uc ; con = con->next)
1188 ;
1189 if (con)
1190 ret = register_for_each_vma(uprobe, add ? uc : NULL);
1191 up_write(&uprobe->register_rwsem);
1192 put_uprobe(uprobe);
1193
1194 return ret;
1195}
1196
da1816b1
ON
1197static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
1198{
1199 struct vm_area_struct *vma;
1200 int err = 0;
1201
1202 down_read(&mm->mmap_sem);
1203 for (vma = mm->mmap; vma; vma = vma->vm_next) {
1204 unsigned long vaddr;
1205 loff_t offset;
1206
1207 if (!valid_vma(vma, false) ||
f281769e 1208 file_inode(vma->vm_file) != uprobe->inode)
da1816b1
ON
1209 continue;
1210
1211 offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
1212 if (uprobe->offset < offset ||
1213 uprobe->offset >= offset + vma->vm_end - vma->vm_start)
1214 continue;
1215
1216 vaddr = offset_to_vaddr(vma, uprobe->offset);
1217 err |= remove_breakpoint(uprobe, mm, vaddr);
1218 }
1219 up_read(&mm->mmap_sem);
1220
1221 return err;
1222}
1223
891c3970
ON
1224static struct rb_node *
1225find_node_in_range(struct inode *inode, loff_t min, loff_t max)
2b144498 1226{
2b144498 1227 struct rb_node *n = uprobes_tree.rb_node;
2b144498
SD
1228
1229 while (n) {
891c3970 1230 struct uprobe *u = rb_entry(n, struct uprobe, rb_node);
2b144498 1231
891c3970 1232 if (inode < u->inode) {
2b144498 1233 n = n->rb_left;
891c3970 1234 } else if (inode > u->inode) {
2b144498 1235 n = n->rb_right;
891c3970
ON
1236 } else {
1237 if (max < u->offset)
1238 n = n->rb_left;
1239 else if (min > u->offset)
1240 n = n->rb_right;
1241 else
1242 break;
1243 }
2b144498 1244 }
7b2d81d4 1245
891c3970 1246 return n;
2b144498
SD
1247}
1248
1249/*
891c3970 1250 * For a given range in vma, build a list of probes that need to be inserted.
2b144498 1251 */
891c3970
ON
1252static void build_probe_list(struct inode *inode,
1253 struct vm_area_struct *vma,
1254 unsigned long start, unsigned long end,
1255 struct list_head *head)
2b144498 1256{
891c3970 1257 loff_t min, max;
891c3970
ON
1258 struct rb_node *n, *t;
1259 struct uprobe *u;
7b2d81d4 1260
891c3970 1261 INIT_LIST_HEAD(head);
cb113b47 1262 min = vaddr_to_offset(vma, start);
891c3970 1263 max = min + (end - start) - 1;
2b144498 1264
6f47caa0 1265 spin_lock(&uprobes_treelock);
891c3970
ON
1266 n = find_node_in_range(inode, min, max);
1267 if (n) {
1268 for (t = n; t; t = rb_prev(t)) {
1269 u = rb_entry(t, struct uprobe, rb_node);
1270 if (u->inode != inode || u->offset < min)
1271 break;
1272 list_add(&u->pending_list, head);
f231722a 1273 get_uprobe(u);
891c3970
ON
1274 }
1275 for (t = n; (t = rb_next(t)); ) {
1276 u = rb_entry(t, struct uprobe, rb_node);
1277 if (u->inode != inode || u->offset > max)
1278 break;
1279 list_add(&u->pending_list, head);
f231722a 1280 get_uprobe(u);
891c3970 1281 }
2b144498 1282 }
6f47caa0 1283 spin_unlock(&uprobes_treelock);
2b144498
SD
1284}
1285
1cc33161
RB
1286/* @vma contains reference counter, not the probed instruction. */
1287static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
1288{
1289 struct list_head *pos, *q;
1290 struct delayed_uprobe *du;
1291 unsigned long vaddr;
1292 int ret = 0, err = 0;
1293
1294 mutex_lock(&delayed_uprobe_lock);
1295 list_for_each_safe(pos, q, &delayed_uprobe_list) {
1296 du = list_entry(pos, struct delayed_uprobe, list);
1297
1298 if (du->mm != vma->vm_mm ||
1299 !valid_ref_ctr_vma(du->uprobe, vma))
1300 continue;
1301
1302 vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset);
1303 ret = __update_ref_ctr(vma->vm_mm, vaddr, 1);
1304 if (ret) {
1305 update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1);
1306 if (!err)
1307 err = ret;
1308 }
1309 delayed_uprobe_delete(du);
1310 }
1311 mutex_unlock(&delayed_uprobe_lock);
1312 return err;
1313}
1314
2b144498 1315/*
5e5be71a 1316 * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
2b144498 1317 *
5e5be71a
ON
1318 * Currently we ignore all errors and always return 0, the callers
1319 * can't handle the failure anyway.
2b144498 1320 */
7b2d81d4 1321int uprobe_mmap(struct vm_area_struct *vma)
2b144498
SD
1322{
1323 struct list_head tmp_list;
665605a2 1324 struct uprobe *uprobe, *u;
2b144498 1325 struct inode *inode;
2b144498 1326
1cc33161
RB
1327 if (no_uprobe_events())
1328 return 0;
1329
1330 if (vma->vm_file &&
1331 (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
1332 test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags))
1333 delayed_ref_ctr_inc(vma);
1334
1335 if (!valid_vma(vma, true))
7b2d81d4 1336 return 0;
2b144498 1337
f281769e 1338 inode = file_inode(vma->vm_file);
2b144498 1339 if (!inode)
7b2d81d4 1340 return 0;
2b144498 1341
2b144498 1342 mutex_lock(uprobes_mmap_hash(inode));
891c3970 1343 build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
806a98bd
ON
1344 /*
1345 * We can race with uprobe_unregister(), this uprobe can be already
1346 * removed. But in this case filter_chain() must return false, all
1347 * consumers have gone away.
1348 */
665605a2 1349 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
806a98bd 1350 if (!fatal_signal_pending(current) &&
8a7f2fa0 1351 filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) {
57683f72 1352 unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
5e5be71a 1353 install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
2b144498
SD
1354 }
1355 put_uprobe(uprobe);
1356 }
2b144498
SD
1357 mutex_unlock(uprobes_mmap_hash(inode));
1358
5e5be71a 1359 return 0;
2b144498
SD
1360}
1361
9f68f672
ON
1362static bool
1363vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end)
1364{
1365 loff_t min, max;
1366 struct inode *inode;
1367 struct rb_node *n;
1368
f281769e 1369 inode = file_inode(vma->vm_file);
9f68f672
ON
1370
1371 min = vaddr_to_offset(vma, start);
1372 max = min + (end - start) - 1;
1373
1374 spin_lock(&uprobes_treelock);
1375 n = find_node_in_range(inode, min, max);
1376 spin_unlock(&uprobes_treelock);
1377
1378 return !!n;
1379}
1380
682968e0
SD
1381/*
1382 * Called in context of a munmap of a vma.
1383 */
cbc91f71 1384void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
682968e0 1385{
441f1eb7 1386 if (no_uprobe_events() || !valid_vma(vma, false))
682968e0
SD
1387 return;
1388
2fd611a9
ON
1389 if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
1390 return;
1391
9f68f672
ON
1392 if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) ||
1393 test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags))
f8ac4ec9
ON
1394 return;
1395
9f68f672
ON
1396 if (vma_has_uprobes(vma, start, end))
1397 set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
682968e0
SD
1398}
1399
d4b3b638 1400/* Slot allocation for XOL */
6441ec8b 1401static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
d4b3b638 1402{
704bde3c
ON
1403 struct vm_area_struct *vma;
1404 int ret;
d4b3b638 1405
598fdc1d
MH
1406 if (down_write_killable(&mm->mmap_sem))
1407 return -EINTR;
1408
704bde3c
ON
1409 if (mm->uprobes_state.xol_area) {
1410 ret = -EALREADY;
d4b3b638 1411 goto fail;
704bde3c 1412 }
d4b3b638 1413
af0d95af
ON
1414 if (!area->vaddr) {
1415 /* Try to map as high as possible, this is only a hint. */
1416 area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
1417 PAGE_SIZE, 0, 0);
1418 if (area->vaddr & ~PAGE_MASK) {
1419 ret = area->vaddr;
1420 goto fail;
1421 }
d4b3b638
SD
1422 }
1423
704bde3c
ON
1424 vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
1425 VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO,
1426 &area->xol_mapping);
1427 if (IS_ERR(vma)) {
1428 ret = PTR_ERR(vma);
d4b3b638 1429 goto fail;
704bde3c 1430 }
d4b3b638 1431
704bde3c 1432 ret = 0;
5c6338b4
PM
1433 /* pairs with get_xol_area() */
1434 smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */
c8a82538 1435 fail:
d4b3b638 1436 up_write(&mm->mmap_sem);
d4b3b638
SD
1437
1438 return ret;
1439}
1440
af0d95af 1441static struct xol_area *__create_xol_area(unsigned long vaddr)
d4b3b638 1442{
9b545df8 1443 struct mm_struct *mm = current->mm;
e78aebfd 1444 uprobe_opcode_t insn = UPROBE_SWBP_INSN;
6441ec8b 1445 struct xol_area *area;
9b545df8 1446
af0d95af 1447 area = kmalloc(sizeof(*area), GFP_KERNEL);
d4b3b638 1448 if (unlikely(!area))
c8a82538 1449 goto out;
d4b3b638 1450
6396bb22
KC
1451 area->bitmap = kcalloc(BITS_TO_LONGS(UINSNS_PER_PAGE), sizeof(long),
1452 GFP_KERNEL);
d4b3b638 1453 if (!area->bitmap)
c8a82538
ON
1454 goto free_area;
1455
704bde3c 1456 area->xol_mapping.name = "[uprobes]";
869ae761 1457 area->xol_mapping.fault = NULL;
704bde3c 1458 area->xol_mapping.pages = area->pages;
f58bea2f
ON
1459 area->pages[0] = alloc_page(GFP_HIGHUSER);
1460 if (!area->pages[0])
c8a82538 1461 goto free_bitmap;
f58bea2f 1462 area->pages[1] = NULL;
d4b3b638 1463
af0d95af 1464 area->vaddr = vaddr;
6441ec8b
ON
1465 init_waitqueue_head(&area->wq);
1466 /* Reserve the 1st slot for get_trampoline_vaddr() */
e78aebfd 1467 set_bit(0, area->bitmap);
e78aebfd 1468 atomic_set(&area->slot_count, 1);
297e765e 1469 arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE);
e78aebfd 1470
6441ec8b 1471 if (!xol_add_vma(mm, area))
d4b3b638
SD
1472 return area;
1473
f58bea2f 1474 __free_page(area->pages[0]);
c8a82538 1475 free_bitmap:
d4b3b638 1476 kfree(area->bitmap);
c8a82538 1477 free_area:
d4b3b638 1478 kfree(area);
c8a82538 1479 out:
6441ec8b
ON
1480 return NULL;
1481}
1482
1483/*
1484 * get_xol_area - Allocate process's xol_area if necessary.
1485 * This area will be used for storing instructions for execution out of line.
1486 *
1487 * Returns the allocated area or NULL.
1488 */
1489static struct xol_area *get_xol_area(void)
1490{
1491 struct mm_struct *mm = current->mm;
1492 struct xol_area *area;
1493
1494 if (!mm->uprobes_state.xol_area)
af0d95af 1495 __create_xol_area(0);
6441ec8b 1496
5c6338b4
PM
1497 /* Pairs with xol_add_vma() smp_store_release() */
1498 area = READ_ONCE(mm->uprobes_state.xol_area); /* ^^^ */
9b545df8 1499 return area;
d4b3b638
SD
1500}
1501
1502/*
1503 * uprobe_clear_state - Free the area allocated for slots.
1504 */
1505void uprobe_clear_state(struct mm_struct *mm)
1506{
1507 struct xol_area *area = mm->uprobes_state.xol_area;
1508
1cc33161
RB
1509 mutex_lock(&delayed_uprobe_lock);
1510 delayed_uprobe_remove(NULL, mm);
1511 mutex_unlock(&delayed_uprobe_lock);
1512
d4b3b638
SD
1513 if (!area)
1514 return;
1515
f58bea2f 1516 put_page(area->pages[0]);
d4b3b638
SD
1517 kfree(area->bitmap);
1518 kfree(area);
1519}
1520
32cdba1e
ON
1521void uprobe_start_dup_mmap(void)
1522{
1523 percpu_down_read(&dup_mmap_sem);
1524}
1525
1526void uprobe_end_dup_mmap(void)
1527{
1528 percpu_up_read(&dup_mmap_sem);
1529}
1530
f8ac4ec9
ON
1531void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
1532{
9f68f672 1533 if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) {
f8ac4ec9 1534 set_bit(MMF_HAS_UPROBES, &newmm->flags);
9f68f672
ON
1535 /* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */
1536 set_bit(MMF_RECALC_UPROBES, &newmm->flags);
1537 }
f8ac4ec9
ON
1538}
1539
d4b3b638
SD
1540/*
1541 * - search for a free slot.
1542 */
1543static unsigned long xol_take_insn_slot(struct xol_area *area)
1544{
1545 unsigned long slot_addr;
1546 int slot_nr;
1547
1548 do {
1549 slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
1550 if (slot_nr < UINSNS_PER_PAGE) {
1551 if (!test_and_set_bit(slot_nr, area->bitmap))
1552 break;
1553
1554 slot_nr = UINSNS_PER_PAGE;
1555 continue;
1556 }
1557 wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE));
1558 } while (slot_nr >= UINSNS_PER_PAGE);
1559
1560 slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES);
1561 atomic_inc(&area->slot_count);
1562
1563 return slot_addr;
1564}
1565
1566/*
a6cb3f6d 1567 * xol_get_insn_slot - allocate a slot for xol.
d4b3b638
SD
1568 * Returns the allocated slot address or 0.
1569 */
a6cb3f6d 1570static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
d4b3b638
SD
1571{
1572 struct xol_area *area;
a6cb3f6d 1573 unsigned long xol_vaddr;
d4b3b638 1574
9b545df8
ON
1575 area = get_xol_area();
1576 if (!area)
1577 return 0;
d4b3b638 1578
a6cb3f6d
ON
1579 xol_vaddr = xol_take_insn_slot(area);
1580 if (unlikely(!xol_vaddr))
d4b3b638
SD
1581 return 0;
1582
f58bea2f 1583 arch_uprobe_copy_ixol(area->pages[0], xol_vaddr,
72e6ae28 1584 &uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
d4b3b638 1585
a6cb3f6d 1586 return xol_vaddr;
d4b3b638
SD
1587}
1588
1589/*
1590 * xol_free_insn_slot - If slot was earlier allocated by
1591 * @xol_get_insn_slot(), make the slot available for
1592 * subsequent requests.
1593 */
1594static void xol_free_insn_slot(struct task_struct *tsk)
1595{
1596 struct xol_area *area;
1597 unsigned long vma_end;
1598 unsigned long slot_addr;
1599
1600 if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask)
1601 return;
1602
1603 slot_addr = tsk->utask->xol_vaddr;
af4355e9 1604 if (unlikely(!slot_addr))
d4b3b638
SD
1605 return;
1606
1607 area = tsk->mm->uprobes_state.xol_area;
1608 vma_end = area->vaddr + PAGE_SIZE;
1609 if (area->vaddr <= slot_addr && slot_addr < vma_end) {
1610 unsigned long offset;
1611 int slot_nr;
1612
1613 offset = slot_addr - area->vaddr;
1614 slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
1615 if (slot_nr >= UINSNS_PER_PAGE)
1616 return;
1617
1618 clear_bit(slot_nr, area->bitmap);
1619 atomic_dec(&area->slot_count);
2a742ced 1620 smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
d4b3b638
SD
1621 if (waitqueue_active(&area->wq))
1622 wake_up(&area->wq);
1623
1624 tsk->utask->xol_vaddr = 0;
1625 }
1626}
1627
72e6ae28
VK
1628void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
1629 void *src, unsigned long len)
1630{
1631 /* Initialize the slot */
1632 copy_to_page(page, vaddr, src, len);
1633
1634 /*
1635 * We probably need flush_icache_user_range() but it needs vma.
1636 * This should work on most of architectures by default. If
1637 * architecture needs to do something different it can define
1638 * its own version of the function.
1639 */
1640 flush_dcache_page(page);
1641}
1642
0326f5a9
SD
1643/**
1644 * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
1645 * @regs: Reflects the saved state of the task after it has hit a breakpoint
1646 * instruction.
1647 * Return the address of the breakpoint instruction.
1648 */
1649unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
1650{
1651 return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
1652}
1653
b02ef20a
ON
1654unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
1655{
1656 struct uprobe_task *utask = current->utask;
1657
1658 if (unlikely(utask && utask->active_uprobe))
1659 return utask->vaddr;
1660
1661 return instruction_pointer(regs);
1662}
1663
2bb5e840
ON
1664static struct return_instance *free_ret_instance(struct return_instance *ri)
1665{
1666 struct return_instance *next = ri->next;
1667 put_uprobe(ri->uprobe);
1668 kfree(ri);
1669 return next;
1670}
1671
0326f5a9
SD
1672/*
1673 * Called with no locks held.
788faab7 1674 * Called in context of an exiting or an exec-ing thread.
0326f5a9
SD
1675 */
1676void uprobe_free_utask(struct task_struct *t)
1677{
1678 struct uprobe_task *utask = t->utask;
2bb5e840 1679 struct return_instance *ri;
0326f5a9 1680
0326f5a9
SD
1681 if (!utask)
1682 return;
1683
1684 if (utask->active_uprobe)
1685 put_uprobe(utask->active_uprobe);
1686
0dfd0eb8 1687 ri = utask->return_instances;
2bb5e840
ON
1688 while (ri)
1689 ri = free_ret_instance(ri);
0dfd0eb8 1690
d4b3b638 1691 xol_free_insn_slot(t);
0326f5a9
SD
1692 kfree(utask);
1693 t->utask = NULL;
1694}
1695
0326f5a9 1696/*
5a2df662
ON
1697 * Allocate a uprobe_task object for the task if if necessary.
1698 * Called when the thread hits a breakpoint.
0326f5a9
SD
1699 *
1700 * Returns:
1701 * - pointer to new uprobe_task on success
1702 * - NULL otherwise
1703 */
5a2df662 1704static struct uprobe_task *get_utask(void)
0326f5a9 1705{
5a2df662
ON
1706 if (!current->utask)
1707 current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
1708 return current->utask;
0326f5a9
SD
1709}
1710
248d3a7b
ON
1711static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
1712{
1713 struct uprobe_task *n_utask;
1714 struct return_instance **p, *o, *n;
1715
1716 n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
1717 if (!n_utask)
1718 return -ENOMEM;
1719 t->utask = n_utask;
1720
1721 p = &n_utask->return_instances;
1722 for (o = o_utask->return_instances; o; o = o->next) {
1723 n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
1724 if (!n)
1725 return -ENOMEM;
1726
1727 *n = *o;
f231722a 1728 get_uprobe(n->uprobe);
248d3a7b
ON
1729 n->next = NULL;
1730
1731 *p = n;
1732 p = &n->next;
1733 n_utask->depth++;
1734 }
1735
1736 return 0;
1737}
1738
1739static void uprobe_warn(struct task_struct *t, const char *msg)
1740{
1741 pr_warn("uprobe: %s:%d failed to %s\n",
1742 current->comm, current->pid, msg);
1743}
1744
aa59c53f
ON
1745static void dup_xol_work(struct callback_head *work)
1746{
aa59c53f
ON
1747 if (current->flags & PF_EXITING)
1748 return;
1749
598fdc1d
MH
1750 if (!__create_xol_area(current->utask->dup_xol_addr) &&
1751 !fatal_signal_pending(current))
aa59c53f
ON
1752 uprobe_warn(current, "dup xol area");
1753}
1754
b68e0749
ON
1755/*
1756 * Called in context of a new clone/fork from copy_process.
1757 */
3ab67966 1758void uprobe_copy_process(struct task_struct *t, unsigned long flags)
b68e0749 1759{
248d3a7b
ON
1760 struct uprobe_task *utask = current->utask;
1761 struct mm_struct *mm = current->mm;
aa59c53f 1762 struct xol_area *area;
248d3a7b 1763
b68e0749 1764 t->utask = NULL;
248d3a7b 1765
3ab67966
ON
1766 if (!utask || !utask->return_instances)
1767 return;
1768
1769 if (mm == t->mm && !(flags & CLONE_VFORK))
248d3a7b
ON
1770 return;
1771
1772 if (dup_utask(t, utask))
1773 return uprobe_warn(t, "dup ret instances");
aa59c53f
ON
1774
1775 /* The task can fork() after dup_xol_work() fails */
1776 area = mm->uprobes_state.xol_area;
1777 if (!area)
1778 return uprobe_warn(t, "dup xol area");
1779
3ab67966
ON
1780 if (mm == t->mm)
1781 return;
1782
32473431
ON
1783 t->utask->dup_xol_addr = area->vaddr;
1784 init_task_work(&t->utask->dup_xol_work, dup_xol_work);
1785 task_work_add(t, &t->utask->dup_xol_work, true);
b68e0749
ON
1786}
1787
e78aebfd
AA
1788/*
1789 * Current area->vaddr notion assume the trampoline address is always
1790 * equal area->vaddr.
1791 *
1792 * Returns -1 in case the xol_area is not allocated.
1793 */
1794static unsigned long get_trampoline_vaddr(void)
1795{
1796 struct xol_area *area;
1797 unsigned long trampoline_vaddr = -1;
1798
5c6338b4
PM
1799 /* Pairs with xol_add_vma() smp_store_release() */
1800 area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */
e78aebfd
AA
1801 if (area)
1802 trampoline_vaddr = area->vaddr;
1803
1804 return trampoline_vaddr;
1805}
1806
db087ef6
ON
1807static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
1808 struct pt_regs *regs)
a5b7e1a8
ON
1809{
1810 struct return_instance *ri = utask->return_instances;
db087ef6 1811 enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL;
86dcb702
ON
1812
1813 while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) {
a5b7e1a8
ON
1814 ri = free_ret_instance(ri);
1815 utask->depth--;
1816 }
1817 utask->return_instances = ri;
1818}
1819
0dfd0eb8
AA
1820static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
1821{
1822 struct return_instance *ri;
1823 struct uprobe_task *utask;
1824 unsigned long orig_ret_vaddr, trampoline_vaddr;
db087ef6 1825 bool chained;
0dfd0eb8
AA
1826
1827 if (!get_xol_area())
1828 return;
1829
1830 utask = get_utask();
1831 if (!utask)
1832 return;
1833
ded49c55
AA
1834 if (utask->depth >= MAX_URETPROBE_DEPTH) {
1835 printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
1836 " nestedness limit pid/tgid=%d/%d\n",
1837 current->pid, current->tgid);
1838 return;
1839 }
1840
6c58d0e4 1841 ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
0dfd0eb8 1842 if (!ri)
6c58d0e4 1843 return;
0dfd0eb8
AA
1844
1845 trampoline_vaddr = get_trampoline_vaddr();
1846 orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
1847 if (orig_ret_vaddr == -1)
1848 goto fail;
1849
a5b7e1a8 1850 /* drop the entries invalidated by longjmp() */
db087ef6
ON
1851 chained = (orig_ret_vaddr == trampoline_vaddr);
1852 cleanup_return_instances(utask, chained, regs);
a5b7e1a8 1853
0dfd0eb8
AA
1854 /*
1855 * We don't want to keep the trampoline address on the stack; rather, keep
1856 * the original return address of the first caller through all subsequent
1857 * instances. This also makes breakpoint unwrapping easier.
1858 */
db087ef6 1859 if (chained) {
0dfd0eb8
AA
1860 if (!utask->return_instances) {
1861 /*
1862 * This situation is not possible. Likely we have an
1863 * attack from user-space.
1864 */
6c58d0e4 1865 uprobe_warn(current, "handle tail call");
0dfd0eb8
AA
1866 goto fail;
1867 }
0dfd0eb8
AA
1868 orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
1869 }
1870
f231722a 1871 ri->uprobe = get_uprobe(uprobe);
0dfd0eb8 1872 ri->func = instruction_pointer(regs);
7b868e48 1873 ri->stack = user_stack_pointer(regs);
0dfd0eb8
AA
1874 ri->orig_ret_vaddr = orig_ret_vaddr;
1875 ri->chained = chained;
1876
ded49c55 1877 utask->depth++;
0dfd0eb8
AA
1878 ri->next = utask->return_instances;
1879 utask->return_instances = ri;
1880
1881 return;
0dfd0eb8
AA
1882 fail:
1883 kfree(ri);
1884}
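/*
 * Illustrative sketch (not part of the original file): with uretprobes on
 * both a() and b(), where a() calls b(), prepare_uretprobe() ends up with
 * a per-task LIFO chain roughly like
 *
 *	utask->return_instances
 *	  -> { func = b, orig_ret_vaddr = return address into a(),          chained = false }
 *	  -> { func = a, orig_ret_vaddr = return address into a()'s caller, chained = false }
 *
 * Only when a probed function is entered with the trampoline already
 * installed as its return address (e.g. a tail call from another probed
 * function) is the new instance marked chained and given the
 * orig_ret_vaddr of the instance at the head of the list.
 */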
1885
0326f5a9
SD
1886/* Prepare to single-step probed instruction out of line. */
1887static int
a6cb3f6d 1888pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
0326f5a9 1889{
a6cb3f6d
ON
1890 struct uprobe_task *utask;
1891 unsigned long xol_vaddr;
aba51024 1892 int err;
a6cb3f6d 1893
608e7427
ON
1894 utask = get_utask();
1895 if (!utask)
1896 return -ENOMEM;
a6cb3f6d
ON
1897
1898 xol_vaddr = xol_get_insn_slot(uprobe);
1899 if (!xol_vaddr)
1900 return -ENOMEM;
1901
1902 utask->xol_vaddr = xol_vaddr;
1903 utask->vaddr = bp_vaddr;
d4b3b638 1904
aba51024
ON
1905 err = arch_uprobe_pre_xol(&uprobe->arch, regs);
1906 if (unlikely(err)) {
1907 xol_free_insn_slot(current);
1908 return err;
1909 }
1910
608e7427
ON
1911 utask->active_uprobe = uprobe;
1912 utask->state = UTASK_SSTEP;
aba51024 1913 return 0;
0326f5a9
SD
1914}
1915
1916/*
1917 * If we are singlestepping, then ensure this thread is not connected to
1918 * non-fatal signals until completion of singlestep. When xol insn itself
1919 * triggers the signal, restart the original insn even if the task is
1920 * already SIGKILL'ed (since coredump should report the correct ip). This
1921 * is even more important if the task has a handler for SIGSEGV/etc. The
1922 * _same_ instruction should be repeated again after return from the signal
1923 * handler, and SSTEP can never finish in this case.
1924 */
1925bool uprobe_deny_signal(void)
1926{
1927 struct task_struct *t = current;
1928 struct uprobe_task *utask = t->utask;
1929
1930 if (likely(!utask || !utask->active_uprobe))
1931 return false;
1932
1933 WARN_ON_ONCE(utask->state != UTASK_SSTEP);
1934
1935 if (signal_pending(t)) {
1936 spin_lock_irq(&t->sighand->siglock);
1937 clear_tsk_thread_flag(t, TIF_SIGPENDING);
1938 spin_unlock_irq(&t->sighand->siglock);
1939
1940 if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
1941 utask->state = UTASK_SSTEP_TRAPPED;
1942 set_tsk_thread_flag(t, TIF_UPROBE);
0326f5a9
SD
1943 }
1944 }
1945
1946 return true;
1947}
1948
499a4f3e
ON
1949static void mmf_recalc_uprobes(struct mm_struct *mm)
1950{
1951 struct vm_area_struct *vma;
1952
1953 for (vma = mm->mmap; vma; vma = vma->vm_next) {
1954 if (!valid_vma(vma, false))
1955 continue;
1956 /*
1957 * This is not strictly accurate; we can race with
1958 * uprobe_unregister() and see the already removed
1959 * uprobe if delete_uprobe() was not yet called.
63633cbf 1960 * Or this uprobe can be filtered out.
499a4f3e
ON
1961 */
1962 if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
1963 return;
1964 }
1965
1966 clear_bit(MMF_HAS_UPROBES, &mm->flags);
1967}
1968
0908ad6e 1969static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
ec75fba9
ON
1970{
1971 struct page *page;
1972 uprobe_opcode_t opcode;
1973 int result;
1974
1975 pagefault_disable();
bd28b145 1976 result = __get_user(opcode, (uprobe_opcode_t __user *)vaddr);
ec75fba9
ON
1977 pagefault_enable();
1978
1979 if (likely(result == 0))
1980 goto out;
1981
1e987790
DH
1982 /*
1983 * The NULL 'tsk' here ensures that any faults that occur here
1984 * will not be accounted to the task. 'mm' *is* current->mm,
1985 * but we treat this as a 'remote' access since it is
1986 * essentially a kernel access to the memory.
1987 */
9beae1ea 1988 result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
5b56d49f 1989 NULL, NULL);
ec75fba9
ON
1990 if (result < 0)
1991 return result;
1992
ab0d805c 1993 copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
ec75fba9
ON
1994 put_page(page);
1995 out:
0908ad6e
AM
1996 /* This needs to return true for any variant of the trap insn */
1997 return is_trap_insn(&opcode);
ec75fba9
ON
1998}
1999
d790d346 2000static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
0326f5a9 2001{
3a9ea052
ON
2002 struct mm_struct *mm = current->mm;
2003 struct uprobe *uprobe = NULL;
0326f5a9 2004 struct vm_area_struct *vma;
0326f5a9 2005
0326f5a9
SD
2006 down_read(&mm->mmap_sem);
2007 vma = find_vma(mm, bp_vaddr);
3a9ea052
ON
2008 if (vma && vma->vm_start <= bp_vaddr) {
2009 if (valid_vma(vma, false)) {
f281769e 2010 struct inode *inode = file_inode(vma->vm_file);
cb113b47 2011 loff_t offset = vaddr_to_offset(vma, bp_vaddr);
0326f5a9 2012
3a9ea052
ON
2013 uprobe = find_uprobe(inode, offset);
2014 }
d790d346
ON
2015
2016 if (!uprobe)
0908ad6e 2017 *is_swbp = is_trap_at_addr(mm, bp_vaddr);
d790d346
ON
2018 } else {
2019 *is_swbp = -EFAULT;
0326f5a9 2020 }
499a4f3e
ON
2021
2022 if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
2023 mmf_recalc_uprobes(mm);
0326f5a9
SD
2024 up_read(&mm->mmap_sem);
2025
3a9ea052
ON
2026 return uprobe;
2027}
2028
da1816b1
ON
2029static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
2030{
2031 struct uprobe_consumer *uc;
2032 int remove = UPROBE_HANDLER_REMOVE;
0dfd0eb8 2033 bool need_prep = false; /* prepare return uprobe, when needed */
da1816b1
ON
2034
2035 down_read(&uprobe->register_rwsem);
2036 for (uc = uprobe->consumers; uc; uc = uc->next) {
ea024870 2037 int rc = 0;
da1816b1 2038
ea024870
AA
2039 if (uc->handler) {
2040 rc = uc->handler(uc, regs);
2041 WARN(rc & ~UPROBE_HANDLER_MASK,
2042 "bad rc=0x%x from %pf()\n", rc, uc->handler);
2043 }
0dfd0eb8
AA
2044
2045 if (uc->ret_handler)
2046 need_prep = true;
2047
da1816b1
ON
2048 remove &= rc;
2049 }
2050
0dfd0eb8
AA
2051 if (need_prep && !remove)
2052 prepare_uretprobe(uprobe, regs); /* put bp at return */
2053
da1816b1
ON
2054 if (remove && uprobe->consumers) {
2055 WARN_ON(!uprobe_is_active(uprobe));
2056 unapply_uprobe(uprobe, current->mm);
2057 }
2058 up_read(&uprobe->register_rwsem);
2059}
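/*
 * Illustrative sketch (not part of the original file): a minimal consumer
 * as dispatched by handler_chain() above; the names are invented, the
 * callback signatures follow struct uprobe_consumer.
 *
 *	static int sample_handler(struct uprobe_consumer *uc,
 *				  struct pt_regs *regs)
 *	{
 *		return 0;
 *	}
 *
 *	static int sample_ret_handler(struct uprobe_consumer *uc,
 *				      unsigned long func,
 *				      struct pt_regs *regs)
 *	{
 *		return 0;
 *	}
 *
 *	static struct uprobe_consumer sample_uc = {
 *		.handler	= sample_handler,
 *		.ret_handler	= sample_ret_handler,
 *	};
 *
 * A ->handler returning UPROBE_HANDLER_REMOVE from every consumer makes
 * handler_chain() unapply the uprobe from current->mm; a non-NULL
 * ->ret_handler is what sets need_prep and arms the return trampoline
 * via prepare_uretprobe().
 */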
2060
fec8898d
AA
2061static void
2062handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
2063{
2064 struct uprobe *uprobe = ri->uprobe;
2065 struct uprobe_consumer *uc;
2066
2067 down_read(&uprobe->register_rwsem);
2068 for (uc = uprobe->consumers; uc; uc = uc->next) {
2069 if (uc->ret_handler)
2070 uc->ret_handler(uc, ri->func, regs);
2071 }
2072 up_read(&uprobe->register_rwsem);
2073}
2074
a83cfeb9
ON
2075static struct return_instance *find_next_ret_chain(struct return_instance *ri)
2076{
2077 bool chained;
2078
2079 do {
2080 chained = ri->chained;
2081 ri = ri->next; /* can't be NULL if chained */
2082 } while (chained);
2083
2084 return ri;
2085}
2086
0b5256c7 2087static void handle_trampoline(struct pt_regs *regs)
fec8898d
AA
2088{
2089 struct uprobe_task *utask;
a83cfeb9 2090 struct return_instance *ri, *next;
5eeb50de 2091 bool valid;
fec8898d
AA
2092
2093 utask = current->utask;
2094 if (!utask)
0b5256c7 2095 goto sigill;
fec8898d
AA
2096
2097 ri = utask->return_instances;
2098 if (!ri)
0b5256c7 2099 goto sigill;
fec8898d 2100
a83cfeb9 2101 do {
5eeb50de
ON
2102 /*
2103 * We should throw out the frames invalidated by longjmp().
2104 * If this chain is valid, then the next one should be alive
2105 * or NULL; the latter case means that nobody but ri->func
2106 * could hit this trampoline on return. TODO: sigaltstack().
2107 */
2108 next = find_next_ret_chain(ri);
86dcb702 2109 valid = !next || arch_uretprobe_is_alive(next, RP_CHECK_RET, regs);
5eeb50de
ON
2110
2111 instruction_pointer_set(regs, ri->orig_ret_vaddr);
2112 do {
2113 if (valid)
2114 handle_uretprobe_chain(ri, regs);
2115 ri = free_ret_instance(ri);
2116 utask->depth--;
2117 } while (ri != next);
2118 } while (!valid);
fec8898d
AA
2119
2120 utask->return_instances = ri;
0b5256c7
ON
2121 return;
2122
2123 sigill:
2124 uprobe_warn(current, "handle uretprobe, sending SIGILL.");
55a3235f 2125 force_sig(SIGILL, current);
fec8898d 2126
fec8898d
AA
2127}
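/*
 * Illustrative sketch (not part of the original file): if f1(), f2() and
 * f3() are all uretprobed and f3() longjmp()s straight back into f1(),
 * the instances recorded for f3 and f2 are still on the list when f1()
 * finally returns through the trampoline. arch_uretprobe_is_alive()
 * reports their stack frames as dead, so handle_trampoline() frees them
 * without running their ret_handlers and only f1's consumers are called.
 */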
2128
6fe50a28
DL
2129bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
2130{
2131 return false;
2132}
2133
86dcb702
ON
2134bool __weak arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
2135 struct pt_regs *regs)
97da8976
ON
2136{
2137 return true;
2138}
2139
3a9ea052
ON
2140/*
2141 * Run handler and ask thread to singlestep.
2142 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
2143 */
2144static void handle_swbp(struct pt_regs *regs)
2145{
3a9ea052
ON
2146 struct uprobe *uprobe;
2147 unsigned long bp_vaddr;
56bb4cf6 2148 int uninitialized_var(is_swbp);
3a9ea052
ON
2149
2150 bp_vaddr = uprobe_get_swbp_addr(regs);
0b5256c7
ON
2151 if (bp_vaddr == get_trampoline_vaddr())
2152 return handle_trampoline(regs);
fec8898d
AA
2153
2154 uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
0326f5a9 2155 if (!uprobe) {
56bb4cf6
ON
2156 if (is_swbp > 0) {
2157 /* No matching uprobe; signal SIGTRAP. */
2158 send_sig(SIGTRAP, current, 0);
2159 } else {
2160 /*
2161 * Either we raced with uprobe_unregister() or we can't
2162 * access this memory. The latter is only possible if
2163 * another thread plays with our ->mm. In both cases
2164 * we can simply restart. If this vma was unmapped we
2165 * can pretend this insn was not executed yet and get
2166 * the (correct) SIGSEGV after restart.
2167 */
2168 instruction_pointer_set(regs, bp_vaddr);
2169 }
0326f5a9
SD
2170 return;
2171 }
74e59dfc
ON
2172
2173 /* change it in advance for ->handler() and restart */
2174 instruction_pointer_set(regs, bp_vaddr);
2175
142b18dd
ON
2176 /*
2177 * TODO: move copy_insn/etc into _register and remove this hack.
2178 * After we hit the bp, _unregister + _register can install the
2179 * new and not-yet-analyzed uprobe at the same address, restart.
2180 */
71434f2f 2181 if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
74e59dfc 2182 goto out;
0326f5a9 2183
09d3f015
AP
2184 /*
2185 * Pairs with the smp_wmb() in prepare_uprobe().
2186 *
2187 * Guarantees that if we see the UPROBE_COPY_INSN bit set, then
2188 * we must also see the stores to &uprobe->arch performed by the
2189 * prepare_uprobe() call.
2190 */
2191 smp_rmb();
2192
72fd293a
ON
2193 /* Tracing handlers use ->utask to communicate with fetch methods */
2194 if (!get_utask())
2195 goto out;
2196
6fe50a28
DL
2197 if (arch_uprobe_ignore(&uprobe->arch, regs))
2198 goto out;
2199
0326f5a9 2200 handler_chain(uprobe, regs);
6fe50a28 2201
8a6b1732 2202 if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
0578a970 2203 goto out;
0326f5a9 2204
608e7427 2205 if (!pre_ssout(uprobe, regs, bp_vaddr))
0326f5a9 2206 return;
0326f5a9 2207
8a6b1732 2208 /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
0578a970 2209out:
8bd87445 2210 put_uprobe(uprobe);
0326f5a9
SD
2211}
2212
2213/*
2214 * Perform required fix-ups and disable singlestep.
2215 * Allow pending signals to take effect.
2216 */
2217static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
2218{
2219 struct uprobe *uprobe;
014940ba 2220 int err = 0;
0326f5a9
SD
2221
2222 uprobe = utask->active_uprobe;
2223 if (utask->state == UTASK_SSTEP_ACK)
014940ba 2224 err = arch_uprobe_post_xol(&uprobe->arch, regs);
0326f5a9
SD
2225 else if (utask->state == UTASK_SSTEP_TRAPPED)
2226 arch_uprobe_abort_xol(&uprobe->arch, regs);
2227 else
2228 WARN_ON_ONCE(1);
2229
2230 put_uprobe(uprobe);
2231 utask->active_uprobe = NULL;
2232 utask->state = UTASK_RUNNING;
d4b3b638 2233 xol_free_insn_slot(current);
0326f5a9
SD
2234
2235 spin_lock_irq(&current->sighand->siglock);
2236 recalc_sigpending(); /* see uprobe_deny_signal() */
2237 spin_unlock_irq(&current->sighand->siglock);
014940ba
ON
2238
2239 if (unlikely(err)) {
2240 uprobe_warn(current, "execute the probed insn, sending SIGILL.");
55a3235f 2241 force_sig(SIGILL, current);
014940ba 2242 }
0326f5a9
SD
2243}
2244
2245/*
1b08e907
ON
2246 * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and
2247 * allows the thread to return from interrupt. After that handle_swbp()
2248 * sets utask->active_uprobe.
0326f5a9 2249 *
1b08e907
ON
2250 * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag
2251 * and allows the thread to return from interrupt.
0326f5a9
SD
2252 *
2253 * While returning to userspace, thread notices the TIF_UPROBE flag and calls
2254 * uprobe_notify_resume().
2255 */
2256void uprobe_notify_resume(struct pt_regs *regs)
2257{
2258 struct uprobe_task *utask;
2259
db023ea5
ON
2260 clear_thread_flag(TIF_UPROBE);
2261
0326f5a9 2262 utask = current->utask;
1b08e907 2263 if (utask && utask->active_uprobe)
0326f5a9 2264 handle_singlestep(utask, regs);
1b08e907
ON
2265 else
2266 handle_swbp(regs);
0326f5a9
SD
2267}
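/*
 * Illustrative sequence (a sketch, not part of the original file) of one
 * probe hit, tying the notifiers below to the handlers above:
 *
 *	breakpoint trap  -> uprobe_pre_sstep_notifier() sets TIF_UPROBE
 *	return to user   -> uprobe_notify_resume() -> handle_swbp()
 *	                    -> handler_chain(), then pre_ssout() (XOL slot)
 *	single-step trap -> uprobe_post_sstep_notifier() sets TIF_UPROBE
 *	return to user   -> uprobe_notify_resume() -> handle_singlestep()
 *	                    -> arch_uprobe_post_xol(), slot freed
 */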
2268
2269/*
2270 * uprobe_pre_sstep_notifier gets called from interrupt context as part of
2271 * the notifier mechanism. Set TIF_UPROBE flag and indicate breakpoint hit.
2272 */
2273int uprobe_pre_sstep_notifier(struct pt_regs *regs)
2274{
0dfd0eb8
AA
2275 if (!current->mm)
2276 return 0;
2277
2278 if (!test_bit(MMF_HAS_UPROBES, &current->mm->flags) &&
2279 (!current->utask || !current->utask->return_instances))
0326f5a9
SD
2280 return 0;
2281
0326f5a9 2282 set_thread_flag(TIF_UPROBE);
0326f5a9
SD
2283 return 1;
2284}
2285
2286/*
2287 * uprobe_post_sstep_notifier gets called in interrupt context as part of the notifier
2288 * mechanism. Set TIF_UPROBE flag and indicate completion of singlestep.
2289 */
2290int uprobe_post_sstep_notifier(struct pt_regs *regs)
2291{
2292 struct uprobe_task *utask = current->utask;
2293
2294 if (!current->mm || !utask || !utask->active_uprobe)
2295 /* task is currently not uprobed */
2296 return 0;
2297
2298 utask->state = UTASK_SSTEP_ACK;
2299 set_thread_flag(TIF_UPROBE);
2300 return 1;
2301}
2302
2303static struct notifier_block uprobe_exception_nb = {
2304 .notifier_call = arch_uprobe_exception_notify,
2305 .priority = INT_MAX-1, /* notified after kprobes, kgdb */
2306};
2307
2b144498
SD
2308static int __init init_uprobes(void)
2309{
2310 int i;
2311
66d06dff 2312 for (i = 0; i < UPROBES_HASH_SZ; i++)
2b144498 2313 mutex_init(&uprobes_mmap_mutex[i]);
0326f5a9 2314
32cdba1e
ON
2315 if (percpu_init_rwsem(&dup_mmap_sem))
2316 return -ENOMEM;
2317
0326f5a9 2318 return register_die_notifier(&uprobe_exception_nb);
2b144498 2319}
736e89d9 2320__initcall(init_uprobes);