/*
 *	linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/rmap.h>
#include <linux/mmzone.h>
#include <linux/hugetlb.h>

#include "internal.h"

int can_do_mlock(void)
{
	if (capable(CAP_IPC_LOCK))
		return 1;
	if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
		return 1;
	return 0;
}
EXPORT_SYMBOL(can_do_mlock);

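/*
 * Illustrative sketch (not part of this file): how the policy in
 * can_do_mlock() looks from user space.  This is a hypothetical test
 * program, assuming no CAP_IPC_LOCK; with RLIMIT_MEMLOCK forced to 0,
 * mlock(2) is expected to fail with EPERM because can_do_mlock()
 * returns 0.
 *
 *	#include <sys/mman.h>
 *	#include <sys/resource.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	int main(void)
 *	{
 *		struct rlimit rl = { 0, 0 };
 *		char *buf = malloc(4096);
 *
 *		setrlimit(RLIMIT_MEMLOCK, &rl);	// no locked-memory allowance
 *		if (mlock(buf, 4096) != 0)
 *			perror("mlock");	// expect EPERM without CAP_IPC_LOCK
 *		return 0;
 *	}
 */
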
#ifdef CONFIG_UNEVICTABLE_LRU
/*
 * Mlocked pages are marked with PageMlocked() flag for efficient testing
 * in vmscan and, possibly, the fault path; and to support semi-accurate
 * statistics.
 *
 * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
 * be placed on the LRU "unevictable" list, rather than the [in]active lists.
 * The unevictable list is an LRU sibling list to the [in]active lists.
 * PageUnevictable is set to indicate the unevictable state.
 *
 * When lazy mlocking via vmscan, it is important to ensure that the
 * vma's VM_LOCKED status is not concurrently being modified, otherwise we
 * may have mlocked a page that is being munlocked. So lazy mlock must take
 * the mmap_sem for read, and verify that the vma really is locked
 * (see mm/rmap.c).
 */

/*
 * LRU accounting for clear_page_mlock()
 */
void __clear_page_mlock(struct page *page)
{
	VM_BUG_ON(!PageLocked(page));

	if (!page->mapping) {	/* truncated ? */
		return;
	}

	if (!isolate_lru_page(page)) {
		putback_lru_page(page);
	} else {
		/*
		 * Page not on the LRU yet.  Flush all pagevecs and retry.
		 */
		lru_add_drain_all();
		if (!isolate_lru_page(page))
			putback_lru_page(page);
	}
}

/*
 * Mark page as mlocked if not already.
 * If page on LRU, isolate and putback to move to unevictable list.
 */
void mlock_vma_page(struct page *page)
{
	BUG_ON(!PageLocked(page));

	if (!TestSetPageMlocked(page) && !isolate_lru_page(page))
		putback_lru_page(page);
}

/*
 * called from munlock()/munmap() path with page supposedly on the LRU.
 *
 * Note:  unlike mlock_vma_page(), we can't just clear the PageMlocked
 * [in try_to_munlock()] and then attempt to isolate the page.  We must
 * isolate the page to keep others from messing with its unevictable
 * and mlocked state while trying to munlock.  However, we pre-clear the
 * mlocked state anyway as we might lose the isolation race and we might
 * not get another chance to clear PageMlocked.  If we successfully
 * isolate the page and try_to_munlock() detects other VM_LOCKED vmas
 * mapping the page, it will restore the PageMlocked state, unless the page
 * is mapped in a non-linear vma.  So, we go ahead and SetPageMlocked(),
 * perhaps redundantly.
 * If we lose the isolation race, and the page is mapped by other VM_LOCKED
 * vmas, we'll detect this in vmscan--via try_to_munlock() or try_to_unmap()
 * either of which will restore the PageMlocked state by calling
 * mlock_vma_page() above, if it can grab the vma's mmap sem.
 */
static void munlock_vma_page(struct page *page)
{
	BUG_ON(!PageLocked(page));

	if (TestClearPageMlocked(page) && !isolate_lru_page(page)) {
		try_to_munlock(page);
		putback_lru_page(page);
	}
}

/*
 * mlock a range of pages in the vma.
 *
 * This takes care of making the pages present too.
 *
 * vma->vm_mm->mmap_sem must be held for write.
 */
static int __mlock_vma_pages_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr = start;
	struct page *pages[16]; /* 16 gives a reasonable batch */
	int write = !!(vma->vm_flags & VM_WRITE);
	int nr_pages = (end - start) / PAGE_SIZE;
	int ret;

	VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK);
	VM_BUG_ON(start < vma->vm_start || end > vma->vm_end);
	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));

	lru_add_drain_all();	/* push cached pages to LRU */

	while (nr_pages > 0) {
		int i;

		cond_resched();

		/*
		 * get_user_pages makes pages present if we are
		 * setting mlock, and this extra reference count will
		 * disable migration of this page.  However, the page may
		 * still be truncated out from under us.
		 */
		ret = get_user_pages(current, mm, addr,
				min_t(int, nr_pages, ARRAY_SIZE(pages)),
				write, 0, pages, NULL);
		/*
		 * This can happen for, e.g., VM_NONLINEAR regions before
		 * a page has been allocated and mapped at a given offset,
		 * or for addresses that map beyond end of a file.
		 * We'll mlock the pages if/when they get faulted in.
		 */
		if (ret < 0)
			break;
		if (ret == 0) {
			/*
			 * We know the vma is there, so the only time
			 * we cannot get a single page should be an
			 * error (ret < 0) case.
			 */
			WARN_ON(1);
			break;
		}

		lru_add_drain();	/* push cached pages to LRU */

		for (i = 0; i < ret; i++) {
			struct page *page = pages[i];

			lock_page(page);
			/*
			 * Because we lock page here and migration is blocked
			 * by the elevated reference, we need only check for
			 * page truncation (file-cache only).
			 */
			if (page->mapping)
				mlock_vma_page(page);
			unlock_page(page);
			put_page(page);		/* ref from get_user_pages() */

			/*
			 * here we assume that get_user_pages() has given us
			 * a list of virtually contiguous pages.
			 */
			addr += PAGE_SIZE;	/* for next get_user_pages() */
			nr_pages--;
		}
	}

	lru_add_drain_all();	/* to update stats */

	return 0;	/* count entire vma as locked_vm */
}

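/*
 * Illustrative sketch (not part of this file): the "makes the pages
 * present" behaviour of __mlock_vma_pages_range(), observed from user
 * space with mincore().  Hypothetical test code; it assumes 4 KiB pages
 * and enough RLIMIT_MEMLOCK headroom for 16 pages.
 *
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *	#include <assert.h>
 *
 *	int main(void)
 *	{
 *		size_t page = 4096, len = 16 * page;
 *		unsigned char vec[16];
 *		int i;
 *		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		assert(mlock(buf, len) == 0);		// faults all 16 pages in
 *		assert(mincore(buf, len, vec) == 0);
 *		for (i = 0; i < 16; i++)
 *			assert(vec[i] & 1);		// every page is resident
 *		munlock(buf, len);
 *		return 0;
 *	}
 */
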
/*
 * private structure for munlock page table walk
 */
struct munlock_page_walk {
	struct vm_area_struct *vma;
	pmd_t *pmd; /* for migration_entry_wait() */
};

/*
 * munlock normal pages for present ptes
 */
static int __munlock_pte_handler(pte_t *ptep, unsigned long addr,
				 unsigned long end, struct mm_walk *walk)
{
	struct munlock_page_walk *mpw = walk->private;
	swp_entry_t entry;
	struct page *page;
	pte_t pte;

retry:
	pte = *ptep;
	/*
	 * If it's a swap pte, we might be racing with page migration.
	 */
	if (unlikely(!pte_present(pte))) {
		if (!is_swap_pte(pte))
			goto out;
		entry = pte_to_swp_entry(pte);
		if (is_migration_entry(entry)) {
			migration_entry_wait(mpw->vma->vm_mm, mpw->pmd, addr);
			goto retry;
		}
		goto out;
	}

	page = vm_normal_page(mpw->vma, addr, pte);
	if (!page)
		goto out;

	lock_page(page);
	if (!page->mapping) {
		unlock_page(page);
		goto retry;
	}
	munlock_vma_page(page);
	unlock_page(page);

out:
	return 0;
}

/*
 * Save pmd for pte handler for waiting on migration entries
 */
static int __munlock_pmd_handler(pmd_t *pmd, unsigned long addr,
				 unsigned long end, struct mm_walk *walk)
{
	struct munlock_page_walk *mpw = walk->private;

	mpw->pmd = pmd;
	return 0;
}


/*
 * munlock a range of pages in the vma using standard page table walk.
 *
 * vma->vm_mm->mmap_sem must be held for write.
 */
static void __munlock_vma_pages_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	struct munlock_page_walk mpw = {
		.vma = vma,
	};
	struct mm_walk munlock_page_walk = {
		.pmd_entry = __munlock_pmd_handler,
		.pte_entry = __munlock_pte_handler,
		.private = &mpw,
		.mm = mm,
	};

	VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK);
	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
	VM_BUG_ON(start < vma->vm_start);
	VM_BUG_ON(end > vma->vm_end);

	lru_add_drain_all();	/* push cached pages to LRU */
	walk_page_range(start, end, &munlock_page_walk);
	lru_add_drain_all();	/* to update stats */
}

#else /* CONFIG_UNEVICTABLE_LRU */

/*
 * Just make pages present if VM_LOCKED.  No-op if unlocking.
 */
static int __mlock_vma_pages_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
	if (vma->vm_flags & VM_LOCKED)
		make_pages_present(start, end);
	return 0;
}

/*
 * munlock a range of pages in the vma -- no-op.
 */
static void __munlock_vma_pages_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
}
#endif /* CONFIG_UNEVICTABLE_LRU */

/*
 * mlock all pages in this vma range.  For mmap()/mremap()/...
 */
int mlock_vma_pages_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	int nr_pages = (end - start) / PAGE_SIZE;
	BUG_ON(!(vma->vm_flags & VM_LOCKED));

	/*
	 * filter unlockable vmas
	 */
	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
		goto no_mlock;

	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
			is_vm_hugetlb_page(vma) ||
			vma == get_gate_vma(current))) {
		downgrade_write(&mm->mmap_sem);
		nr_pages = __mlock_vma_pages_range(vma, start, end);

		up_read(&mm->mmap_sem);
		/* vma can change or disappear */
		down_write(&mm->mmap_sem);
		vma = find_vma(mm, start);
		/* non-NULL vma must contain @start, but need to check @end */
		if (!vma || end > vma->vm_end)
			return -EAGAIN;
		return nr_pages;
	}

	/*
	 * User mapped kernel pages or huge pages:
	 * make these pages present to populate the ptes, but
	 * fall thru' to reset VM_LOCKED--no need to unlock, and
	 * return nr_pages so these don't get counted against task's
	 * locked limit.  huge pages are already counted against
	 * locked vm limit.
	 */
	make_pages_present(start, end);

no_mlock:
	vma->vm_flags &= ~VM_LOCKED;	/* and don't come back! */
	return nr_pages;		/* pages NOT mlocked */
}

/*
 * munlock all pages in vma.  For munmap() and exit().
 */
void munlock_vma_pages_all(struct vm_area_struct *vma)
{
	vma->vm_flags &= ~VM_LOCKED;
	__munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
}

/*
 * mlock_fixup  - handle mlock[all]/munlock[all] requests.
 *
 * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
 * munlock is a no-op.  However, for some special vmas, we go ahead and
 * populate the ptes via make_pages_present().
 *
 * For vmas that pass the filters, merge/split as appropriate.
 */
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
	unsigned long start, unsigned long end, unsigned int newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	pgoff_t pgoff;
	int nr_pages;
	int ret = 0;
	int lock = newflags & VM_LOCKED;

	if (newflags == vma->vm_flags ||
			(vma->vm_flags & (VM_IO | VM_PFNMAP)))
		goto out;	/* don't set VM_LOCKED, don't count */

	if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
			is_vm_hugetlb_page(vma) ||
			vma == get_gate_vma(current)) {
		if (lock)
			make_pages_present(start, end);
		goto out;	/* don't set VM_LOCKED, don't count */
	}

	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
			  vma->vm_file, pgoff, vma_policy(vma));
	if (*prev) {
		vma = *prev;
		goto success;
	}

	if (start != vma->vm_start) {
		ret = split_vma(mm, vma, start, 1);
		if (ret)
			goto out;
	}

	if (end != vma->vm_end) {
		ret = split_vma(mm, vma, end, 0);
		if (ret)
			goto out;
	}

success:
	/*
	 * Keep track of amount of locked VM.
	 */
	nr_pages = (end - start) >> PAGE_SHIFT;
	if (!lock)
		nr_pages = -nr_pages;
	mm->locked_vm += nr_pages;

	/*
	 * vm_flags is protected by the mmap_sem held in write mode.
	 * It's okay if try_to_unmap_one unmaps a page just after we
	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
	 */
	vma->vm_flags = newflags;

	if (lock) {
		/*
		 * mmap_sem is currently held for write.  Downgrade the write
		 * lock to a read lock so that other faults, mmap scans, ...
		 * can proceed while we fault in all pages.
		 */
		downgrade_write(&mm->mmap_sem);

		ret = __mlock_vma_pages_range(vma, start, end);
		if (ret > 0) {
			mm->locked_vm -= ret;
			ret = 0;
		}
		/*
		 * Need to reacquire mmap sem in write mode, as our callers
		 * expect this.  We have no support for atomically upgrading
		 * a sem to write, so we need to check for ranges while sem
		 * is unlocked.
		 */
		up_read(&mm->mmap_sem);
		/* vma can change or disappear */
		down_write(&mm->mmap_sem);
		*prev = find_vma(mm, start);
		/* non-NULL *prev must contain @start, but need to check @end */
		if (!(*prev) || end > (*prev)->vm_end)
			ret = -EAGAIN;
	} else {
		/*
		 * TODO:  for unlocking, pages will already be resident, so
		 * we don't need to wait for allocations/reclaim/pagein, ...
		 * However, unlocking a very large region can still take a
		 * while.  Should we downgrade the semaphore for both lock
		 * AND unlock ?
		 */
		__munlock_vma_pages_range(vma, start, end);
	}

out:
	*prev = vma;
	return ret;
}

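/*
 * Illustrative sketch (not part of this file): the split_vma() path in
 * mlock_fixup() above, seen from user space.  Locking only the middle
 * page of a 3-page anonymous mapping leaves three separate vmas (the
 * VM_LOCKED one cannot merge with its unlocked neighbours), which shows
 * up as three lines in /proc/self/maps.  Hypothetical demo, assuming
 * 4 KiB pages; after munlock() the pieces may merge back.
 *
 *	#include <sys/mman.h>
 *	#include <stdlib.h>
 *
 *	int main(void)
 *	{
 *		size_t page = 4096;
 *		char *buf = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
 *				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		mlock(buf + page, page);	// lock only the middle page
 *		system("cat /proc/self/maps");	// region now appears as 3 vmas
 *		munlock(buf + page, page);
 *		return 0;
 *	}
 */
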
static int do_mlock(unsigned long start, size_t len, int on)
{
	unsigned long nstart, end, tmp;
	struct vm_area_struct * vma, * prev;
	int error;

	len = PAGE_ALIGN(len);
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	vma = find_vma_prev(current->mm, start, &prev);
	if (!vma || vma->vm_start > start)
		return -ENOMEM;

	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		unsigned int newflags;

		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */

		newflags = vma->vm_flags | VM_LOCKED;
		if (!on)
			newflags &= ~VM_LOCKED;

		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			break;
		nstart = tmp;
		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			break;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			break;
		}
	}
	return error;
}

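/*
 * Illustrative sketch (not part of this file): do_mlock() walks the vmas
 * covering [start, end) and fails with -ENOMEM if it hits a gap.  A
 * hypothetical user-space demonstration, assuming 4 KiB pages: punching
 * a hole in the middle of a mapping makes mlock() on the whole range
 * fail.
 *
 *	#include <sys/mman.h>
 *	#include <errno.h>
 *	#include <assert.h>
 *
 *	int main(void)
 *	{
 *		size_t page = 4096;
 *		char *buf = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
 *				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		munmap(buf + page, page);	// unmap the middle page
 *		assert(mlock(buf, 3 * page) == -1 && errno == ENOMEM);
 *		return 0;
 *	}
 */
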
asmlinkage long sys_mlock(unsigned long start, size_t len)
{
	unsigned long locked;
	unsigned long lock_limit;
	int error = -ENOMEM;

	if (!can_do_mlock())
		return -EPERM;

	down_write(&current->mm->mmap_sem);
	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
	start &= PAGE_MASK;

	locked = len >> PAGE_SHIFT;
	locked += current->mm->locked_vm;

	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;

	/* check against resource limits */
	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
		error = do_mlock(start, len, 1);
	up_write(&current->mm->mmap_sem);
	return error;
}

asmlinkage long sys_munlock(unsigned long start, size_t len)
{
	int ret;

	down_write(&current->mm->mmap_sem);
	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
	start &= PAGE_MASK;
	ret = do_mlock(start, len, 0);
	up_write(&current->mm->mmap_sem);
	return ret;
}

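/*
 * Illustrative sketch (not part of this file): the resource-limit check
 * in sys_mlock() above, seen from user space.  Hypothetical test code
 * assuming 4 KiB pages, no CAP_IPC_LOCK, and a self-imposed 16-page
 * RLIMIT_MEMLOCK: locking within the limit succeeds, exceeding it fails
 * with ENOMEM (not EPERM, since the limit is non-zero), and munlock()
 * is never limited.
 *
 *	#include <sys/mman.h>
 *	#include <sys/resource.h>
 *	#include <errno.h>
 *	#include <assert.h>
 *
 *	int main(void)
 *	{
 *		size_t page = 4096;
 *		struct rlimit rl = { 16 * page, 16 * page };
 *		char *buf = mmap(NULL, 32 * page, PROT_READ | PROT_WRITE,
 *				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		assert(setrlimit(RLIMIT_MEMLOCK, &rl) == 0);
 *		assert(mlock(buf, 8 * page) == 0);	// 8 pages <= 16-page limit
 *		assert(mlock(buf, 32 * page) == -1 && errno == ENOMEM);
 *		assert(munlock(buf, 32 * page) == 0);
 *		return 0;
 *	}
 */
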
static int do_mlockall(int flags)
{
	struct vm_area_struct * vma, * prev = NULL;
	unsigned int def_flags = 0;

	if (flags & MCL_FUTURE)
		def_flags = VM_LOCKED;
	current->mm->def_flags = def_flags;
	if (flags == MCL_FUTURE)
		goto out;

	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
		unsigned int newflags;

		newflags = vma->vm_flags | VM_LOCKED;
		if (!(flags & MCL_CURRENT))
			newflags &= ~VM_LOCKED;

		/* Ignore errors */
		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
	}
out:
	return 0;
}

asmlinkage long sys_mlockall(int flags)
{
	unsigned long lock_limit;
	int ret = -EINVAL;

	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
		goto out;

	ret = -EPERM;
	if (!can_do_mlock())
		goto out;

	down_write(&current->mm->mmap_sem);

	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;

	ret = -ENOMEM;
	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
	    capable(CAP_IPC_LOCK))
		ret = do_mlockall(flags);
	up_write(&current->mm->mmap_sem);
out:
	return ret;
}

asmlinkage long sys_munlockall(void)
{
	int ret;

	down_write(&current->mm->mmap_sem);
	ret = do_mlockall(0);
	up_write(&current->mm->mmap_sem);
	return ret;
}

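/*
 * Illustrative sketch (not part of this file): typical use of the
 * mlockall()/munlockall() pair implemented above, e.g. in a latency-
 * sensitive program.  MCL_CURRENT locks every existing vma via
 * do_mlockall(); MCL_FUTURE sets mm->def_flags so later mappings are
 * created VM_LOCKED.  Hypothetical example; it needs CAP_IPC_LOCK or a
 * large enough RLIMIT_MEMLOCK to succeed.
 *
 *	#include <sys/mman.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
 *			perror("mlockall");	// EPERM or ENOMEM, see above
 *
 *		// ... time-critical work, no major faults expected ...
 *
 *		munlockall();
 *		return 0;
 *	}
 */
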
/*
 * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
 * shm segments) get accounted against the user_struct instead.
 */
static DEFINE_SPINLOCK(shmlock_user_lock);

int user_shm_lock(size_t size, struct user_struct *user)
{
	unsigned long lock_limit, locked;
	int allowed = 0;

	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
	if (lock_limit == RLIM_INFINITY)
		allowed = 1;
	lock_limit >>= PAGE_SHIFT;
	spin_lock(&shmlock_user_lock);
	if (!allowed &&
	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
		goto out;
	get_uid(user);
	user->locked_shm += locked;
	allowed = 1;
out:
	spin_unlock(&shmlock_user_lock);
	return allowed;
}

void user_shm_unlock(size_t size, struct user_struct *user)
{
	spin_lock(&shmlock_user_lock);
	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	spin_unlock(&shmlock_user_lock);
	free_uid(user);
}
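
/*
 * Illustrative sketch (not part of this file): user_shm_lock() and
 * user_shm_unlock() above are reached via SHM_LOCK/SHM_UNLOCK on SysV
 * shared memory, so the pages are charged to the owning user_struct
 * rather than to a single process.  Hypothetical example; it assumes the
 * caller may lock 1 MiB under its RLIMIT_MEMLOCK or has CAP_IPC_LOCK.
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		int id = shmget(IPC_PRIVATE, 1 << 20, IPC_CREAT | 0600);
 *
 *		if (shmctl(id, SHM_LOCK, NULL) != 0)	// charges user->locked_shm
 *			perror("shmctl(SHM_LOCK)");
 *		shmctl(id, SHM_UNLOCK, NULL);		// undoes the charge
 *		shmctl(id, IPC_RMID, NULL);
 *		return 0;
 *	}
 */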