/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_MM_H
#define _LINUX_SCHED_MM_H

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>
#include <linux/ioasid.h>

/*
 * Routines for handling mm_structs
 */
extern struct mm_struct *mm_alloc(void);

/**
 * mmgrab() - Pin a &struct mm_struct.
 * @mm: The &struct mm_struct to pin.
 *
 * Make sure that @mm will not get freed even after the owning task
 * exits. This doesn't guarantee that the associated address space
 * will still exist later on and mmget_not_zero() has to be used before
 * accessing it.
 *
 * This is the preferred way to pin @mm for a longer/unbounded amount
 * of time.
 *
 * Use mmdrop() to release the reference acquired by mmgrab().
 *
 * See also <Documentation/mm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmgrab(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_count);
}
39 | ||
223baf9d MD |
40 | static inline void smp_mb__after_mmgrab(void) |
41 | { | |
42 | smp_mb__after_atomic(); | |
43 | } | |
44 | ||
extern void __mmdrop(struct mm_struct *mm);

static inline void mmdrop(struct mm_struct *mm)
{
	/*
	 * The implicit full barrier implied by atomic_dec_and_test() is
	 * required by the membarrier system call before returning to
	 * user-space, after storing to rq->curr.
	 */
	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
		__mmdrop(mm);
}

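/*
 * Illustrative mmgrab()/mmdrop() pairing (sketch only; assumes a regular
 * user task, so current->mm is non-NULL). Pinning the mm_struct keeps the
 * structure itself alive, not the address space:
 *
 *	struct mm_struct *mm = current->mm;
 *
 *	mmgrab(mm);
 *	...&struct mm_struct stays valid here, even if the task exits...
 *	mmdrop(mm);
 */
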
#ifdef CONFIG_PREEMPT_RT
/*
 * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is
 * by far the least expensive way to do that.
 */
static inline void __mmdrop_delayed(struct rcu_head *rhp)
{
	struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);

	__mmdrop(mm);
}

/*
 * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
 * kernels via RCU.
 */
static inline void mmdrop_sched(struct mm_struct *mm)
{
	/* Provides a full memory barrier. See mmdrop() */
	if (atomic_dec_and_test(&mm->mm_count))
		call_rcu(&mm->delayed_drop, __mmdrop_delayed);
}
#else
static inline void mmdrop_sched(struct mm_struct *mm)
{
	mmdrop(mm);
}
#endif

/**
 * mmget() - Pin the address space associated with a &struct mm_struct.
 * @mm: The address space to pin.
 *
 * Make sure that the address space of the given &struct mm_struct doesn't
 * go away. This does not protect against parts of the address space being
 * modified or freed, however.
 *
 * Never use this function to pin this address space for an
 * unbounded/indefinite amount of time.
 *
 * Use mmput() to release the reference acquired by mmget().
 *
 * See also <Documentation/mm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmget(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_users);
}

static inline bool mmget_not_zero(struct mm_struct *mm)
{
	return atomic_inc_not_zero(&mm->mm_users);
}

/* mmput gets rid of the mappings and all user-space */
extern void mmput(struct mm_struct *);
#ifdef CONFIG_MMU
/* Same as above, but performs the slow path from an async context. Can
 * be called from atomic context as well.
 */
void mmput_async(struct mm_struct *);
#endif
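
/*
 * Illustrative use of mmget_not_zero()/mmput() (sketch; assumes @mm was
 * previously pinned with mmgrab(), so the structure itself cannot go away):
 *
 *	if (mmget_not_zero(mm)) {
 *		...the address space is safe to access here...
 *		mmput(mm);
 *	}
 */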

/* Grab a reference to a task's mm, if it is not already going away */
extern struct mm_struct *get_task_mm(struct task_struct *task);
/*
 * Grab a reference to a task's mm, if it is not already going away
 * and ptrace_may_access() with the mode parameter passed to it
 * succeeds.
 */
extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
/* Remove the current task's stale references to the old mm_struct on exit() */
extern void exit_mm_release(struct task_struct *, struct mm_struct *);
/* Remove the current task's stale references to the old mm_struct on exec() */
extern void exec_mm_release(struct task_struct *, struct mm_struct *);

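/*
 * Typical get_task_mm() pattern (sketch; error handling trimmed):
 *
 *	struct mm_struct *mm = get_task_mm(task);
 *
 *	if (mm) {
 *		...inspect or operate on the task's address space...
 *		mmput(mm);
 *	}
 */
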
#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
#else
static inline void mm_update_next_owner(struct mm_struct *mm)
{
}
#endif /* CONFIG_MEMCG */

#ifdef CONFIG_MMU
#ifndef arch_get_mmap_end
#define arch_get_mmap_end(addr, len, flags)	(TASK_SIZE)
#endif

#ifndef arch_get_mmap_base
#define arch_get_mmap_base(addr, base) (base)
#endif

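/*
 * An architecture can override the defaults above by defining these macros
 * in its own headers before this point, e.g. (hypothetical values, for
 * illustration only; ARCH_MAP_WINDOW and ARCH_MMAP_SLACK are made up):
 *
 *	#define arch_get_mmap_end(addr, len, flags)	(ARCH_MAP_WINDOW)
 *	#define arch_get_mmap_base(addr, base)		((base) + ARCH_MMAP_SLACK)
 */
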
extern void arch_pick_mmap_layout(struct mm_struct *mm,
				  struct rlimit *rlim_stack);
extern unsigned long
arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
		       unsigned long, unsigned long);
extern unsigned long
arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
			       unsigned long len, unsigned long pgoff,
			       unsigned long flags);

unsigned long
generic_get_unmapped_area(struct file *filp, unsigned long addr,
			  unsigned long len, unsigned long pgoff,
			  unsigned long flags);
unsigned long
generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
				  unsigned long len, unsigned long pgoff,
				  unsigned long flags);
#else
static inline void arch_pick_mmap_layout(struct mm_struct *mm,
					 struct rlimit *rlim_stack) {}
#endif

static inline bool in_vfork(struct task_struct *tsk)
{
	bool ret;

	/*
	 * We need RCU to access ->real_parent if CLONE_VM was used along with
	 * CLONE_PARENT.
	 *
	 * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
	 * imply CLONE_VM.
	 *
	 * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
	 * ->real_parent is not necessarily the task doing vfork(), so in
	 * theory we can't rely on task_lock() if we want to dereference it.
	 *
	 * And in this case we can't trust the real_parent->mm == tsk->mm
	 * check, it can be a false negative. But we do not care; if init or
	 * another oom-unkillable task does this it should blame itself.
	 */
	rcu_read_lock();
	ret = tsk->vfork_done &&
			rcu_dereference(tsk->real_parent)->mm == tsk->mm;
	rcu_read_unlock();

	return ret;
}

/*
 * Applies per-task gfp context to the given allocation flags.
 * PF_MEMALLOC_NOIO implies GFP_NOIO
 * PF_MEMALLOC_NOFS implies GFP_NOFS
 * PF_MEMALLOC_PIN  implies !__GFP_MOVABLE
 */
static inline gfp_t current_gfp_context(gfp_t flags)
{
	unsigned int pflags = READ_ONCE(current->flags);

	if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) {
		/*
		 * NOIO implies both NOIO and NOFS and it is a weaker context
		 * so always make sure it takes precedence.
		 */
		if (pflags & PF_MEMALLOC_NOIO)
			flags &= ~(__GFP_IO | __GFP_FS);
		else if (pflags & PF_MEMALLOC_NOFS)
			flags &= ~__GFP_FS;

		if (pflags & PF_MEMALLOC_PIN)
			flags &= ~__GFP_MOVABLE;
	}
	return flags;
}
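
/*
 * For example, with PF_MEMALLOC_NOFS set on the current task,
 * current_gfp_context(GFP_KERNEL) returns GFP_KERNEL & ~__GFP_FS, so the
 * allocation effectively behaves like GFP_NOFS; with PF_MEMALLOC_NOIO set
 * it drops both __GFP_IO and __GFP_FS and behaves like GFP_NOIO.
 */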

#ifdef CONFIG_LOCKDEP
extern void __fs_reclaim_acquire(unsigned long ip);
extern void __fs_reclaim_release(unsigned long ip);
extern void fs_reclaim_acquire(gfp_t gfp_mask);
extern void fs_reclaim_release(gfp_t gfp_mask);
#else
static inline void __fs_reclaim_acquire(unsigned long ip) { }
static inline void __fs_reclaim_release(unsigned long ip) { }
static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif

/* Any memory-allocation retry loop should use
 * memalloc_retry_wait(), and pass the flags for the most
 * constrained allocation attempt that might have failed.
 * This provides useful documentation of where loops are,
 * and a central place to fine tune the waiting as the MM
 * implementation changes.
 */
static inline void memalloc_retry_wait(gfp_t gfp_flags)
{
	/* We use io_schedule_timeout because waiting for memory
	 * typically includes waiting for dirty pages to be
	 * written out, which requires IO.
	 */
	__set_current_state(TASK_UNINTERRUPTIBLE);
	gfp_flags = current_gfp_context(gfp_flags);
	if (gfpflags_allow_blocking(gfp_flags) &&
	    !(gfp_flags & __GFP_NORETRY))
		/* Probably waited already, no need for much more */
		io_schedule_timeout(1);
	else
		/* Probably didn't wait, and has now released a lock,
		 * so now is a good time to wait
		 */
		io_schedule_timeout(HZ/50);
}
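
/*
 * Illustrative retry loop (sketch only; the allocation being retried is
 * hypothetical):
 *
 *	struct page *page;
 *
 *	while (!(page = alloc_page(GFP_NOFS)))
 *		memalloc_retry_wait(GFP_NOFS);
 */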

/**
 * might_alloc - Mark possible allocation sites
 * @gfp_mask: gfp_t flags that would be used to allocate
 *
 * Similar to might_sleep() and other annotations, this can be used in functions
 * that might allocate, but often don't. Compiles to nothing without
 * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp_mask allows
 * blocking.
 */
static inline void might_alloc(gfp_t gfp_mask)
{
	fs_reclaim_acquire(gfp_mask);
	fs_reclaim_release(gfp_mask);

	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
}

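/*
 * Example annotation (get_scratch_buf() and scratch_cache are hypothetical,
 * shown for illustration): a function that only allocates on its slow path
 * can still report the potential allocation to lockdep on every call:
 *
 *	void *get_scratch_buf(size_t size, gfp_t gfp)
 *	{
 *		might_alloc(gfp);
 *		if (scratch_cache)
 *			return scratch_cache;
 *		return kmalloc(size, gfp);
 *	}
 */
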
/**
 * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
 *
 * This function marks the beginning of the GFP_NOIO allocation scope.
 * All further allocations will implicitly drop the __GFP_IO flag and so
 * they are safe for the IO critical section from the allocation recursion
 * point of view. Use memalloc_noio_restore to end the scope with flags
 * returned by this function.
 *
 * This function is safe to be used from any context.
 */
static inline unsigned int memalloc_noio_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
	current->flags |= PF_MEMALLOC_NOIO;
	return flags;
}

/**
 * memalloc_noio_restore - Ends the implicit GFP_NOIO scope.
 * @flags: Flags to restore.
 *
 * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function.
 * Always make sure that the given flags are the return value from the
 * pairing memalloc_noio_save call.
 */
static inline void memalloc_noio_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
}

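/*
 * Typical GFP_NOIO scope usage (sketch):
 *
 *	unsigned int noio_flags = memalloc_noio_save();
 *
 *	...allocations here implicitly behave as if GFP_NOIO were used...
 *
 *	memalloc_noio_restore(noio_flags);
 */
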
/**
 * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope.
 *
 * This function marks the beginning of the GFP_NOFS allocation scope.
 * All further allocations will implicitly drop the __GFP_FS flag and so
 * they are safe for the FS critical section from the allocation recursion
 * point of view. Use memalloc_nofs_restore to end the scope with flags
 * returned by this function.
 *
 * This function is safe to be used from any context.
 */
static inline unsigned int memalloc_nofs_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
	current->flags |= PF_MEMALLOC_NOFS;
	return flags;
}

/**
 * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope.
 * @flags: Flags to restore.
 *
 * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function.
 * Always make sure that the given flags are the return value from the
 * pairing memalloc_nofs_save call.
 */
static inline void memalloc_nofs_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
}

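/*
 * Typical GFP_NOFS scope usage in a filesystem (sketch; the transaction
 * helpers are hypothetical):
 *
 *	unsigned int nofs_flags = memalloc_nofs_save();
 *
 *	fs_begin_transaction();
 *	...allocations here must not recurse back into the FS...
 *	fs_end_transaction();
 *
 *	memalloc_nofs_restore(nofs_flags);
 */
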
static inline unsigned int memalloc_noreclaim_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	return flags;
}

static inline void memalloc_noreclaim_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
}

static inline unsigned int memalloc_pin_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_PIN;

	current->flags |= PF_MEMALLOC_PIN;
	return flags;
}

static inline void memalloc_pin_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags;
}

#ifdef CONFIG_MEMCG
DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg);
/**
 * set_active_memcg - Starts the remote memcg charging scope.
 * @memcg: memcg to charge.
 *
 * This function marks the beginning of the remote memcg charging scope. All the
 * __GFP_ACCOUNT allocations until the end of the scope will be charged to the
 * given memcg.
 *
 * NOTE: This function can nest. Users must save the return value and
 * reset the previous value after their own charging scope is over.
 */
static inline struct mem_cgroup *
set_active_memcg(struct mem_cgroup *memcg)
{
	struct mem_cgroup *old;

	if (!in_task()) {
		old = this_cpu_read(int_active_memcg);
		this_cpu_write(int_active_memcg, memcg);
	} else {
		old = current->active_memcg;
		current->active_memcg = memcg;
	}

	return old;
}
#else
static inline struct mem_cgroup *
set_active_memcg(struct mem_cgroup *memcg)
{
	return NULL;
}
#endif

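/*
 * Typical nesting pattern (sketch):
 *
 *	struct mem_cgroup *old_memcg;
 *
 *	old_memcg = set_active_memcg(memcg);
 *	...__GFP_ACCOUNT allocations here are charged to @memcg...
 *	set_active_memcg(old_memcg);
 */
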
#ifdef CONFIG_MEMBARRIER
enum {
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED			= (1U << 1),
	MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY			= (1U << 2),
	MEMBARRIER_STATE_GLOBAL_EXPEDITED			= (1U << 3),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY	= (1U << 4),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE		= (1U << 5),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY		= (1U << 6),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ			= (1U << 7),
};

enum {
	MEMBARRIER_FLAG_SYNC_CORE	= (1U << 0),
	MEMBARRIER_FLAG_RSEQ		= (1U << 1),
};

#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
#include <asm/membarrier.h>
#endif

static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
	if (current->mm != mm)
		return;
	if (likely(!(atomic_read(&mm->membarrier_state) &
		     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
		return;
	sync_core_before_usermode();
}

extern void membarrier_exec_mmap(struct mm_struct *mm);

extern void membarrier_update_current_mm(struct mm_struct *next_mm);

#else
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
					     struct mm_struct *next,
					     struct task_struct *tsk)
{
}
#endif
static inline void membarrier_exec_mmap(struct mm_struct *mm)
{
}
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
}
static inline void membarrier_update_current_mm(struct mm_struct *next_mm)
{
}
#endif

#ifdef CONFIG_IOMMU_SVA
static inline void mm_pasid_init(struct mm_struct *mm)
{
	mm->pasid = INVALID_IOASID;
}

/* Associate a PASID with an mm_struct: */
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
{
	mm->pasid = pasid;
}

static inline void mm_pasid_drop(struct mm_struct *mm)
{
	if (pasid_valid(mm->pasid)) {
		ioasid_free(mm->pasid);
		mm->pasid = INVALID_IOASID;
	}
}
#else
static inline void mm_pasid_init(struct mm_struct *mm) {}
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
static inline void mm_pasid_drop(struct mm_struct *mm) {}
#endif

#endif /* _LINUX_SCHED_MM_H */