/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

#ifdef CONFIG_MEMCG_KMEM
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	/*
	 * The lock protects the array of per cgroup lists from relocation
	 * (see memcg_update_list_lru_node).
	 */
	lockdep_assert_held(&nlru->lock);
	if (nlru->memcg_lrus && idx >= 0)
		return nlru->memcg_lrus->lru[idx];

	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	struct mem_cgroup *memcg;

	if (!nlru->memcg_lrus)
		return &nlru->lru;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		return &nlru->lru;

	return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
}
#else
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	l = list_lru_from_kmem(nlru, item);
	if (list_empty(item)) {
		list_add_tail(item, &l->list);
		l->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	l = list_lru_from_kmem(nlru, item);
	if (!list_empty(item)) {
		list_del_init(item);
		l->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
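
/*
 * Usage sketch (illustration only, not from the original file): how a
 * cache might pair list_lru_add() with list_lru_del(). The object type
 * and helpers below are hypothetical; the real requirements are an
 * embedded struct list_head that is list_empty() while untracked, and
 * slab/page-backed objects, since the node is derived from the item's
 * address via virt_to_page() above.
 */
#if 0
struct my_cache_obj {
	struct list_head lru;	/* list_del_init'd while not on the LRU */
	/* ... payload ... */
};

static void my_cache_obj_make_reclaimable(struct list_lru *lru,
					  struct my_cache_obj *obj)
{
	/* Returns false if the item was already on the list. */
	list_lru_add(lru, &obj->lru);
}

static void my_cache_obj_pin(struct list_lru *lru, struct my_cache_obj *obj)
{
	/* Take the object off the LRU while it is in active use. */
	list_lru_del(lru, &obj->lru);
}
#endif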

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

static unsigned long __list_lru_count_one(struct list_lru *lru,
					  int nid, int memcg_idx)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
	count = l->nr_items;
	spin_unlock(&nlru->lock);

	return count;
}

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	return __list_lru_count_one(lru, nid, memcg_cache_id(memcg));
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	long count = 0;
	int memcg_idx;

	count += __list_lru_count_one(lru, nid, -1);
	if (list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx)
			count += __list_lru_count_one(lru, nid, memcg_idx);
	}
	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);
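
/*
 * Usage sketch (illustration only, not from the original file): the count
 * API above is what a shrinker's ->count_objects callback would wrap.
 * "my_lru" and the shrinker are hypothetical, and this assumes a kernel
 * where struct shrink_control carries ->nid and ->memcg. A NULL memcg
 * maps to memcg_cache_id() == -1, i.e. the global (non-memcg) list.
 */
#if 0
static unsigned long my_shrinker_count(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	return list_lru_count_one(&my_lru, sc->nid, sc->memcg);
}
#endif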

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}

	spin_unlock(&nlru->lock);
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
				   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
					nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			if (!*nr_to_walk)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
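
/*
 * Usage sketch (illustration only, not from the original file): a walk
 * callback matching the list_lru_walk_cb contract used above. It runs
 * with the node's lru lock held; "struct my_cache_obj" and the busy
 * predicate are hypothetical.
 */
#if 0
static enum lru_status my_isolate(struct list_head *item,
				  struct list_lru_one *list,
				  spinlock_t *lock, void *cb_arg)
{
	struct list_head *freeable = cb_arg;
	struct my_cache_obj *obj =
		container_of(item, struct my_cache_obj, lru);

	if (my_cache_obj_busy(obj))	/* hypothetical predicate */
		return LRU_SKIP;	/* leave the item where it is */

	/*
	 * Move the item to a private "freeable" list; the caller frees
	 * everything on it after the walk returns. The helper also keeps
	 * list->nr_items in sync.
	 */
	list_lru_isolate_move(list, item, freeable);
	return LRU_REMOVED;
}
#endif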

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/*
	 * Slots [begin, i) were successfully allocated, and the destroy
	 * helper frees the half-open range [begin, end), so pass i here;
	 * passing i - 1 would leak the last allocated slot.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	int size = memcg_nr_cache_ids;

	nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
	if (!nlru->memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
		kfree(nlru->memcg_lrus);
		return -ENOMEM;
	}

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
	kfree(nlru->memcg_lrus);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = nlru->memcg_lrus;
	new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kfree(new);
		return -ENOMEM;
	}

	memcpy(new, old, old_size * sizeof(void *));

	/*
	 * The lock guarantees that we won't race with a reader
	 * (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	nlru->memcg_lrus = new;
	spin_unlock_irq(&nlru->lock);

	kfree(old);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	/*
	 * Do not bother shrinking the array back to the old size, because
	 * we cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	for (i = 0; i < nr_node_ids; i++) {
		if (!memcg_aware)
			lru->node[i].memcg_lrus = NULL;
		else if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--)
		memcg_destroy_list_lru_node(&lru->node[i]);
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for (i = 0; i < nr_node_ids; i++)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for (i = 0; i < nr_node_ids; i++) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for (i = 0; i < nr_node_ids; i++)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
				      int src_idx, int dst_idx)
{
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	dst->nr_items += src->nr_items;
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, int dst_idx)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for (i = 0; i < nr_node_ids; i++)
		memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
}

void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_idx);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for (i = 0; i < nr_node_ids; i++) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Keep the "not initialized" invariant for list_lru_destroy(). */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
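
/*
 * Lifecycle sketch (illustration only, not from the original file):
 * callers normally use the list_lru_init*() wrappers from
 * <linux/list_lru.h>, which forward to __list_lru_init() with the
 * appropriate memcg_aware/key arguments. "my_lru" is hypothetical.
 */
#if 0
static struct list_lru my_lru;

static int __init my_cache_init(void)
{
	/* Equivalent of the plain list_lru_init() wrapper. */
	return __list_lru_init(&my_lru, false, NULL);
}

static void __exit my_cache_exit(void)
{
	/* Unregisters the lru and frees per-node (and per-memcg) lists. */
	list_lru_destroy(&my_lru);
}
#endif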