]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
3e32cb2e JW |
2 | /* |
3 | * Lockless hierarchical page accounting & limiting | |
4 | * | |
5 | * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner | |
6 | */ | |
7 | ||
8 | #include <linux/page_counter.h> | |
9 | #include <linux/atomic.h> | |
10 | #include <linux/kernel.h> | |
11 | #include <linux/string.h> | |
12 | #include <linux/sched.h> | |
13 | #include <linux/bug.h> | |
14 | #include <asm/page.h> | |
15 | ||
16 | /** | |
17 | * page_counter_cancel - take pages out of the local counter | |
18 | * @counter: counter | |
19 | * @nr_pages: number of pages to cancel | |
3e32cb2e | 20 | */ |
64f21993 | 21 | void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages) |
3e32cb2e JW |
22 | { |
23 | long new; | |
24 | ||
25 | new = atomic_long_sub_return(nr_pages, &counter->count); | |
3e32cb2e JW |
26 | /* More uncharges than charges? */ |
27 | WARN_ON_ONCE(new < 0); | |
3e32cb2e JW |
28 | } |
29 | ||
30 | /** | |
31 | * page_counter_charge - hierarchically charge pages | |
32 | * @counter: counter | |
33 | * @nr_pages: number of pages to charge | |
34 | * | |
35 | * NOTE: This does not consider any configured counter limits. | |
36 | */ | |
37 | void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) | |
38 | { | |
39 | struct page_counter *c; | |
40 | ||
41 | for (c = counter; c; c = c->parent) { | |
42 | long new; | |
43 | ||
44 | new = atomic_long_add_return(nr_pages, &c->count); | |
45 | /* | |
46 | * This is indeed racy, but we can live with some | |
47 | * inaccuracy in the watermark. | |
48 | */ | |
49 | if (new > c->watermark) | |
50 | c->watermark = new; | |
51 | } | |
52 | } | |
53 | ||
54 | /** | |
55 | * page_counter_try_charge - try to hierarchically charge pages | |
56 | * @counter: counter | |
57 | * @nr_pages: number of pages to charge | |
58 | * @fail: points first counter to hit its limit, if any | |
59 | * | |
6071ca52 JW |
60 | * Returns %true on success, or %false and @fail if the counter or one |
61 | * of its ancestors has hit its configured limit. | |
3e32cb2e | 62 | */ |
6071ca52 JW |
63 | bool page_counter_try_charge(struct page_counter *counter, |
64 | unsigned long nr_pages, | |
65 | struct page_counter **fail) | |
3e32cb2e JW |
66 | { |
67 | struct page_counter *c; | |
68 | ||
69 | for (c = counter; c; c = c->parent) { | |
70 | long new; | |
71 | /* | |
72 | * Charge speculatively to avoid an expensive CAS. If | |
73 | * a bigger charge fails, it might falsely lock out a | |
74 | * racing smaller charge and send it into reclaim | |
75 | * early, but the error is limited to the difference | |
76 | * between the two sizes, which is less than 2M/4M in | |
77 | * case of a THP locking out a regular page charge. | |
78 | * | |
79 | * The atomic_long_add_return() implies a full memory | |
80 | * barrier between incrementing the count and reading | |
81 | * the limit. When racing with page_counter_limit(), | |
82 | * we either see the new limit or the setter sees the | |
83 | * counter has changed and retries. | |
84 | */ | |
85 | new = atomic_long_add_return(nr_pages, &c->count); | |
86 | if (new > c->limit) { | |
87 | atomic_long_sub(nr_pages, &c->count); | |
88 | /* | |
89 | * This is racy, but we can live with some | |
90 | * inaccuracy in the failcnt. | |
91 | */ | |
92 | c->failcnt++; | |
93 | *fail = c; | |
94 | goto failed; | |
95 | } | |
96 | /* | |
97 | * Just like with failcnt, we can live with some | |
98 | * inaccuracy in the watermark. | |
99 | */ | |
100 | if (new > c->watermark) | |
101 | c->watermark = new; | |
102 | } | |
6071ca52 | 103 | return true; |
3e32cb2e JW |
104 | |
105 | failed: | |
106 | for (c = counter; c != *fail; c = c->parent) | |
107 | page_counter_cancel(c, nr_pages); | |
108 | ||
6071ca52 | 109 | return false; |
3e32cb2e JW |
110 | } |
111 | ||
112 | /** | |
113 | * page_counter_uncharge - hierarchically uncharge pages | |
114 | * @counter: counter | |
115 | * @nr_pages: number of pages to uncharge | |
3e32cb2e | 116 | */ |
64f21993 | 117 | void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages) |
3e32cb2e JW |
118 | { |
119 | struct page_counter *c; | |
3e32cb2e | 120 | |
64f21993 JW |
121 | for (c = counter; c; c = c->parent) |
122 | page_counter_cancel(c, nr_pages); | |
3e32cb2e JW |
123 | } |
124 | ||
125 | /** | |
126 | * page_counter_limit - limit the number of pages allowed | |
127 | * @counter: counter | |
128 | * @limit: limit to set | |
129 | * | |
130 | * Returns 0 on success, -EBUSY if the current number of pages on the | |
131 | * counter already exceeds the specified limit. | |
132 | * | |
133 | * The caller must serialize invocations on the same counter. | |
134 | */ | |
135 | int page_counter_limit(struct page_counter *counter, unsigned long limit) | |
136 | { | |
137 | for (;;) { | |
138 | unsigned long old; | |
139 | long count; | |
140 | ||
141 | /* | |
142 | * Update the limit while making sure that it's not | |
143 | * below the concurrently-changing counter value. | |
144 | * | |
145 | * The xchg implies two full memory barriers before | |
146 | * and after, so the read-swap-read is ordered and | |
147 | * ensures coherency with page_counter_try_charge(): | |
148 | * that function modifies the count before checking | |
149 | * the limit, so if it sees the old limit, we see the | |
150 | * modified counter and retry. | |
151 | */ | |
152 | count = atomic_long_read(&counter->count); | |
153 | ||
154 | if (count > limit) | |
155 | return -EBUSY; | |
156 | ||
157 | old = xchg(&counter->limit, limit); | |
158 | ||
159 | if (atomic_long_read(&counter->count) <= count) | |
160 | return 0; | |
161 | ||
162 | counter->limit = old; | |
163 | cond_resched(); | |
164 | } | |
165 | } | |
166 | ||
167 | /** | |
168 | * page_counter_memparse - memparse() for page counter limits | |
169 | * @buf: string to parse | |
650c5e56 | 170 | * @max: string meaning maximum possible value |
3e32cb2e JW |
171 | * @nr_pages: returns the result in number of pages |
172 | * | |
173 | * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be | |
174 | * limited to %PAGE_COUNTER_MAX. | |
175 | */ | |
650c5e56 JW |
176 | int page_counter_memparse(const char *buf, const char *max, |
177 | unsigned long *nr_pages) | |
3e32cb2e | 178 | { |
3e32cb2e JW |
179 | char *end; |
180 | u64 bytes; | |
181 | ||
650c5e56 | 182 | if (!strcmp(buf, max)) { |
3e32cb2e JW |
183 | *nr_pages = PAGE_COUNTER_MAX; |
184 | return 0; | |
185 | } | |
186 | ||
187 | bytes = memparse(buf, &end); | |
188 | if (*end != '\0') | |
189 | return -EINVAL; | |
190 | ||
191 | *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX); | |
192 | ||
193 | return 0; | |
194 | } |