/*
 * Floating proportions
 *
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <[email protected]>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i is the time period over which the differential is taken. So d/dt_{-i}
 * is the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised,
 * i.e.:
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same unit as x (events), so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonically increasing property can be used to mitigate the wrap-
 *   around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */
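
/*
 * As a standalone illustration of the lazy catch-up above (a sketch, not part
 * of this file's interface; it assumes a single plain counter rather than the
 * percpu machinery used below):
 *
 *      void catch_up(unsigned long *x_j, unsigned long *c_j, unsigned long c)
 *      {
 *              unsigned long missed = c - *c_j;
 *
 *              if (missed >= BITS_PER_LONG)
 *                      *x_j = 0;
 *              else
 *                      *x_j >>= missed;
 *              *c_j = c;
 *      }
 */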

#include <linux/proportions.h>
#include <linux/rcupdate.h>

/*
 * Limit the time part in order to ensure there are some bits left for the
 * cycle counter.
 */
#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
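
/*
 * For example, with BITS_PER_LONG == 64 the shift is capped at 48, so the
 * position within the current half-period occupies at most the low 47 bits
 * of the global event counter, leaving the high bits as the elapsed-period
 * counter (the cap is 24 on 32-bit).
 */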

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
        int err;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        pd->index = 0;
        pd->pg[0].shift = shift;
        mutex_init(&pd->mutex);
        err = percpu_counter_init_irq(&pd->pg[0].events, 0);
        if (err)
                goto out;

        err = percpu_counter_init_irq(&pd->pg[1].events, 0);
        if (err)
                percpu_counter_destroy(&pd->pg[0].events);

out:
        return err;
}
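
/*
 * A minimal usage sketch (hypothetical caller, not part of this file): a
 * subsystem typically sets up one descriptor at init time and shares it
 * between all of its prop_local_* instances:
 *
 *      static struct prop_descriptor my_prop;
 *
 *      static int __init my_init(void)
 *      {
 *              return prop_descriptor_init(&my_prop, 10);
 *      }
 *
 * A shift of 10 gives a half-period of 2^9 events; the value is illustrative.
 */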

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
        int index;
        int offset;
        u64 events;
        unsigned long flags;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        mutex_lock(&pd->mutex);

        index = pd->index ^ 1;
        offset = pd->pg[pd->index].shift - shift;
        if (!offset)
                goto out;

        pd->pg[index].shift = shift;

        local_irq_save(flags);
        events = percpu_counter_sum(&pd->pg[pd->index].events);
        if (offset < 0)
                events <<= -offset;
        else
                events >>= offset;
        percpu_counter_set(&pd->pg[index].events, events);

        /*
         * ensure the new pg is fully written before the switch
         */
        smp_wmb();
        pd->index = index;
        local_irq_restore(flags);

        synchronize_rcu();

out:
        mutex_unlock(&pd->mutex);
}
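
/*
 * Worked example (numbers are illustrative): going from shift 10 (half-period
 * of 512 events) to shift 8 (half-period of 128 events) gives offset = 2, so
 * an event count of 5120 is scaled down to 1280. Both values correspond to
 * the same elapsed-period count, events >> (shift - 1) == 10, which is what
 * keeps the local/global period comparison in prop_norm_*() meaningful across
 * the flip.
 */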

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
{
        int index;

        rcu_read_lock();
        index = pd->index;
        /*
         * match the wmb from prop_change_shift()
         */
        smp_rmb();
        return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
{
        rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
        int offset = *pl_shift - new_shift;

        if (!offset)
                return;

        if (offset < 0)
                *pl_period <<= -offset;
        else
                *pl_period >>= offset;

        *pl_shift = new_shift;
}
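
/*
 * This mirrors the rescaling done in prop_change_shift(): the locally cached
 * period stamp is shifted by the same offset as the global event counter, so
 * comparing pl->period against the global period stamp stays meaningful even
 * right after a shift change.
 */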

/*
 * PERCPU
 */

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
        spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        return percpu_counter_init_irq(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
        percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
        /*
         * For each missed period, we halve the local counter; basically:
         *
         *   pl->events >>= (global_period - pl->period) / period;
         *
         * but since the distributed nature of percpu counters makes division
         * rather hard, use a regular subtraction loop. This is safe, because
         * the events will only ever be incremented, hence the subtraction
         * can never result in a negative number.
         */
        while (pl->period != global_period) {
                unsigned long val = percpu_counter_read(&pl->events);
                unsigned long half = (val + 1) >> 1;

                /*
                 * Half of zero won't be much less, break out.
                 * This limits the loop to shift iterations, even
                 * if we missed a million.
                 */
                if (!val)
                        break;

                percpu_counter_add(&pl->events, -half);
                pl->period += period;
        }
        pl->period = global_period;
        spin_unlock_irqrestore(&pl->lock, flags);
}
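
/*
 * To see that the loop above matches the per-period halving in the model:
 * with (illustrative) val == 1000 and two missed half-periods, the first
 * pass subtracts 500 (leaving 500) and the second subtracts 250 (leaving
 * 250), i.e. the same result as val >> 2, up to the rounding introduced by
 * the (val + 1) >> 1 term.
 */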

/*
 * ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_percpu(pg, pl);
        percpu_counter_add(&pl->events, 1);
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}
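
/*
 * Sketch of a (hypothetical) event path: a caller that wants to track what
 * share of all events each object generates simply calls this on every event:
 *
 *      void my_account_event(struct my_object *obj)
 *      {
 *              __prop_inc_percpu(&my_prop, &obj->completions);
 *      }
 *
 * where my_prop and obj->completions are the caller's own descriptor and
 * per-object prop_local_percpu; both names are illustrative.
 */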

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
                struct prop_local_percpu *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_percpu(pg, pl);
        *numerator = percpu_counter_read_positive(&pl->events);

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}
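
/*
 * A (hypothetical) consumer would typically use the fraction to scale some
 * global budget, e.g.:
 *
 *      long num, den;
 *
 *      prop_fraction_percpu(&my_prop, &obj->completions, &num, &den);
 *      obj_limit = (global_limit * num) / den;
 *
 * The denominator is always at least period/2, so the division is safe even
 * before any events have been seen.
 */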

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
        spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        pl->events = 0;
        return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
        /*
         * For each missed period, we halve the local counter.
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (likely(period < BITS_PER_LONG))
                pl->events >>= period;
        else
                pl->events = 0;
        pl->period = global_period;
        spin_unlock_irqrestore(&pl->lock, flags);
}
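
/*
 * Unlike the percpu variant, the single counter can be divided directly: the
 * number of missed half-periods is recovered from the period stamps and used
 * as a shift count. For example (illustrative numbers), with shift == 10 the
 * half-period is 512; if pl->period == 1024 and global_period == 2048, then
 * (2048 - 1024) >> 9 == 2 half-periods were missed and pl->events is divided
 * by 4.
 */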

/*
 * ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_single(pg, pl);
        pl->events++;
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
                struct prop_local_single *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_single(pg, pl);
        *numerator = pl->events;

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}
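
/*
 * End-to-end sketch for the SINGLE flavour (hypothetical caller; names and
 * the shift value are illustrative, error handling elided):
 *
 *      static struct prop_descriptor my_prop;
 *      static struct prop_local_single my_local;
 *
 *      prop_descriptor_init(&my_prop, 10);
 *      prop_local_init_single(&my_local);
 *
 *      __prop_inc_single(&my_prop, &my_local);          on each event
 *
 *      prop_fraction_single(&my_prop, &my_local, &num, &den);
 *
 *      prop_local_destroy_single(&my_local);
 *
 * The percpu flavour follows the same lifecycle with the _percpu variants.
 */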