]>
Commit | Line | Data |
---|---|---|
bf3afd5f EC |
1 | /* |
2 | * qdist.c - QEMU helpers for handling frequency distributions of data. | |
3 | * | |
4 | * Copyright (C) 2016, Emilio G. Cota <[email protected]> | |
5 | * | |
6 | * License: GNU GPL, version 2 or later. | |
7 | * See the COPYING file in the top-level directory. | |
8 | */ | |
e9abfcb5 | 9 | #include "qemu/osdep.h" |
bf3afd5f EC |
10 | #include "qemu/qdist.h" |
11 | ||
12 | #include <math.h> | |
13 | #ifndef NAN | |
14 | #define NAN (0.0 / 0.0) | |
15 | #endif | |
16 | ||
17 | void qdist_init(struct qdist *dist) | |
18 | { | |
19 | dist->entries = g_malloc(sizeof(*dist->entries)); | |
20 | dist->size = 1; | |
21 | dist->n = 0; | |
22 | } | |
23 | ||
24 | void qdist_destroy(struct qdist *dist) | |
25 | { | |
26 | g_free(dist->entries); | |
27 | } | |
28 | ||
29 | static inline int qdist_cmp_double(double a, double b) | |
30 | { | |
31 | if (a > b) { | |
32 | return 1; | |
33 | } else if (a < b) { | |
34 | return -1; | |
35 | } | |
36 | return 0; | |
37 | } | |
38 | ||
39 | static int qdist_cmp(const void *ap, const void *bp) | |
40 | { | |
41 | const struct qdist_entry *a = ap; | |
42 | const struct qdist_entry *b = bp; | |
43 | ||
44 | return qdist_cmp_double(a->x, b->x); | |
45 | } | |
46 | ||
47 | void qdist_add(struct qdist *dist, double x, long count) | |
48 | { | |
49 | struct qdist_entry *entry = NULL; | |
50 | ||
51 | if (dist->n) { | |
52 | struct qdist_entry e; | |
53 | ||
54 | e.x = x; | |
55 | entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp); | |
56 | } | |
57 | ||
58 | if (entry) { | |
59 | entry->count += count; | |
60 | return; | |
61 | } | |
62 | ||
63 | if (unlikely(dist->n == dist->size)) { | |
64 | dist->size *= 2; | |
65 | dist->entries = g_realloc(dist->entries, | |
66 | sizeof(*dist->entries) * (dist->size)); | |
67 | } | |
68 | dist->n++; | |
69 | entry = &dist->entries[dist->n - 1]; | |
70 | entry->x = x; | |
71 | entry->count = count; | |
72 | qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp); | |
73 | } | |
74 | ||
75 | void qdist_inc(struct qdist *dist, double x) | |
76 | { | |
77 | qdist_add(dist, x, 1); | |
78 | } | |
79 | ||
80 | /* | |
81 | * Unicode for block elements. See: | |
82 | * https://en.wikipedia.org/wiki/Block_Elements | |
83 | */ | |
84 | static const gunichar qdist_blocks[] = { | |
85 | 0x2581, | |
86 | 0x2582, | |
87 | 0x2583, | |
88 | 0x2584, | |
89 | 0x2585, | |
90 | 0x2586, | |
91 | 0x2587, | |
92 | 0x2588 | |
93 | }; | |
94 | ||
95 | #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks) | |
96 | ||
97 | /* | |
98 | * Print a distribution into a string. | |
99 | * | |
100 | * This function assumes that appropriate binning has been done on the input; | |
101 | * see qdist_bin__internal() and qdist_pr_plain(). | |
102 | * | |
103 | * Callers must free the returned string with g_free(). | |
104 | */ | |
105 | static char *qdist_pr_internal(const struct qdist *dist) | |
106 | { | |
107 | double min, max; | |
108 | GString *s = g_string_new(""); | |
109 | size_t i; | |
110 | ||
111 | /* if only one entry, its printout will be either full or empty */ | |
112 | if (dist->n == 1) { | |
113 | if (dist->entries[0].count) { | |
114 | g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]); | |
115 | } else { | |
116 | g_string_append_c(s, ' '); | |
117 | } | |
118 | goto out; | |
119 | } | |
120 | ||
121 | /* get min and max counts */ | |
122 | min = dist->entries[0].count; | |
123 | max = min; | |
124 | for (i = 0; i < dist->n; i++) { | |
125 | struct qdist_entry *e = &dist->entries[i]; | |
126 | ||
127 | if (e->count < min) { | |
128 | min = e->count; | |
129 | } | |
130 | if (e->count > max) { | |
131 | max = e->count; | |
132 | } | |
133 | } | |
134 | ||
135 | for (i = 0; i < dist->n; i++) { | |
136 | struct qdist_entry *e = &dist->entries[i]; | |
137 | int index; | |
138 | ||
139 | /* make an exception with 0; instead of using block[0], print a space */ | |
140 | if (e->count) { | |
141 | /* divide first to avoid loss of precision when e->count == max */ | |
142 | index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1); | |
143 | g_string_append_unichar(s, qdist_blocks[index]); | |
144 | } else { | |
145 | g_string_append_c(s, ' '); | |
146 | } | |
147 | } | |
148 | out: | |
149 | return g_string_free(s, FALSE); | |
150 | } | |
151 | ||
152 | /* | |
153 | * Bin the distribution in @from into @n bins of consecutive, non-overlapping | |
154 | * intervals, copying the result to @to. | |
155 | * | |
156 | * This function is internal to qdist: only this file and test code should | |
157 | * ever call it. | |
158 | * | |
159 | * Note: calling this function on an already-binned qdist is a bug. | |
160 | * | |
161 | * If @n == 0 or @from->n == 1, use @from->n. | |
162 | */ | |
163 | void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n) | |
164 | { | |
165 | double xmin, xmax; | |
166 | double step; | |
167 | size_t i, j; | |
168 | ||
169 | qdist_init(to); | |
170 | ||
171 | if (from->n == 0) { | |
172 | return; | |
173 | } | |
174 | if (n == 0 || from->n == 1) { | |
175 | n = from->n; | |
176 | } | |
177 | ||
178 | /* set equally-sized bins between @from's left and right */ | |
179 | xmin = qdist_xmin(from); | |
180 | xmax = qdist_xmax(from); | |
181 | step = (xmax - xmin) / n; | |
182 | ||
183 | if (n == from->n) { | |
184 | /* if @from's entries are equally spaced, no need to re-bin */ | |
185 | for (i = 0; i < from->n; i++) { | |
186 | if (from->entries[i].x != xmin + i * step) { | |
187 | goto rebin; | |
188 | } | |
189 | } | |
190 | /* they're equally spaced, so copy the dist and bail out */ | |
f9dbc19e | 191 | to->entries = g_realloc_n(to->entries, n, sizeof(*to->entries)); |
bf3afd5f EC |
192 | to->n = from->n; |
193 | memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n); | |
194 | return; | |
195 | } | |
196 | ||
197 | rebin: | |
198 | j = 0; | |
199 | for (i = 0; i < n; i++) { | |
200 | double x; | |
201 | double left, right; | |
202 | ||
203 | left = xmin + i * step; | |
204 | right = xmin + (i + 1) * step; | |
205 | ||
206 | /* Add x, even if it might not get any counts later */ | |
207 | x = left; | |
208 | qdist_add(to, x, 0); | |
209 | ||
210 | /* | |
211 | * To avoid double-counting we capture [left, right) ranges, except for | |
212 | * the righmost bin, which captures a [left, right] range. | |
213 | */ | |
214 | while (j < from->n && (from->entries[j].x < right || i == n - 1)) { | |
215 | struct qdist_entry *o = &from->entries[j]; | |
216 | ||
217 | qdist_add(to, x, o->count); | |
218 | j++; | |
219 | } | |
220 | } | |
221 | } | |
222 | ||
223 | /* | |
224 | * Print @dist into a string, after re-binning it into @n bins of consecutive, | |
225 | * non-overlapping intervals. | |
226 | * | |
227 | * If @n == 0, use @orig->n. | |
228 | * | |
229 | * Callers must free the returned string with g_free(). | |
230 | */ | |
231 | char *qdist_pr_plain(const struct qdist *dist, size_t n) | |
232 | { | |
233 | struct qdist binned; | |
234 | char *ret; | |
235 | ||
236 | if (dist->n == 0) { | |
237 | return NULL; | |
238 | } | |
239 | qdist_bin__internal(&binned, dist, n); | |
240 | ret = qdist_pr_internal(&binned); | |
241 | qdist_destroy(&binned); | |
242 | return ret; | |
243 | } | |
244 | ||
245 | static char *qdist_pr_label(const struct qdist *dist, size_t n_bins, | |
246 | uint32_t opt, bool is_left) | |
247 | { | |
248 | const char *percent; | |
249 | const char *lparen; | |
250 | const char *rparen; | |
251 | GString *s; | |
252 | double x1, x2, step; | |
253 | double x; | |
254 | double n; | |
255 | int dec; | |
256 | ||
257 | s = g_string_new(""); | |
258 | if (!(opt & QDIST_PR_LABELS)) { | |
259 | goto out; | |
260 | } | |
261 | ||
262 | dec = opt & QDIST_PR_NODECIMAL ? 0 : 1; | |
263 | percent = opt & QDIST_PR_PERCENT ? "%" : ""; | |
264 | ||
265 | n = n_bins ? n_bins : dist->n; | |
266 | x = is_left ? qdist_xmin(dist) : qdist_xmax(dist); | |
267 | step = (qdist_xmax(dist) - qdist_xmin(dist)) / n; | |
268 | ||
269 | if (opt & QDIST_PR_100X) { | |
270 | x *= 100.0; | |
271 | step *= 100.0; | |
272 | } | |
273 | if (opt & QDIST_PR_NOBINRANGE) { | |
274 | lparen = rparen = ""; | |
275 | x1 = x; | |
276 | x2 = x; /* unnecessary, but a dumb compiler might not figure it out */ | |
277 | } else { | |
278 | lparen = "["; | |
279 | rparen = is_left ? ")" : "]"; | |
280 | if (is_left) { | |
281 | x1 = x; | |
282 | x2 = x + step; | |
283 | } else { | |
284 | x1 = x - step; | |
285 | x2 = x; | |
286 | } | |
287 | } | |
288 | g_string_append_printf(s, "%s%.*f", lparen, dec, x1); | |
289 | if (!(opt & QDIST_PR_NOBINRANGE)) { | |
290 | g_string_append_printf(s, ",%.*f%s", dec, x2, rparen); | |
291 | } | |
292 | g_string_append(s, percent); | |
293 | out: | |
294 | return g_string_free(s, FALSE); | |
295 | } | |
296 | ||
297 | /* | |
298 | * Print the distribution's histogram into a string. | |
299 | * | |
300 | * See also: qdist_pr_plain(). | |
301 | * | |
302 | * Callers must free the returned string with g_free(). | |
303 | */ | |
304 | char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt) | |
305 | { | |
306 | const char *border = opt & QDIST_PR_BORDER ? "|" : ""; | |
307 | char *llabel, *rlabel; | |
308 | char *hgram; | |
309 | GString *s; | |
310 | ||
311 | if (dist->n == 0) { | |
312 | return NULL; | |
313 | } | |
314 | ||
315 | s = g_string_new(""); | |
316 | ||
317 | llabel = qdist_pr_label(dist, n_bins, opt, true); | |
318 | rlabel = qdist_pr_label(dist, n_bins, opt, false); | |
319 | hgram = qdist_pr_plain(dist, n_bins); | |
320 | g_string_append_printf(s, "%s%s%s%s%s", | |
321 | llabel, border, hgram, border, rlabel); | |
322 | g_free(llabel); | |
323 | g_free(rlabel); | |
324 | g_free(hgram); | |
325 | ||
326 | return g_string_free(s, FALSE); | |
327 | } | |
328 | ||
329 | static inline double qdist_x(const struct qdist *dist, int index) | |
330 | { | |
331 | if (dist->n == 0) { | |
332 | return NAN; | |
333 | } | |
334 | return dist->entries[index].x; | |
335 | } | |
336 | ||
337 | double qdist_xmin(const struct qdist *dist) | |
338 | { | |
339 | return qdist_x(dist, 0); | |
340 | } | |
341 | ||
342 | double qdist_xmax(const struct qdist *dist) | |
343 | { | |
344 | return qdist_x(dist, dist->n - 1); | |
345 | } | |
346 | ||
347 | size_t qdist_unique_entries(const struct qdist *dist) | |
348 | { | |
349 | return dist->n; | |
350 | } | |
351 | ||
352 | unsigned long qdist_sample_count(const struct qdist *dist) | |
353 | { | |
354 | unsigned long count = 0; | |
355 | size_t i; | |
356 | ||
357 | for (i = 0; i < dist->n; i++) { | |
358 | struct qdist_entry *e = &dist->entries[i]; | |
359 | ||
360 | count += e->count; | |
361 | } | |
362 | return count; | |
363 | } | |
364 | ||
365 | static double qdist_pairwise_avg(const struct qdist *dist, size_t index, | |
366 | size_t n, unsigned long count) | |
367 | { | |
368 | /* amortize the recursion by using a base case > 2 */ | |
369 | if (n <= 8) { | |
370 | size_t i; | |
371 | double ret = 0; | |
372 | ||
373 | for (i = 0; i < n; i++) { | |
374 | struct qdist_entry *e = &dist->entries[index + i]; | |
375 | ||
376 | ret += e->x * e->count / count; | |
377 | } | |
378 | return ret; | |
379 | } else { | |
380 | size_t n2 = n / 2; | |
381 | ||
382 | return qdist_pairwise_avg(dist, index, n2, count) + | |
383 | qdist_pairwise_avg(dist, index + n2, n - n2, count); | |
384 | } | |
385 | } | |
386 | ||
387 | double qdist_avg(const struct qdist *dist) | |
388 | { | |
389 | unsigned long count; | |
390 | ||
391 | count = qdist_sample_count(dist); | |
392 | if (!count) { | |
393 | return NAN; | |
394 | } | |
395 | return qdist_pairwise_avg(dist, 0, dist->n, count); | |
396 | } |