]>
Commit | Line | Data |
---|---|---|
bf3afd5f EC |
1 | /* |
2 | * qdist.c - QEMU helpers for handling frequency distributions of data. | |
3 | * | |
4 | * Copyright (C) 2016, Emilio G. Cota <[email protected]> | |
5 | * | |
6 | * License: GNU GPL, version 2 or later. | |
7 | * See the COPYING file in the top-level directory. | |
8 | */ | |
e9abfcb5 | 9 | #include "qemu/osdep.h" |
bf3afd5f EC |
10 | #include "qemu/qdist.h" |
11 | ||
12 | #include <math.h> | |
13 | #ifndef NAN | |
14 | #define NAN (0.0 / 0.0) | |
15 | #endif | |
16 | ||
11b7b07f EC |
17 | #define QDIST_EMPTY_STR "(empty)" |
18 | ||
bf3afd5f EC |
19 | void qdist_init(struct qdist *dist) |
20 | { | |
071d4054 | 21 | dist->entries = g_new(struct qdist_entry, 1); |
bf3afd5f EC |
22 | dist->size = 1; |
23 | dist->n = 0; | |
24 | } | |
25 | ||
26 | void qdist_destroy(struct qdist *dist) | |
27 | { | |
28 | g_free(dist->entries); | |
29 | } | |
30 | ||
31 | static inline int qdist_cmp_double(double a, double b) | |
32 | { | |
33 | if (a > b) { | |
34 | return 1; | |
35 | } else if (a < b) { | |
36 | return -1; | |
37 | } | |
38 | return 0; | |
39 | } | |
40 | ||
41 | static int qdist_cmp(const void *ap, const void *bp) | |
42 | { | |
43 | const struct qdist_entry *a = ap; | |
44 | const struct qdist_entry *b = bp; | |
45 | ||
46 | return qdist_cmp_double(a->x, b->x); | |
47 | } | |
48 | ||
49 | void qdist_add(struct qdist *dist, double x, long count) | |
50 | { | |
51 | struct qdist_entry *entry = NULL; | |
52 | ||
53 | if (dist->n) { | |
54 | struct qdist_entry e; | |
55 | ||
56 | e.x = x; | |
57 | entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp); | |
58 | } | |
59 | ||
60 | if (entry) { | |
61 | entry->count += count; | |
62 | return; | |
63 | } | |
64 | ||
65 | if (unlikely(dist->n == dist->size)) { | |
66 | dist->size *= 2; | |
071d4054 | 67 | dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size); |
bf3afd5f EC |
68 | } |
69 | dist->n++; | |
70 | entry = &dist->entries[dist->n - 1]; | |
71 | entry->x = x; | |
72 | entry->count = count; | |
73 | qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp); | |
74 | } | |
75 | ||
76 | void qdist_inc(struct qdist *dist, double x) | |
77 | { | |
78 | qdist_add(dist, x, 1); | |
79 | } | |
80 | ||
81 | /* | |
82 | * Unicode for block elements. See: | |
83 | * https://en.wikipedia.org/wiki/Block_Elements | |
84 | */ | |
85 | static const gunichar qdist_blocks[] = { | |
86 | 0x2581, | |
87 | 0x2582, | |
88 | 0x2583, | |
89 | 0x2584, | |
90 | 0x2585, | |
91 | 0x2586, | |
92 | 0x2587, | |
93 | 0x2588 | |
94 | }; | |
95 | ||
96 | #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks) | |
97 | ||
98 | /* | |
99 | * Print a distribution into a string. | |
100 | * | |
101 | * This function assumes that appropriate binning has been done on the input; | |
102 | * see qdist_bin__internal() and qdist_pr_plain(). | |
103 | * | |
104 | * Callers must free the returned string with g_free(). | |
105 | */ | |
106 | static char *qdist_pr_internal(const struct qdist *dist) | |
107 | { | |
108 | double min, max; | |
109 | GString *s = g_string_new(""); | |
110 | size_t i; | |
111 | ||
112 | /* if only one entry, its printout will be either full or empty */ | |
113 | if (dist->n == 1) { | |
114 | if (dist->entries[0].count) { | |
115 | g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]); | |
116 | } else { | |
117 | g_string_append_c(s, ' '); | |
118 | } | |
119 | goto out; | |
120 | } | |
121 | ||
122 | /* get min and max counts */ | |
123 | min = dist->entries[0].count; | |
124 | max = min; | |
125 | for (i = 0; i < dist->n; i++) { | |
126 | struct qdist_entry *e = &dist->entries[i]; | |
127 | ||
128 | if (e->count < min) { | |
129 | min = e->count; | |
130 | } | |
131 | if (e->count > max) { | |
132 | max = e->count; | |
133 | } | |
134 | } | |
135 | ||
136 | for (i = 0; i < dist->n; i++) { | |
137 | struct qdist_entry *e = &dist->entries[i]; | |
138 | int index; | |
139 | ||
140 | /* make an exception with 0; instead of using block[0], print a space */ | |
141 | if (e->count) { | |
142 | /* divide first to avoid loss of precision when e->count == max */ | |
143 | index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1); | |
144 | g_string_append_unichar(s, qdist_blocks[index]); | |
145 | } else { | |
146 | g_string_append_c(s, ' '); | |
147 | } | |
148 | } | |
149 | out: | |
150 | return g_string_free(s, FALSE); | |
151 | } | |
152 | ||
153 | /* | |
154 | * Bin the distribution in @from into @n bins of consecutive, non-overlapping | |
155 | * intervals, copying the result to @to. | |
156 | * | |
157 | * This function is internal to qdist: only this file and test code should | |
158 | * ever call it. | |
159 | * | |
160 | * Note: calling this function on an already-binned qdist is a bug. | |
161 | * | |
162 | * If @n == 0 or @from->n == 1, use @from->n. | |
163 | */ | |
164 | void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n) | |
165 | { | |
166 | double xmin, xmax; | |
167 | double step; | |
168 | size_t i, j; | |
169 | ||
170 | qdist_init(to); | |
171 | ||
172 | if (from->n == 0) { | |
173 | return; | |
174 | } | |
175 | if (n == 0 || from->n == 1) { | |
176 | n = from->n; | |
177 | } | |
178 | ||
179 | /* set equally-sized bins between @from's left and right */ | |
180 | xmin = qdist_xmin(from); | |
181 | xmax = qdist_xmax(from); | |
182 | step = (xmax - xmin) / n; | |
183 | ||
184 | if (n == from->n) { | |
185 | /* if @from's entries are equally spaced, no need to re-bin */ | |
186 | for (i = 0; i < from->n; i++) { | |
187 | if (from->entries[i].x != xmin + i * step) { | |
188 | goto rebin; | |
189 | } | |
190 | } | |
191 | /* they're equally spaced, so copy the dist and bail out */ | |
071d4054 | 192 | to->entries = g_renew(struct qdist_entry, to->entries, n); |
bf3afd5f EC |
193 | to->n = from->n; |
194 | memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n); | |
195 | return; | |
196 | } | |
197 | ||
198 | rebin: | |
199 | j = 0; | |
200 | for (i = 0; i < n; i++) { | |
201 | double x; | |
202 | double left, right; | |
203 | ||
204 | left = xmin + i * step; | |
205 | right = xmin + (i + 1) * step; | |
206 | ||
207 | /* Add x, even if it might not get any counts later */ | |
208 | x = left; | |
209 | qdist_add(to, x, 0); | |
210 | ||
211 | /* | |
212 | * To avoid double-counting we capture [left, right) ranges, except for | |
213 | * the righmost bin, which captures a [left, right] range. | |
214 | */ | |
215 | while (j < from->n && (from->entries[j].x < right || i == n - 1)) { | |
216 | struct qdist_entry *o = &from->entries[j]; | |
217 | ||
218 | qdist_add(to, x, o->count); | |
219 | j++; | |
220 | } | |
221 | } | |
222 | } | |
223 | ||
224 | /* | |
225 | * Print @dist into a string, after re-binning it into @n bins of consecutive, | |
226 | * non-overlapping intervals. | |
227 | * | |
228 | * If @n == 0, use @orig->n. | |
229 | * | |
230 | * Callers must free the returned string with g_free(). | |
231 | */ | |
232 | char *qdist_pr_plain(const struct qdist *dist, size_t n) | |
233 | { | |
234 | struct qdist binned; | |
235 | char *ret; | |
236 | ||
237 | if (dist->n == 0) { | |
11b7b07f | 238 | return g_strdup(QDIST_EMPTY_STR); |
bf3afd5f EC |
239 | } |
240 | qdist_bin__internal(&binned, dist, n); | |
241 | ret = qdist_pr_internal(&binned); | |
242 | qdist_destroy(&binned); | |
243 | return ret; | |
244 | } | |
245 | ||
246 | static char *qdist_pr_label(const struct qdist *dist, size_t n_bins, | |
247 | uint32_t opt, bool is_left) | |
248 | { | |
249 | const char *percent; | |
250 | const char *lparen; | |
251 | const char *rparen; | |
252 | GString *s; | |
253 | double x1, x2, step; | |
254 | double x; | |
255 | double n; | |
256 | int dec; | |
257 | ||
258 | s = g_string_new(""); | |
259 | if (!(opt & QDIST_PR_LABELS)) { | |
260 | goto out; | |
261 | } | |
262 | ||
263 | dec = opt & QDIST_PR_NODECIMAL ? 0 : 1; | |
264 | percent = opt & QDIST_PR_PERCENT ? "%" : ""; | |
265 | ||
266 | n = n_bins ? n_bins : dist->n; | |
267 | x = is_left ? qdist_xmin(dist) : qdist_xmax(dist); | |
268 | step = (qdist_xmax(dist) - qdist_xmin(dist)) / n; | |
269 | ||
270 | if (opt & QDIST_PR_100X) { | |
271 | x *= 100.0; | |
272 | step *= 100.0; | |
273 | } | |
274 | if (opt & QDIST_PR_NOBINRANGE) { | |
275 | lparen = rparen = ""; | |
276 | x1 = x; | |
277 | x2 = x; /* unnecessary, but a dumb compiler might not figure it out */ | |
278 | } else { | |
279 | lparen = "["; | |
280 | rparen = is_left ? ")" : "]"; | |
281 | if (is_left) { | |
282 | x1 = x; | |
283 | x2 = x + step; | |
284 | } else { | |
285 | x1 = x - step; | |
286 | x2 = x; | |
287 | } | |
288 | } | |
289 | g_string_append_printf(s, "%s%.*f", lparen, dec, x1); | |
290 | if (!(opt & QDIST_PR_NOBINRANGE)) { | |
291 | g_string_append_printf(s, ",%.*f%s", dec, x2, rparen); | |
292 | } | |
293 | g_string_append(s, percent); | |
294 | out: | |
295 | return g_string_free(s, FALSE); | |
296 | } | |
297 | ||
298 | /* | |
299 | * Print the distribution's histogram into a string. | |
300 | * | |
301 | * See also: qdist_pr_plain(). | |
302 | * | |
303 | * Callers must free the returned string with g_free(). | |
304 | */ | |
305 | char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt) | |
306 | { | |
307 | const char *border = opt & QDIST_PR_BORDER ? "|" : ""; | |
308 | char *llabel, *rlabel; | |
309 | char *hgram; | |
310 | GString *s; | |
311 | ||
312 | if (dist->n == 0) { | |
11b7b07f | 313 | return g_strdup(QDIST_EMPTY_STR); |
bf3afd5f EC |
314 | } |
315 | ||
316 | s = g_string_new(""); | |
317 | ||
318 | llabel = qdist_pr_label(dist, n_bins, opt, true); | |
319 | rlabel = qdist_pr_label(dist, n_bins, opt, false); | |
320 | hgram = qdist_pr_plain(dist, n_bins); | |
321 | g_string_append_printf(s, "%s%s%s%s%s", | |
322 | llabel, border, hgram, border, rlabel); | |
323 | g_free(llabel); | |
324 | g_free(rlabel); | |
325 | g_free(hgram); | |
326 | ||
327 | return g_string_free(s, FALSE); | |
328 | } | |
329 | ||
330 | static inline double qdist_x(const struct qdist *dist, int index) | |
331 | { | |
332 | if (dist->n == 0) { | |
333 | return NAN; | |
334 | } | |
335 | return dist->entries[index].x; | |
336 | } | |
337 | ||
338 | double qdist_xmin(const struct qdist *dist) | |
339 | { | |
340 | return qdist_x(dist, 0); | |
341 | } | |
342 | ||
343 | double qdist_xmax(const struct qdist *dist) | |
344 | { | |
345 | return qdist_x(dist, dist->n - 1); | |
346 | } | |
347 | ||
348 | size_t qdist_unique_entries(const struct qdist *dist) | |
349 | { | |
350 | return dist->n; | |
351 | } | |
352 | ||
353 | unsigned long qdist_sample_count(const struct qdist *dist) | |
354 | { | |
355 | unsigned long count = 0; | |
356 | size_t i; | |
357 | ||
358 | for (i = 0; i < dist->n; i++) { | |
359 | struct qdist_entry *e = &dist->entries[i]; | |
360 | ||
361 | count += e->count; | |
362 | } | |
363 | return count; | |
364 | } | |
365 | ||
366 | static double qdist_pairwise_avg(const struct qdist *dist, size_t index, | |
367 | size_t n, unsigned long count) | |
368 | { | |
369 | /* amortize the recursion by using a base case > 2 */ | |
370 | if (n <= 8) { | |
371 | size_t i; | |
372 | double ret = 0; | |
373 | ||
374 | for (i = 0; i < n; i++) { | |
375 | struct qdist_entry *e = &dist->entries[index + i]; | |
376 | ||
377 | ret += e->x * e->count / count; | |
378 | } | |
379 | return ret; | |
380 | } else { | |
381 | size_t n2 = n / 2; | |
382 | ||
383 | return qdist_pairwise_avg(dist, index, n2, count) + | |
384 | qdist_pairwise_avg(dist, index + n2, n - n2, count); | |
385 | } | |
386 | } | |
387 | ||
388 | double qdist_avg(const struct qdist *dist) | |
389 | { | |
390 | unsigned long count; | |
391 | ||
392 | count = qdist_sample_count(dist); | |
393 | if (!count) { | |
394 | return NAN; | |
395 | } | |
396 | return qdist_pairwise_avg(dist, 0, dist->n, count); | |
397 | } |