]>
Commit | Line | Data |
---|---|---|
73f3d1b4 NT |
1 | /* |
2 | * FSE : Finite State Entropy encoder | |
3 | * Copyright (C) 2013-2015, Yann Collet. | |
4 | * | |
5 | * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions are | |
9 | * met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above | |
14 | * copyright notice, this list of conditions and the following disclaimer | |
15 | * in the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * | |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | * | |
30 | * This program is free software; you can redistribute it and/or modify it under | |
31 | * the terms of the GNU General Public License version 2 as published by the | |
32 | * Free Software Foundation. This program is dual-licensed; you may select | |
33 | * either version 2 of the GNU General Public License ("GPL") or BSD license | |
34 | * ("BSD"). | |
35 | * | |
36 | * You can contact the author at : | |
37 | * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |
38 | */ | |
39 | ||
40 | /* ************************************************************** | |
41 | * Compiler specifics | |
42 | ****************************************************************/ | |
43 | #define FORCE_INLINE static __always_inline | |
44 | ||
45 | /* ************************************************************** | |
46 | * Includes | |
47 | ****************************************************************/ | |
48 | #include "bitstream.h" | |
49 | #include "fse.h" | |
50 | #include <linux/compiler.h> | |
51 | #include <linux/kernel.h> | |
52 | #include <linux/math64.h> | |
53 | #include <linux/string.h> /* memcpy, memset */ | |
54 | ||
55 | /* ************************************************************** | |
56 | * Error Management | |
57 | ****************************************************************/ | |
58 | #define FSE_STATIC_ASSERT(c) \ | |
59 | { \ | |
60 | enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ | |
61 | } /* use only *after* variable declarations */ | |
62 | ||
63 | /* ************************************************************** | |
64 | * Templates | |
65 | ****************************************************************/ | |
66 | /* | |
67 | designed to be included | |
68 | for type-specific functions (template emulation in C) | |
69 | Objective is to write these functions only once, for improved maintenance | |
70 | */ | |
71 | ||
72 | /* safety checks */ | |
73 | #ifndef FSE_FUNCTION_EXTENSION | |
74 | #error "FSE_FUNCTION_EXTENSION must be defined" | |
75 | #endif | |
76 | #ifndef FSE_FUNCTION_TYPE | |
77 | #error "FSE_FUNCTION_TYPE must be defined" | |
78 | #endif | |
79 | ||
80 | /* Function names */ | |
81 | #define FSE_CAT(X, Y) X##Y | |
82 | #define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y) | |
83 | #define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y) | |
84 | ||
85 | /* Function templates */ | |
86 | ||
87 | /* FSE_buildCTable_wksp() : | |
88 | * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). | |
89 | * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` | |
90 | * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements | |
91 | */ | |
92 | size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize) | |
93 | { | |
94 | U32 const tableSize = 1 << tableLog; | |
95 | U32 const tableMask = tableSize - 1; | |
96 | void *const ptr = ct; | |
97 | U16 *const tableU16 = ((U16 *)ptr) + 2; | |
98 | void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1); | |
99 | FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); | |
100 | U32 const step = FSE_TABLESTEP(tableSize); | |
101 | U32 highThreshold = tableSize - 1; | |
102 | ||
103 | U32 *cumul; | |
104 | FSE_FUNCTION_TYPE *tableSymbol; | |
105 | size_t spaceUsed32 = 0; | |
106 | ||
107 | cumul = (U32 *)workspace + spaceUsed32; | |
108 | spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2; | |
109 | tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32); | |
110 | spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2; | |
111 | ||
112 | if ((spaceUsed32 << 2) > workspaceSize) | |
113 | return ERROR(tableLog_tooLarge); | |
114 | workspace = (U32 *)workspace + spaceUsed32; | |
115 | workspaceSize -= (spaceUsed32 << 2); | |
116 | ||
117 | /* CTable header */ | |
118 | tableU16[-2] = (U16)tableLog; | |
119 | tableU16[-1] = (U16)maxSymbolValue; | |
120 | ||
121 | /* For explanations on how to distribute symbol values over the table : | |
122 | * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ | |
123 | ||
124 | /* symbol start positions */ | |
125 | { | |
126 | U32 u; | |
127 | cumul[0] = 0; | |
128 | for (u = 1; u <= maxSymbolValue + 1; u++) { | |
129 | if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */ | |
130 | cumul[u] = cumul[u - 1] + 1; | |
131 | tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1); | |
132 | } else { | |
133 | cumul[u] = cumul[u - 1] + normalizedCounter[u - 1]; | |
134 | } | |
135 | } | |
136 | cumul[maxSymbolValue + 1] = tableSize + 1; | |
137 | } | |
138 | ||
139 | /* Spread symbols */ | |
140 | { | |
141 | U32 position = 0; | |
142 | U32 symbol; | |
143 | for (symbol = 0; symbol <= maxSymbolValue; symbol++) { | |
144 | int nbOccurences; | |
145 | for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) { | |
146 | tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; | |
147 | position = (position + step) & tableMask; | |
148 | while (position > highThreshold) | |
149 | position = (position + step) & tableMask; /* Low proba area */ | |
150 | } | |
151 | } | |
152 | ||
153 | if (position != 0) | |
154 | return ERROR(GENERIC); /* Must have gone through all positions */ | |
155 | } | |
156 | ||
157 | /* Build table */ | |
158 | { | |
159 | U32 u; | |
160 | for (u = 0; u < tableSize; u++) { | |
161 | FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ | |
162 | tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */ | |
163 | } | |
164 | } | |
165 | ||
166 | /* Build Symbol Transformation Table */ | |
167 | { | |
168 | unsigned total = 0; | |
169 | unsigned s; | |
170 | for (s = 0; s <= maxSymbolValue; s++) { | |
171 | switch (normalizedCounter[s]) { | |
172 | case 0: break; | |
173 | ||
174 | case -1: | |
175 | case 1: | |
176 | symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog); | |
177 | symbolTT[s].deltaFindState = total - 1; | |
178 | total++; | |
179 | break; | |
180 | default: { | |
181 | U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1); | |
182 | U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; | |
183 | symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; | |
184 | symbolTT[s].deltaFindState = total - normalizedCounter[s]; | |
185 | total += normalizedCounter[s]; | |
186 | } | |
187 | } | |
188 | } | |
189 | } | |
190 | ||
191 | return 0; | |
192 | } | |
193 | ||
194 | /*-************************************************************** | |
195 | * FSE NCount encoding-decoding | |
196 | ****************************************************************/ | |
197 | size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) | |
198 | { | |
199 | size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3; | |
200 | return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ | |
201 | } | |
202 | ||
203 | static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, | |
204 | unsigned writeIsSafe) | |
205 | { | |
206 | BYTE *const ostart = (BYTE *)header; | |
207 | BYTE *out = ostart; | |
208 | BYTE *const oend = ostart + headerBufferSize; | |
209 | int nbBits; | |
210 | const int tableSize = 1 << tableLog; | |
211 | int remaining; | |
212 | int threshold; | |
213 | U32 bitStream; | |
214 | int bitCount; | |
215 | unsigned charnum = 0; | |
216 | int previous0 = 0; | |
217 | ||
218 | bitStream = 0; | |
219 | bitCount = 0; | |
220 | /* Table Size */ | |
221 | bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount; | |
222 | bitCount += 4; | |
223 | ||
224 | /* Init */ | |
225 | remaining = tableSize + 1; /* +1 for extra accuracy */ | |
226 | threshold = tableSize; | |
227 | nbBits = tableLog + 1; | |
228 | ||
229 | while (remaining > 1) { /* stops at 1 */ | |
230 | if (previous0) { | |
231 | unsigned start = charnum; | |
232 | while (!normalizedCounter[charnum]) | |
233 | charnum++; | |
234 | while (charnum >= start + 24) { | |
235 | start += 24; | |
236 | bitStream += 0xFFFFU << bitCount; | |
237 | if ((!writeIsSafe) && (out > oend - 2)) | |
238 | return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |
239 | out[0] = (BYTE)bitStream; | |
240 | out[1] = (BYTE)(bitStream >> 8); | |
241 | out += 2; | |
242 | bitStream >>= 16; | |
243 | } | |
244 | while (charnum >= start + 3) { | |
245 | start += 3; | |
246 | bitStream += 3 << bitCount; | |
247 | bitCount += 2; | |
248 | } | |
249 | bitStream += (charnum - start) << bitCount; | |
250 | bitCount += 2; | |
251 | if (bitCount > 16) { | |
252 | if ((!writeIsSafe) && (out > oend - 2)) | |
253 | return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |
254 | out[0] = (BYTE)bitStream; | |
255 | out[1] = (BYTE)(bitStream >> 8); | |
256 | out += 2; | |
257 | bitStream >>= 16; | |
258 | bitCount -= 16; | |
259 | } | |
260 | } | |
261 | { | |
262 | int count = normalizedCounter[charnum++]; | |
263 | int const max = (2 * threshold - 1) - remaining; | |
264 | remaining -= count < 0 ? -count : count; | |
265 | count++; /* +1 for extra accuracy */ | |
266 | if (count >= threshold) | |
267 | count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ | |
268 | bitStream += count << bitCount; | |
269 | bitCount += nbBits; | |
270 | bitCount -= (count < max); | |
271 | previous0 = (count == 1); | |
272 | if (remaining < 1) | |
273 | return ERROR(GENERIC); | |
274 | while (remaining < threshold) | |
275 | nbBits--, threshold >>= 1; | |
276 | } | |
277 | if (bitCount > 16) { | |
278 | if ((!writeIsSafe) && (out > oend - 2)) | |
279 | return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |
280 | out[0] = (BYTE)bitStream; | |
281 | out[1] = (BYTE)(bitStream >> 8); | |
282 | out += 2; | |
283 | bitStream >>= 16; | |
284 | bitCount -= 16; | |
285 | } | |
286 | } | |
287 | ||
288 | /* flush remaining bitStream */ | |
289 | if ((!writeIsSafe) && (out > oend - 2)) | |
290 | return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |
291 | out[0] = (BYTE)bitStream; | |
292 | out[1] = (BYTE)(bitStream >> 8); | |
293 | out += (bitCount + 7) / 8; | |
294 | ||
295 | if (charnum > maxSymbolValue + 1) | |
296 | return ERROR(GENERIC); | |
297 | ||
298 | return (out - ostart); | |
299 | } | |
300 | ||
301 | size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) | |
302 | { | |
303 | if (tableLog > FSE_MAX_TABLELOG) | |
304 | return ERROR(tableLog_tooLarge); /* Unsupported */ | |
305 | if (tableLog < FSE_MIN_TABLELOG) | |
306 | return ERROR(GENERIC); /* Unsupported */ | |
307 | ||
308 | if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) | |
309 | return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); | |
310 | ||
311 | return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); | |
312 | } | |
313 | ||
314 | /*-************************************************************** | |
315 | * Counting histogram | |
316 | ****************************************************************/ | |
317 | /*! FSE_count_simple | |
318 | This function counts byte values within `src`, and store the histogram into table `count`. | |
319 | It doesn't use any additional memory. | |
320 | But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. | |
321 | For this reason, prefer using a table `count` with 256 elements. | |
322 | @return : count of most numerous element | |
323 | */ | |
324 | size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize) | |
325 | { | |
326 | const BYTE *ip = (const BYTE *)src; | |
327 | const BYTE *const end = ip + srcSize; | |
328 | unsigned maxSymbolValue = *maxSymbolValuePtr; | |
329 | unsigned max = 0; | |
330 | ||
331 | memset(count, 0, (maxSymbolValue + 1) * sizeof(*count)); | |
332 | if (srcSize == 0) { | |
333 | *maxSymbolValuePtr = 0; | |
334 | return 0; | |
335 | } | |
336 | ||
337 | while (ip < end) | |
338 | count[*ip++]++; | |
339 | ||
340 | while (!count[maxSymbolValue]) | |
341 | maxSymbolValue--; | |
342 | *maxSymbolValuePtr = maxSymbolValue; | |
343 | ||
344 | { | |
345 | U32 s; | |
346 | for (s = 0; s <= maxSymbolValue; s++) | |
347 | if (count[s] > max) | |
348 | max = count[s]; | |
349 | } | |
350 | ||
351 | return (size_t)max; | |
352 | } | |
353 | ||
354 | /* FSE_count_parallel_wksp() : | |
355 | * Same as FSE_count_parallel(), but using an externally provided scratch buffer. | |
356 | * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ | |
357 | static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax, | |
358 | unsigned *const workSpace) | |
359 | { | |
360 | const BYTE *ip = (const BYTE *)source; | |
361 | const BYTE *const iend = ip + sourceSize; | |
362 | unsigned maxSymbolValue = *maxSymbolValuePtr; | |
363 | unsigned max = 0; | |
364 | U32 *const Counting1 = workSpace; | |
365 | U32 *const Counting2 = Counting1 + 256; | |
366 | U32 *const Counting3 = Counting2 + 256; | |
367 | U32 *const Counting4 = Counting3 + 256; | |
368 | ||
369 | memset(Counting1, 0, 4 * 256 * sizeof(unsigned)); | |
370 | ||
371 | /* safety checks */ | |
372 | if (!sourceSize) { | |
373 | memset(count, 0, maxSymbolValue + 1); | |
374 | *maxSymbolValuePtr = 0; | |
375 | return 0; | |
376 | } | |
377 | if (!maxSymbolValue) | |
378 | maxSymbolValue = 255; /* 0 == default */ | |
379 | ||
380 | /* by stripes of 16 bytes */ | |
381 | { | |
382 | U32 cached = ZSTD_read32(ip); | |
383 | ip += 4; | |
384 | while (ip < iend - 15) { | |
385 | U32 c = cached; | |
386 | cached = ZSTD_read32(ip); | |
387 | ip += 4; | |
388 | Counting1[(BYTE)c]++; | |
389 | Counting2[(BYTE)(c >> 8)]++; | |
390 | Counting3[(BYTE)(c >> 16)]++; | |
391 | Counting4[c >> 24]++; | |
392 | c = cached; | |
393 | cached = ZSTD_read32(ip); | |
394 | ip += 4; | |
395 | Counting1[(BYTE)c]++; | |
396 | Counting2[(BYTE)(c >> 8)]++; | |
397 | Counting3[(BYTE)(c >> 16)]++; | |
398 | Counting4[c >> 24]++; | |
399 | c = cached; | |
400 | cached = ZSTD_read32(ip); | |
401 | ip += 4; | |
402 | Counting1[(BYTE)c]++; | |
403 | Counting2[(BYTE)(c >> 8)]++; | |
404 | Counting3[(BYTE)(c >> 16)]++; | |
405 | Counting4[c >> 24]++; | |
406 | c = cached; | |
407 | cached = ZSTD_read32(ip); | |
408 | ip += 4; | |
409 | Counting1[(BYTE)c]++; | |
410 | Counting2[(BYTE)(c >> 8)]++; | |
411 | Counting3[(BYTE)(c >> 16)]++; | |
412 | Counting4[c >> 24]++; | |
413 | } | |
414 | ip -= 4; | |
415 | } | |
416 | ||
417 | /* finish last symbols */ | |
418 | while (ip < iend) | |
419 | Counting1[*ip++]++; | |
420 | ||
421 | if (checkMax) { /* verify stats will fit into destination table */ | |
422 | U32 s; | |
423 | for (s = 255; s > maxSymbolValue; s--) { | |
424 | Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; | |
425 | if (Counting1[s]) | |
426 | return ERROR(maxSymbolValue_tooSmall); | |
427 | } | |
428 | } | |
429 | ||
430 | { | |
431 | U32 s; | |
432 | for (s = 0; s <= maxSymbolValue; s++) { | |
433 | count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; | |
434 | if (count[s] > max) | |
435 | max = count[s]; | |
436 | } | |
437 | } | |
438 | ||
439 | while (!count[maxSymbolValue]) | |
440 | maxSymbolValue--; | |
441 | *maxSymbolValuePtr = maxSymbolValue; | |
442 | return (size_t)max; | |
443 | } | |
444 | ||
445 | /* FSE_countFast_wksp() : | |
446 | * Same as FSE_countFast(), but using an externally provided scratch buffer. | |
447 | * `workSpace` size must be table of >= `1024` unsigned */ | |
448 | size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) | |
449 | { | |
450 | if (sourceSize < 1500) | |
451 | return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); | |
452 | return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); | |
453 | } | |
454 | ||
455 | /* FSE_count_wksp() : | |
456 | * Same as FSE_count(), but using an externally provided scratch buffer. | |
457 | * `workSpace` size must be table of >= `1024` unsigned */ | |
458 | size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) | |
459 | { | |
460 | if (*maxSymbolValuePtr < 255) | |
461 | return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); | |
462 | *maxSymbolValuePtr = 255; | |
463 | return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); | |
464 | } | |
465 | ||
466 | /*-************************************************************** | |
467 | * FSE Compression Code | |
468 | ****************************************************************/ | |
469 | /*! FSE_sizeof_CTable() : | |
470 | FSE_CTable is a variable size structure which contains : | |
471 | `U16 tableLog;` | |
472 | `U16 maxSymbolValue;` | |
473 | `U16 nextStateNumber[1 << tableLog];` // This size is variable | |
474 | `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable | |
475 | Allocation is manual (C standard does not support variable-size structures). | |
476 | */ | |
477 | size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog) | |
478 | { | |
479 | if (tableLog > FSE_MAX_TABLELOG) | |
480 | return ERROR(tableLog_tooLarge); | |
481 | return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32); | |
482 | } | |
483 | ||
484 | /* provides the minimum logSize to safely represent a distribution */ | |
485 | static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) | |
486 | { | |
487 | U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; | |
488 | U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; | |
489 | U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; | |
490 | return minBits; | |
491 | } | |
492 | ||
493 | unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) | |
494 | { | |
495 | U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; | |
496 | U32 tableLog = maxTableLog; | |
497 | U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); | |
498 | if (tableLog == 0) | |
499 | tableLog = FSE_DEFAULT_TABLELOG; | |
500 | if (maxBitsSrc < tableLog) | |
501 | tableLog = maxBitsSrc; /* Accuracy can be reduced */ | |
502 | if (minBits > tableLog) | |
503 | tableLog = minBits; /* Need a minimum to safely represent all symbol values */ | |
504 | if (tableLog < FSE_MIN_TABLELOG) | |
505 | tableLog = FSE_MIN_TABLELOG; | |
506 | if (tableLog > FSE_MAX_TABLELOG) | |
507 | tableLog = FSE_MAX_TABLELOG; | |
508 | return tableLog; | |
509 | } | |
510 | ||
511 | unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) | |
512 | { | |
513 | return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); | |
514 | } | |
515 | ||
516 | /* Secondary normalization method. | |
517 | To be used when primary method fails. */ | |
518 | ||
519 | static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue) | |
520 | { | |
521 | short const NOT_YET_ASSIGNED = -2; | |
522 | U32 s; | |
523 | U32 distributed = 0; | |
524 | U32 ToDistribute; | |
525 | ||
526 | /* Init */ | |
527 | U32 const lowThreshold = (U32)(total >> tableLog); | |
528 | U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); | |
529 | ||
530 | for (s = 0; s <= maxSymbolValue; s++) { | |
531 | if (count[s] == 0) { | |
532 | norm[s] = 0; | |
533 | continue; | |
534 | } | |
535 | if (count[s] <= lowThreshold) { | |
536 | norm[s] = -1; | |
537 | distributed++; | |
538 | total -= count[s]; | |
539 | continue; | |
540 | } | |
541 | if (count[s] <= lowOne) { | |
542 | norm[s] = 1; | |
543 | distributed++; | |
544 | total -= count[s]; | |
545 | continue; | |
546 | } | |
547 | ||
548 | norm[s] = NOT_YET_ASSIGNED; | |
549 | } | |
550 | ToDistribute = (1 << tableLog) - distributed; | |
551 | ||
552 | if ((total / ToDistribute) > lowOne) { | |
553 | /* risk of rounding to zero */ | |
554 | lowOne = (U32)((total * 3) / (ToDistribute * 2)); | |
555 | for (s = 0; s <= maxSymbolValue; s++) { | |
556 | if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { | |
557 | norm[s] = 1; | |
558 | distributed++; | |
559 | total -= count[s]; | |
560 | continue; | |
561 | } | |
562 | } | |
563 | ToDistribute = (1 << tableLog) - distributed; | |
564 | } | |
565 | ||
566 | if (distributed == maxSymbolValue + 1) { | |
567 | /* all values are pretty poor; | |
568 | probably incompressible data (should have already been detected); | |
569 | find max, then give all remaining points to max */ | |
570 | U32 maxV = 0, maxC = 0; | |
571 | for (s = 0; s <= maxSymbolValue; s++) | |
572 | if (count[s] > maxC) | |
573 | maxV = s, maxC = count[s]; | |
574 | norm[maxV] += (short)ToDistribute; | |
575 | return 0; | |
576 | } | |
577 | ||
578 | if (total == 0) { | |
579 | /* all of the symbols were low enough for the lowOne or lowThreshold */ | |
580 | for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1)) | |
581 | if (norm[s] > 0) | |
582 | ToDistribute--, norm[s]++; | |
583 | return 0; | |
584 | } | |
585 | ||
586 | { | |
587 | U64 const vStepLog = 62 - tableLog; | |
588 | U64 const mid = (1ULL << (vStepLog - 1)) - 1; | |
589 | U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */ | |
590 | U64 tmpTotal = mid; | |
591 | for (s = 0; s <= maxSymbolValue; s++) { | |
592 | if (norm[s] == NOT_YET_ASSIGNED) { | |
593 | U64 const end = tmpTotal + (count[s] * rStep); | |
594 | U32 const sStart = (U32)(tmpTotal >> vStepLog); | |
595 | U32 const sEnd = (U32)(end >> vStepLog); | |
596 | U32 const weight = sEnd - sStart; | |
597 | if (weight < 1) | |
598 | return ERROR(GENERIC); | |
599 | norm[s] = (short)weight; | |
600 | tmpTotal = end; | |
601 | } | |
602 | } | |
603 | } | |
604 | ||
605 | return 0; | |
606 | } | |
607 | ||
608 | size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue) | |
609 | { | |
610 | /* Sanity checks */ | |
611 | if (tableLog == 0) | |
612 | tableLog = FSE_DEFAULT_TABLELOG; | |
613 | if (tableLog < FSE_MIN_TABLELOG) | |
614 | return ERROR(GENERIC); /* Unsupported size */ | |
615 | if (tableLog > FSE_MAX_TABLELOG) | |
616 | return ERROR(tableLog_tooLarge); /* Unsupported size */ | |
617 | if (tableLog < FSE_minTableLog(total, maxSymbolValue)) | |
618 | return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ | |
619 | ||
620 | { | |
621 | U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}; | |
622 | U64 const scale = 62 - tableLog; | |
623 | U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */ | |
624 | U64 const vStep = 1ULL << (scale - 20); | |
625 | int stillToDistribute = 1 << tableLog; | |
626 | unsigned s; | |
627 | unsigned largest = 0; | |
628 | short largestP = 0; | |
629 | U32 lowThreshold = (U32)(total >> tableLog); | |
630 | ||
631 | for (s = 0; s <= maxSymbolValue; s++) { | |
632 | if (count[s] == total) | |
633 | return 0; /* rle special case */ | |
634 | if (count[s] == 0) { | |
635 | normalizedCounter[s] = 0; | |
636 | continue; | |
637 | } | |
638 | if (count[s] <= lowThreshold) { | |
639 | normalizedCounter[s] = -1; | |
640 | stillToDistribute--; | |
641 | } else { | |
642 | short proba = (short)((count[s] * step) >> scale); | |
643 | if (proba < 8) { | |
644 | U64 restToBeat = vStep * rtbTable[proba]; | |
645 | proba += (count[s] * step) - ((U64)proba << scale) > restToBeat; | |
646 | } | |
647 | if (proba > largestP) | |
648 | largestP = proba, largest = s; | |
649 | normalizedCounter[s] = proba; | |
650 | stillToDistribute -= proba; | |
651 | } | |
652 | } | |
653 | if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { | |
654 | /* corner case, need another normalization method */ | |
655 | size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); | |
656 | if (FSE_isError(errorCode)) | |
657 | return errorCode; | |
658 | } else | |
659 | normalizedCounter[largest] += (short)stillToDistribute; | |
660 | } | |
661 | ||
662 | return tableLog; | |
663 | } | |
664 | ||
665 | /* fake FSE_CTable, for raw (uncompressed) input */ | |
666 | size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits) | |
667 | { | |
668 | const unsigned tableSize = 1 << nbBits; | |
669 | const unsigned tableMask = tableSize - 1; | |
670 | const unsigned maxSymbolValue = tableMask; | |
671 | void *const ptr = ct; | |
672 | U16 *const tableU16 = ((U16 *)ptr) + 2; | |
673 | void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */ | |
674 | FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); | |
675 | unsigned s; | |
676 | ||
677 | /* Sanity checks */ | |
678 | if (nbBits < 1) | |
679 | return ERROR(GENERIC); /* min size */ | |
680 | ||
681 | /* header */ | |
682 | tableU16[-2] = (U16)nbBits; | |
683 | tableU16[-1] = (U16)maxSymbolValue; | |
684 | ||
685 | /* Build table */ | |
686 | for (s = 0; s < tableSize; s++) | |
687 | tableU16[s] = (U16)(tableSize + s); | |
688 | ||
689 | /* Build Symbol Transformation Table */ | |
690 | { | |
691 | const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); | |
692 | for (s = 0; s <= maxSymbolValue; s++) { | |
693 | symbolTT[s].deltaNbBits = deltaNbBits; | |
694 | symbolTT[s].deltaFindState = s - 1; | |
695 | } | |
696 | } | |
697 | ||
698 | return 0; | |
699 | } | |
700 | ||
701 | /* fake FSE_CTable, for rle input (always same symbol) */ | |
702 | size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue) | |
703 | { | |
704 | void *ptr = ct; | |
705 | U16 *tableU16 = ((U16 *)ptr) + 2; | |
706 | void *FSCTptr = (U32 *)ptr + 2; | |
707 | FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr; | |
708 | ||
709 | /* header */ | |
710 | tableU16[-2] = (U16)0; | |
711 | tableU16[-1] = (U16)symbolValue; | |
712 | ||
713 | /* Build table */ | |
714 | tableU16[0] = 0; | |
715 | tableU16[1] = 0; /* just in case */ | |
716 | ||
717 | /* Build Symbol Transformation Table */ | |
718 | symbolTT[symbolValue].deltaNbBits = 0; | |
719 | symbolTT[symbolValue].deltaFindState = 0; | |
720 | ||
721 | return 0; | |
722 | } | |
723 | ||
724 | static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast) | |
725 | { | |
726 | const BYTE *const istart = (const BYTE *)src; | |
727 | const BYTE *const iend = istart + srcSize; | |
728 | const BYTE *ip = iend; | |
729 | ||
730 | BIT_CStream_t bitC; | |
731 | FSE_CState_t CState1, CState2; | |
732 | ||
733 | /* init */ | |
734 | if (srcSize <= 2) | |
735 | return 0; | |
736 | { | |
737 | size_t const initError = BIT_initCStream(&bitC, dst, dstSize); | |
738 | if (FSE_isError(initError)) | |
739 | return 0; /* not enough space available to write a bitstream */ | |
740 | } | |
741 | ||
742 | #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) | |
743 | ||
744 | if (srcSize & 1) { | |
745 | FSE_initCState2(&CState1, ct, *--ip); | |
746 | FSE_initCState2(&CState2, ct, *--ip); | |
747 | FSE_encodeSymbol(&bitC, &CState1, *--ip); | |
748 | FSE_FLUSHBITS(&bitC); | |
749 | } else { | |
750 | FSE_initCState2(&CState2, ct, *--ip); | |
751 | FSE_initCState2(&CState1, ct, *--ip); | |
752 | } | |
753 | ||
754 | /* join to mod 4 */ | |
755 | srcSize -= 2; | |
756 | if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */ | |
757 | FSE_encodeSymbol(&bitC, &CState2, *--ip); | |
758 | FSE_encodeSymbol(&bitC, &CState1, *--ip); | |
759 | FSE_FLUSHBITS(&bitC); | |
760 | } | |
761 | ||
762 | /* 2 or 4 encoding per loop */ | |
763 | while (ip > istart) { | |
764 | ||
765 | FSE_encodeSymbol(&bitC, &CState2, *--ip); | |
766 | ||
767 | if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */ | |
768 | FSE_FLUSHBITS(&bitC); | |
769 | ||
770 | FSE_encodeSymbol(&bitC, &CState1, *--ip); | |
771 | ||
772 | if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */ | |
773 | FSE_encodeSymbol(&bitC, &CState2, *--ip); | |
774 | FSE_encodeSymbol(&bitC, &CState1, *--ip); | |
775 | } | |
776 | ||
777 | FSE_FLUSHBITS(&bitC); | |
778 | } | |
779 | ||
780 | FSE_flushCState(&bitC, &CState2); | |
781 | FSE_flushCState(&bitC, &CState1); | |
782 | return BIT_closeCStream(&bitC); | |
783 | } | |
784 | ||
785 | size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct) | |
786 | { | |
787 | unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); | |
788 | ||
789 | if (fast) | |
790 | return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); | |
791 | else | |
792 | return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); | |
793 | } | |
794 | ||
795 | size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } |