/*
 * Huffman encoder, part of New Generation Entropy library
 * Copyright (C) 2013-2016, Yann Collet.
 *
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License version 2 as published by the
 * Free Software Foundation. This program is dual-licensed; you may select
 * either version 2 of the GNU General Public License ("GPL") or BSD license
 * ("BSD").
 *
 * You can contact the author at :
 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 */

/* **************************************************************
 * Includes
 ****************************************************************/
#include "bitstream.h"
#include "fse.h" /* header compression */
#include "huf.h"
#include <linux/kernel.h>
#include <linux/string.h> /* memcpy, memset */

/* **************************************************************
 * Error Management
 ****************************************************************/
#define HUF_STATIC_ASSERT(c)                                   \
	{                                                      \
		enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
	} /* use only *after* variable declarations */
#define CHECK_V_F(e, f)     \
	size_t const e = f; \
	if (ERR_isError(e)) \
	return f
#define CHECK_F(f)                        \
	{                                 \
		CHECK_V_F(_var_err__, f); \
	}
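
/*
 * Usage sketch for the error-check macros above (illustrative only, not part
 * of this file's build; `example_wrapper` is a hypothetical caller) :
 * CHECK_V_F declares a const result and propagates any error code to the
 * caller, CHECK_F does the same while discarding the result.
 *
 *	static size_t example_wrapper(void *dst, size_t dstSize, const S16 *norm, unsigned maxSV, unsigned tableLog)
 *	{
 *		CHECK_V_F(hSize, FSE_writeNCount(dst, dstSize, norm, maxSV, tableLog));
 *		return hSize; // reached only when FSE_writeNCount() did not report an error
 *	}
 */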

/* **************************************************************
 * Utils
 ****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}

/* *******************************************************
 * HUF : Huffman block compression
 *********************************************************/
/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize)
{
	BYTE *const ostart = (BYTE *)dst;
	BYTE *op = ostart;
	BYTE *const oend = ostart + dstSize;

	U32 maxSymbolValue = HUF_TABLELOG_MAX;
	U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;

	FSE_CTable *CTable;
	U32 *count;
	S16 *norm;
	size_t spaceUsed32 = 0;

	HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32));

	CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32);
	spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX);
	count = (U32 *)workspace + spaceUsed32;
	spaceUsed32 += HUF_TABLELOG_MAX + 1;
	norm = (S16 *)((U32 *)workspace + spaceUsed32);
	spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2;

	if ((spaceUsed32 << 2) > workspaceSize)
		return ERROR(tableLog_tooLarge);
	workspace = (U32 *)workspace + spaceUsed32;
	workspaceSize -= (spaceUsed32 << 2);

	/* init conditions */
	if (wtSize <= 1)
		return 0; /* Not compressible */

	/* Scan input and build symbol stats */
	{
		CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize));
		if (maxCount == wtSize)
			return 1; /* only a single symbol in src : rle */
		if (maxCount == 1)
			return 0; /* each symbol present maximum once => not compressible */
	}

	tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
	CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue));

	/* Write table description header */
	{
		CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog));
		op += hSize;
	}

	/* Compress */
	CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize));
	{
		CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable));
		if (cSize == 0)
			return 0; /* not enough space for compressed data */
		op += cSize;
	}

	return op - ostart;
}

struct HUF_CElt_s {
	U16 val;
	BYTE nbBits;
}; /* typedef'd to HUF_CElt within "huf.h" */

/*! HUF_writeCTable_wksp() :
 *  `CTable` : Huffman tree to save, using huf representation.
 *  @return : size of saved CTable */
size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize)
{
	BYTE *op = (BYTE *)dst;
	U32 n;

	BYTE *bitsToWeight;
	BYTE *huffWeight;
	size_t spaceUsed32 = 0;

	bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
	spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2;
	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2;

	if ((spaceUsed32 << 2) > workspaceSize)
		return ERROR(tableLog_tooLarge);
	workspace = (U32 *)workspace + spaceUsed32;
	workspaceSize -= (spaceUsed32 << 2);

	/* check conditions */
	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
		return ERROR(maxSymbolValue_tooLarge);

	/* convert to weight */
	bitsToWeight[0] = 0;
	for (n = 1; n < huffLog + 1; n++)
		bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
	for (n = 0; n < maxSymbolValue; n++)
		huffWeight[n] = bitsToWeight[CTable[n].nbBits];

	/* attempt weights compression by FSE */
	{
		CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize));
		if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */
			op[0] = (BYTE)hSize;
			return hSize + 1;
		}
	}

	/* write raw values as 4-bits (max : 15) */
	if (maxSymbolValue > (256 - 128))
		return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
	if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize)
		return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
	op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1));
	huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
	for (n = 0; n < maxSymbolValue; n += 2)
		op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]);
	return ((maxSymbolValue + 1) / 2) + 1;
}
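
/*
 * Header layout written by HUF_writeCTable_wksp(), with an illustrative worked
 * example (summary of the code above, not additional behavior) :
 * - if the FSE-compressed weight stream fits in fewer than maxSymbolValue/2
 *   bytes, the first byte is its size `hSize` (< 128) and the stream follows;
 * - otherwise the first byte is 128 + (maxSymbolValue - 1) and the weights
 *   follow directly, packed two per byte (4 bits each).
 * Example : with maxSymbolValue = 128 and weights that do not compress, the
 * header byte is 128 + 127 = 255 and the total size is ((128 + 1) / 2) + 1 = 65 bytes.
 */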

size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
{
	U32 *rankVal;
	BYTE *huffWeight;
	U32 tableLog = 0;
	U32 nbSymbols = 0;
	size_t readSize;
	size_t spaceUsed32 = 0;

	rankVal = (U32 *)workspace + spaceUsed32;
	spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

	if ((spaceUsed32 << 2) > workspaceSize)
		return ERROR(tableLog_tooLarge);
	workspace = (U32 *)workspace + spaceUsed32;
	workspaceSize -= (spaceUsed32 << 2);

	/* get symbol weights */
	readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
	if (ERR_isError(readSize))
		return readSize;

	/* check result */
	if (tableLog > HUF_TABLELOG_MAX)
		return ERROR(tableLog_tooLarge);
	if (nbSymbols > maxSymbolValue + 1)
		return ERROR(maxSymbolValue_tooSmall);

	/* Prepare base value per rank */
	{
		U32 n, nextRankStart = 0;
		for (n = 1; n <= tableLog; n++) {
			U32 curr = nextRankStart;
			nextRankStart += (rankVal[n] << (n - 1));
			rankVal[n] = curr;
		}
	}

	/* fill nbBits */
	{
		U32 n;
		for (n = 0; n < nbSymbols; n++) {
			const U32 w = huffWeight[n];
			CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
		}
	}

	/* fill val */
	{
		U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */
		U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0};
		{
			U32 n;
			for (n = 0; n < nbSymbols; n++)
				nbPerRank[CTable[n].nbBits]++;
		}
		/* determine starting value per rank */
		valPerRank[tableLog + 1] = 0; /* for w==0 */
		{
			U16 min = 0;
			U32 n;
			for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */
				valPerRank[n] = min;	 /* get starting value within each rank */
				min += nbPerRank[n];
				min >>= 1;
			}
		}
		/* assign value within rank, symbol order */
		{
			U32 n;
			for (n = 0; n <= maxSymbolValue; n++)
				CTable[n].val = valPerRank[CTable[n].nbBits]++;
		}
	}

	return readSize;
}
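
/*
 * Relation between stored weight and code length, as used above (illustrative
 * worked example, not additional behavior) : a non-zero weight w maps to
 * nbBits = tableLog + 1 - w. With tableLog = 6, a weight of 6 yields a 1-bit
 * code (the most frequent symbol) and a weight of 1 yields a 6-bit code;
 * a weight of 0 marks a symbol that does not appear and gets no code.
 */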

typedef struct nodeElt_s {
	U32 count;
	U16 parent;
	BYTE byte;
	BYTE nbBits;
} nodeElt;

static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits)
{
	const U32 largestBits = huffNode[lastNonNull].nbBits;
	if (largestBits <= maxNbBits)
		return largestBits; /* early exit : no elt > maxNbBits */

	/* there are several too large elements (at least >= 2) */
	{
		int totalCost = 0;
		const U32 baseCost = 1 << (largestBits - maxNbBits);
		U32 n = lastNonNull;

		while (huffNode[n].nbBits > maxNbBits) {
			totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
			huffNode[n].nbBits = (BYTE)maxNbBits;
			n--;
		} /* n stops at huffNode[n].nbBits <= maxNbBits */
		while (huffNode[n].nbBits == maxNbBits)
			n--; /* n ends at index of smallest symbol using < maxNbBits */

		/* renorm totalCost */
		totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */

		/* repay normalized cost */
		{
			U32 const noSymbol = 0xF0F0F0F0;
			U32 rankLast[HUF_TABLELOG_MAX + 2];
			int pos;

			/* Get pos of last (smallest) symbol per rank */
			memset(rankLast, 0xF0, sizeof(rankLast));
			{
				U32 currNbBits = maxNbBits;
				for (pos = n; pos >= 0; pos--) {
					if (huffNode[pos].nbBits >= currNbBits)
						continue;
					currNbBits = huffNode[pos].nbBits; /* < maxNbBits */
					rankLast[maxNbBits - currNbBits] = pos;
				}
			}

			while (totalCost > 0) {
				U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
				for (; nBitsToDecrease > 1; nBitsToDecrease--) {
					U32 highPos = rankLast[nBitsToDecrease];
					U32 lowPos = rankLast[nBitsToDecrease - 1];
					if (highPos == noSymbol)
						continue;
					if (lowPos == noSymbol)
						break;
					{
						U32 const highTotal = huffNode[highPos].count;
						U32 const lowTotal = 2 * huffNode[lowPos].count;
						if (highTotal <= lowTotal)
							break;
					}
				}
				/* only triggered when no more rank 1 symbols are left => find closest one (note : there is necessarily at least one !) */
				/* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
				while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
					nBitsToDecrease++;
				totalCost -= 1 << (nBitsToDecrease - 1);
				if (rankLast[nBitsToDecrease - 1] == noSymbol)
					rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
				huffNode[rankLast[nBitsToDecrease]].nbBits++;
				if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
					rankLast[nBitsToDecrease] = noSymbol;
				else {
					rankLast[nBitsToDecrease]--;
					if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease)
						rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
				}
			} /* while (totalCost > 0) */

			while (totalCost < 0) {		       /* Sometimes, cost correction overshoots */
				if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0
								  (using maxNbBits) */
					while (huffNode[n].nbBits == maxNbBits)
						n--;
					huffNode[n + 1].nbBits--;
					rankLast[1] = n + 1;
					totalCost++;
					continue;
				}
				huffNode[rankLast[1] + 1].nbBits--;
				rankLast[1]++;
				totalCost++;
			}
		}
	} /* there are several too large elements (at least >= 2) */

	return maxNbBits;
}

typedef struct {
	U32 base;
	U32 curr;
} rankPos;

static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue)
{
	rankPos rank[32];
	U32 n;

	memset(rank, 0, sizeof(rank));
	for (n = 0; n <= maxSymbolValue; n++) {
		U32 r = BIT_highbit32(count[n] + 1);
		rank[r].base++;
	}
	for (n = 30; n > 0; n--)
		rank[n - 1].base += rank[n].base;
	for (n = 0; n < 32; n++)
		rank[n].curr = rank[n].base;
	for (n = 0; n <= maxSymbolValue; n++) {
		U32 const c = count[n];
		U32 const r = BIT_highbit32(c + 1) + 1;
		U32 pos = rank[r].curr++;
		while ((pos > rank[r].base) && (c > huffNode[pos - 1].count))
			huffNode[pos] = huffNode[pos - 1], pos--;
		huffNode[pos].count = c;
		huffNode[pos].byte = (BYTE)n;
	}
}

/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX + 1)
typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1];
size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize)
{
	nodeElt *const huffNode0 = (nodeElt *)workSpace;
	nodeElt *const huffNode = huffNode0 + 1;
	U32 n, nonNullRank;
	int lowS, lowN;
	U16 nodeNb = STARTNODE;
	U32 nodeRoot;

	/* safety checks */
	if (wkspSize < sizeof(huffNodeTable))
		return ERROR(GENERIC); /* workSpace is not large enough */
	if (maxNbBits == 0)
		maxNbBits = HUF_TABLELOG_DEFAULT;
	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
		return ERROR(GENERIC);
	memset(huffNode0, 0, sizeof(huffNodeTable));

	/* sort, decreasing order */
	HUF_sort(huffNode, count, maxSymbolValue);

	/* init for parents */
	nonNullRank = maxSymbolValue;
	while (huffNode[nonNullRank].count == 0)
		nonNullRank--;
	lowS = nonNullRank;
	nodeRoot = nodeNb + lowS - 1;
	lowN = nodeNb;
	huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count;
	huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb;
	nodeNb++;
	lowS -= 2;
	for (n = nodeNb; n <= nodeRoot; n++)
		huffNode[n].count = (U32)(1U << 30);
	huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */

	/* create parents */
	while (nodeNb <= nodeRoot) {
		U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
		U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
		huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
		huffNode[n1].parent = huffNode[n2].parent = nodeNb;
		nodeNb++;
	}

	/* distribute weights (unlimited tree height) */
	huffNode[nodeRoot].nbBits = 0;
	for (n = nodeRoot - 1; n >= STARTNODE; n--)
		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
	for (n = 0; n <= nonNullRank; n++)
		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;

	/* enforce maxTableLog */
	maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);

	/* fill result into tree (val, nbBits) */
	{
		U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0};
		U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0};
		if (maxNbBits > HUF_TABLELOG_MAX)
			return ERROR(GENERIC); /* check fit into table */
		for (n = 0; n <= nonNullRank; n++)
			nbPerRank[huffNode[n].nbBits]++;
		/* determine starting value per rank */
		{
			U16 min = 0;
			for (n = maxNbBits; n > 0; n--) {
				valPerRank[n] = min; /* get starting value within each rank */
				min += nbPerRank[n];
				min >>= 1;
			}
		}
		for (n = 0; n <= maxSymbolValue; n++)
			tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
		for (n = 0; n <= maxSymbolValue; n++)
			tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
	}

	return maxNbBits;
}
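
/*
 * Workspace sizing sketch for HUF_buildCTable_wksp() (illustrative only,
 * assuming HUF_SYMBOLVALUE_MAX == 255 and an 8-byte nodeElt as on common
 * ABIs) : huffNodeTable holds 2 * 255 + 2 = 512 nodeElt entries, i.e. 4096
 * bytes, which is the "table of 1024 unsigned" mentioned above. A
 * hypothetical caller with a static scratch buffer could therefore do :
 *
 *	static U32 scratch[1024]; // >= sizeof(huffNodeTable), 4-byte aligned
 *	size_t const maxBits = HUF_buildCTable_wksp(tree, count, 255, HUF_TABLELOG_DEFAULT, scratch, sizeof(scratch));
 *	if (HUF_isError(maxBits))
 *		return maxBits;
 */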

static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
{
	size_t nbBits = 0;
	int s;
	for (s = 0; s <= (int)maxSymbolValue; ++s) {
		nbBits += CTable[s].nbBits * count[s];
	}
	return nbBits >> 3;
}

static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
{
	int bad = 0;
	int s;
	for (s = 0; s <= (int)maxSymbolValue; ++s) {
		bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
	}
	return !bad;
}

static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable)
{
	BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

#define HUF_FLUSHBITS(s) BIT_flushBits(s)

#define HUF_FLUSHBITS_1(stream)                                            \
	if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \
	HUF_FLUSHBITS(stream)

#define HUF_FLUSHBITS_2(stream)                                            \
	if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \
	HUF_FLUSHBITS(stream)
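
/*
 * Worked example for the flush heuristics above (assuming HUF_TABLELOG_MAX is
 * 12, as defined in huf.h, and bitContainer is a size_t) : with a 64-bit
 * container, 64 >= 12*2+7 = 31 and 64 >= 12*4+7 = 55, so both conditions are
 * compile-time false and no intermediate flush is emitted; with a 32-bit
 * container, only HUF_FLUSHBITS_2 triggers (32 < 55), so the stream is flushed
 * at least every two symbols and the accumulator cannot overflow.
 */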

size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
{
	const BYTE *ip = (const BYTE *)src;
	BYTE *const ostart = (BYTE *)dst;
	BYTE *const oend = ostart + dstSize;
	BYTE *op = ostart;
	size_t n;
	BIT_CStream_t bitC;

	/* init */
	if (dstSize < 8)
		return 0; /* not enough space to compress */
	{
		size_t const initErr = BIT_initCStream(&bitC, op, oend - op);
		if (HUF_isError(initErr))
			return 0;
	}

	n = srcSize & ~3; /* join to mod 4 */
	switch (srcSize & 3) {
	case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC);
	case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC);
	case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC);
	case 0:
	default:;
	}

	for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */
		HUF_encodeSymbol(&bitC, ip[n - 1], CTable);
		HUF_FLUSHBITS_1(&bitC);
		HUF_encodeSymbol(&bitC, ip[n - 2], CTable);
		HUF_FLUSHBITS_2(&bitC);
		HUF_encodeSymbol(&bitC, ip[n - 3], CTable);
		HUF_FLUSHBITS_1(&bitC);
		HUF_encodeSymbol(&bitC, ip[n - 4], CTable);
		HUF_FLUSHBITS(&bitC);
	}

	return BIT_closeCStream(&bitC);
}

size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
{
	size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */
	const BYTE *ip = (const BYTE *)src;
	const BYTE *const iend = ip + srcSize;
	BYTE *const ostart = (BYTE *)dst;
	BYTE *const oend = ostart + dstSize;
	BYTE *op = ostart;

	if (dstSize < 6 + 1 + 1 + 1 + 8)
		return 0; /* minimum space to compress successfully */
	if (srcSize < 12)
		return 0; /* no saving possible : too small input */
	op += 6;	  /* jumpTable */

	{
		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
		if (cSize == 0)
			return 0;
		ZSTD_writeLE16(ostart, (U16)cSize);
		op += cSize;
	}

	ip += segmentSize;
	{
		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
		if (cSize == 0)
			return 0;
		ZSTD_writeLE16(ostart + 2, (U16)cSize);
		op += cSize;
	}

	ip += segmentSize;
	{
		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
		if (cSize == 0)
			return 0;
		ZSTD_writeLE16(ostart + 4, (U16)cSize);
		op += cSize;
	}

	ip += segmentSize;
	{
		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable));
		if (cSize == 0)
			return 0;
		op += cSize;
	}

	return op - ostart;
}
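
/*
 * Output layout produced above (summary, matching the code) : a 6-byte jump
 * table holding three little-endian U16 stream sizes, followed by the four
 * independently compressed streams; the size of the fourth stream is implied
 * by the total block size. The first three streams each cover
 * (srcSize + 3) / 4 input bytes, the last one covers the remainder.
 */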

static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream,
					  const HUF_CElt *CTable)
{
	size_t const cSize =
	    singleStream ? HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
	if (HUF_isError(cSize)) {
		return cSize;
	}
	if (cSize == 0) {
		return 0;
	} /* uncompressible */
	op += cSize;
	/* check compressibility */
	if ((size_t)(op - ostart) >= srcSize - 1) {
		return 0;
	}
	return op - ostart;
}

/* `workSpace` must be a table of at least 1024 unsigned */
static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog,
				    unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat)
{
	BYTE *const ostart = (BYTE *)dst;
	BYTE *const oend = ostart + dstSize;
	BYTE *op = ostart;

	U32 *count;
	size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
	HUF_CElt *CTable;
	size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);

	/* checks & inits */
	if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize)
		return ERROR(GENERIC);
	if (!srcSize)
		return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
	if (!dstSize)
		return 0; /* cannot fit within dst budget */
	if (srcSize > HUF_BLOCKSIZE_MAX)
		return ERROR(srcSize_wrong); /* curr block size limit */
	if (huffLog > HUF_TABLELOG_MAX)
		return ERROR(tableLog_tooLarge);
	if (!maxSymbolValue)
		maxSymbolValue = HUF_SYMBOLVALUE_MAX;
	if (!huffLog)
		huffLog = HUF_TABLELOG_DEFAULT;

	count = (U32 *)workSpace;
	workSpace = (BYTE *)workSpace + countSize;
	wkspSize -= countSize;
	CTable = (HUF_CElt *)workSpace;
	workSpace = (BYTE *)workSpace + CTableSize;
	wkspSize -= CTableSize;

	/* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
	if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
		return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
	}

	/* Scan input and build symbol stats */
	{
		CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace));
		if (largest == srcSize) {
			*ostart = ((const BYTE *)src)[0];
			return 1;
		} /* single symbol, rle */
		if (largest <= (srcSize >> 7) + 1)
			return 0; /* Fast heuristic : not compressible enough */
	}

	/* Check validity of previous table */
	if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
		*repeat = HUF_repeat_none;
	}
	/* Heuristic : use existing table for small inputs */
	if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
		return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
	}

	/* Build Huffman Tree */
	huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
	{
		CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize));
		huffLog = (U32)maxBits;
		/* Zero the unused symbols so we can check it for validity */
		memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
	}

	/* Write table description header */
	{
		CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize));
		/* Check if using the previous table will be beneficial */
		if (repeat && *repeat != HUF_repeat_none) {
			size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
			size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
			if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
				return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
			}
		}
		/* Use the new table */
		if (hSize + 12ul >= srcSize) {
			return 0;
		}
		op += hSize;
		if (repeat) {
			*repeat = HUF_repeat_none;
		}
		if (oldHufTable) {
			memcpy(oldHufTable, CTable, CTableSize);
		} /* Save the new table */
	}
	return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
}

size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
			   size_t wkspSize)
{
	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
}

size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
{
	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat,
				     preferRepeat);
}

size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
			   size_t wkspSize)
{
	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
}

size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
{
	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat,
				     preferRepeat);
}
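
/*
 * End-to-end usage sketch (illustrative only; buffer names and sizes are
 * hypothetical, derived from the wkspSize check in HUF_compress_internal()
 * assuming HUF_SYMBOLVALUE_MAX == 255, a 4-byte HUF_CElt and an 8-byte
 * nodeElt) : the workspace must cover count (1024 B) + CTable (1024 B) +
 * huffNodeTable (4096 B), i.e. 6 KB. Passing huffLog = 0 selects
 * HUF_TABLELOG_DEFAULT, as handled above.
 *
 *	static U32 wksp[1536]; // 6 KB scratch, 4-byte aligned
 *	size_t const cSize = HUF_compress4X_wksp(dst, dstCapacity, src, srcSize, 255, 0, wksp, sizeof(wksp));
 *	if (HUF_isError(cSize))
 *		return cSize;  // error code
 *	// cSize == 0 : not compressible; cSize == 1 : RLE; otherwise compressed size including the table header
 */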