]>
Commit | Line | Data |
---|---|---|
24fa0402 LC |
1 | /* |
2 | * Branch/Call/Jump (BCJ) filter decoders | |
3 | * | |
4 | * Authors: Lasse Collin <[email protected]> | |
5 | * Igor Pavlov <http://7-zip.org/> | |
6 | * | |
7 | * This file has been put into the public domain. | |
8 | * You can do whatever you want with this file. | |
9 | */ | |
10 | ||
11 | #include "xz_private.h" | |
12 | ||
13 | /* | |
14 | * The rest of the file is inside this ifdef. It makes things a little more | |
15 | * convenient when building without support for any BCJ filters. | |
16 | */ | |
17 | #ifdef XZ_DEC_BCJ | |
18 | ||
19 | struct xz_dec_bcj { | |
20 | /* Type of the BCJ filter being used */ | |
21 | enum { | |
22 | BCJ_X86 = 4, /* x86 or x86-64 */ | |
23 | BCJ_POWERPC = 5, /* Big endian only */ | |
24 | BCJ_IA64 = 6, /* Big or little endian */ | |
25 | BCJ_ARM = 7, /* Little endian only */ | |
26 | BCJ_ARMTHUMB = 8, /* Little endian only */ | |
27 | BCJ_SPARC = 9 /* Big or little endian */ | |
28 | } type; | |
29 | ||
30 | /* | |
31 | * Return value of the next filter in the chain. We need to preserve | |
32 | * this information across calls, because we must not call the next | |
33 | * filter anymore once it has returned XZ_STREAM_END. | |
34 | */ | |
35 | enum xz_ret ret; | |
36 | ||
37 | /* True if we are operating in single-call mode. */ | |
38 | bool single_call; | |
39 | ||
40 | /* | |
41 | * Absolute position relative to the beginning of the uncompressed | |
42 | * data (in a single .xz Block). We care only about the lowest 32 | |
43 | * bits so this doesn't need to be uint64_t even with big files. | |
44 | */ | |
45 | uint32_t pos; | |
46 | ||
47 | /* x86 filter state */ | |
48 | uint32_t x86_prev_mask; | |
49 | ||
50 | /* Temporary space to hold the variables from struct xz_buf */ | |
51 | uint8_t *out; | |
52 | size_t out_pos; | |
53 | size_t out_size; | |
54 | ||
55 | struct { | |
56 | /* Amount of already filtered data in the beginning of buf */ | |
57 | size_t filtered; | |
58 | ||
59 | /* Total amount of data currently stored in buf */ | |
60 | size_t size; | |
61 | ||
62 | /* | |
63 | * Buffer to hold a mix of filtered and unfiltered data. This | |
64 | * needs to be big enough to hold Alignment + 2 * Look-ahead: | |
65 | * | |
66 | * Type Alignment Look-ahead | |
67 | * x86 1 4 | |
68 | * PowerPC 4 0 | |
69 | * IA-64 16 0 | |
70 | * ARM 4 0 | |
71 | * ARM-Thumb 2 2 | |
72 | * SPARC 4 0 | |
73 | */ | |
74 | uint8_t buf[16]; | |
75 | } temp; | |
76 | }; | |
77 | ||
78 | #ifdef XZ_DEC_X86 | |
79 | /* | |
80 | * This is used to test the most significant byte of a memory address | |
81 | * in an x86 instruction. | |
82 | */ | |
83 | static inline int bcj_x86_test_msbyte(uint8_t b) | |
84 | { | |
85 | return b == 0x00 || b == 0xFF; | |
86 | } | |
87 | ||
88 | static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | |
89 | { | |
90 | static const bool mask_to_allowed_status[8] | |
91 | = { true, true, true, false, true, false, false, false }; | |
92 | ||
93 | static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 }; | |
94 | ||
95 | size_t i; | |
96 | size_t prev_pos = (size_t)-1; | |
97 | uint32_t prev_mask = s->x86_prev_mask; | |
98 | uint32_t src; | |
99 | uint32_t dest; | |
100 | uint32_t j; | |
101 | uint8_t b; | |
102 | ||
103 | if (size <= 4) | |
104 | return 0; | |
105 | ||
106 | size -= 4; | |
107 | for (i = 0; i < size; ++i) { | |
108 | if ((buf[i] & 0xFE) != 0xE8) | |
109 | continue; | |
110 | ||
111 | prev_pos = i - prev_pos; | |
112 | if (prev_pos > 3) { | |
113 | prev_mask = 0; | |
114 | } else { | |
115 | prev_mask = (prev_mask << (prev_pos - 1)) & 7; | |
116 | if (prev_mask != 0) { | |
117 | b = buf[i + 4 - mask_to_bit_num[prev_mask]]; | |
118 | if (!mask_to_allowed_status[prev_mask] | |
119 | || bcj_x86_test_msbyte(b)) { | |
120 | prev_pos = i; | |
121 | prev_mask = (prev_mask << 1) | 1; | |
122 | continue; | |
123 | } | |
124 | } | |
125 | } | |
126 | ||
127 | prev_pos = i; | |
128 | ||
129 | if (bcj_x86_test_msbyte(buf[i + 4])) { | |
130 | src = get_unaligned_le32(buf + i + 1); | |
131 | while (true) { | |
132 | dest = src - (s->pos + (uint32_t)i + 5); | |
133 | if (prev_mask == 0) | |
134 | break; | |
135 | ||
136 | j = mask_to_bit_num[prev_mask] * 8; | |
137 | b = (uint8_t)(dest >> (24 - j)); | |
138 | if (!bcj_x86_test_msbyte(b)) | |
139 | break; | |
140 | ||
141 | src = dest ^ (((uint32_t)1 << (32 - j)) - 1); | |
142 | } | |
143 | ||
144 | dest &= 0x01FFFFFF; | |
145 | dest |= (uint32_t)0 - (dest & 0x01000000); | |
146 | put_unaligned_le32(dest, buf + i + 1); | |
147 | i += 4; | |
148 | } else { | |
149 | prev_mask = (prev_mask << 1) | 1; | |
150 | } | |
151 | } | |
152 | ||
153 | prev_pos = i - prev_pos; | |
154 | s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1); | |
155 | return i; | |
156 | } | |
157 | #endif | |
158 | ||
159 | #ifdef XZ_DEC_POWERPC | |
160 | static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | |
161 | { | |
162 | size_t i; | |
163 | uint32_t instr; | |
164 | ||
165 | for (i = 0; i + 4 <= size; i += 4) { | |
166 | instr = get_unaligned_be32(buf + i); | |
167 | if ((instr & 0xFC000003) == 0x48000001) { | |
168 | instr &= 0x03FFFFFC; | |
169 | instr -= s->pos + (uint32_t)i; | |
170 | instr &= 0x03FFFFFC; | |
171 | instr |= 0x48000001; | |
172 | put_unaligned_be32(instr, buf + i); | |
173 | } | |
174 | } | |
175 | ||
176 | return i; | |
177 | } | |
178 | #endif | |
179 | ||
180 | #ifdef XZ_DEC_IA64 | |
181 | static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | |
182 | { | |
183 | static const uint8_t branch_table[32] = { | |
184 | 0, 0, 0, 0, 0, 0, 0, 0, | |
185 | 0, 0, 0, 0, 0, 0, 0, 0, | |
186 | 4, 4, 6, 6, 0, 0, 7, 7, | |
187 | 4, 4, 0, 0, 4, 4, 0, 0 | |
188 | }; | |
189 | ||
190 | /* | |
191 | * The local variables take a little bit stack space, but it's less | |
192 | * than what LZMA2 decoder takes, so it doesn't make sense to reduce | |
193 | * stack usage here without doing that for the LZMA2 decoder too. | |
194 | */ | |
195 | ||
196 | /* Loop counters */ | |
197 | size_t i; | |
198 | size_t j; | |
199 | ||
200 | /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ | |
201 | uint32_t slot; | |
202 | ||
203 | /* Bitwise offset of the instruction indicated by slot */ | |
204 | uint32_t bit_pos; | |
205 | ||
206 | /* bit_pos split into byte and bit parts */ | |
207 | uint32_t byte_pos; | |
208 | uint32_t bit_res; | |
209 | ||
210 | /* Address part of an instruction */ | |
211 | uint32_t addr; | |
212 | ||
213 | /* Mask used to detect which instructions to convert */ | |
214 | uint32_t mask; | |
215 | ||
216 | /* 41-bit instruction stored somewhere in the lowest 48 bits */ | |
217 | uint64_t instr; | |
218 | ||
219 | /* Instruction normalized with bit_res for easier manipulation */ | |
220 | uint64_t norm; | |
221 | ||
222 | for (i = 0; i + 16 <= size; i += 16) { | |
223 | mask = branch_table[buf[i] & 0x1F]; | |
224 | for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { | |
225 | if (((mask >> slot) & 1) == 0) | |
226 | continue; | |
227 | ||
228 | byte_pos = bit_pos >> 3; | |
229 | bit_res = bit_pos & 7; | |
230 | instr = 0; | |
231 | for (j = 0; j < 6; ++j) | |
232 | instr |= (uint64_t)(buf[i + j + byte_pos]) | |
233 | << (8 * j); | |
234 | ||
235 | norm = instr >> bit_res; | |
236 | ||
237 | if (((norm >> 37) & 0x0F) == 0x05 | |
238 | && ((norm >> 9) & 0x07) == 0) { | |
239 | addr = (norm >> 13) & 0x0FFFFF; | |
240 | addr |= ((uint32_t)(norm >> 36) & 1) << 20; | |
241 | addr <<= 4; | |
242 | addr -= s->pos + (uint32_t)i; | |
243 | addr >>= 4; | |
244 | ||
245 | norm &= ~((uint64_t)0x8FFFFF << 13); | |
246 | norm |= (uint64_t)(addr & 0x0FFFFF) << 13; | |
247 | norm |= (uint64_t)(addr & 0x100000) | |
248 | << (36 - 20); | |
249 | ||
250 | instr &= (1 << bit_res) - 1; | |
251 | instr |= norm << bit_res; | |
252 | ||
253 | for (j = 0; j < 6; j++) | |
254 | buf[i + j + byte_pos] | |
255 | = (uint8_t)(instr >> (8 * j)); | |
256 | } | |
257 | } | |
258 | } | |
259 | ||
260 | return i; | |
261 | } | |
262 | #endif | |
263 | ||
264 | #ifdef XZ_DEC_ARM | |
265 | static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | |
266 | { | |
267 | size_t i; | |
268 | uint32_t addr; | |
269 | ||
270 | for (i = 0; i + 4 <= size; i += 4) { | |
271 | if (buf[i + 3] == 0xEB) { | |
272 | addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) | |
273 | | ((uint32_t)buf[i + 2] << 16); | |
274 | addr <<= 2; | |
275 | addr -= s->pos + (uint32_t)i + 8; | |
276 | addr >>= 2; | |
277 | buf[i] = (uint8_t)addr; | |
278 | buf[i + 1] = (uint8_t)(addr >> 8); | |
279 | buf[i + 2] = (uint8_t)(addr >> 16); | |
280 | } | |
281 | } | |
282 | ||
283 | return i; | |
284 | } | |
285 | #endif | |
286 | ||
287 | #ifdef XZ_DEC_ARMTHUMB | |
288 | static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | |
289 | { | |
290 | size_t i; | |
291 | uint32_t addr; | |
292 | ||
293 | for (i = 0; i + 4 <= size; i += 2) { | |
294 | if ((buf[i + 1] & 0xF8) == 0xF0 | |
295 | && (buf[i + 3] & 0xF8) == 0xF8) { | |
296 | addr = (((uint32_t)buf[i + 1] & 0x07) << 19) | |
297 | | ((uint32_t)buf[i] << 11) | |
298 | | (((uint32_t)buf[i + 3] & 0x07) << 8) | |
299 | | (uint32_t)buf[i + 2]; | |
300 | addr <<= 1; | |
301 | addr -= s->pos + (uint32_t)i + 4; | |
302 | addr >>= 1; | |
303 | buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07)); | |
304 | buf[i] = (uint8_t)(addr >> 11); | |
305 | buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07)); | |
306 | buf[i + 2] = (uint8_t)addr; | |
307 | i += 2; | |
308 | } | |
309 | } | |
310 | ||
311 | return i; | |
312 | } | |
313 | #endif | |
314 | ||
315 | #ifdef XZ_DEC_SPARC | |
316 | static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) | |
317 | { | |
318 | size_t i; | |
319 | uint32_t instr; | |
320 | ||
321 | for (i = 0; i + 4 <= size; i += 4) { | |
322 | instr = get_unaligned_be32(buf + i); | |
323 | if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { | |
324 | instr <<= 2; | |
325 | instr -= s->pos + (uint32_t)i; | |
326 | instr >>= 2; | |
327 | instr = ((uint32_t)0x40000000 - (instr & 0x400000)) | |
328 | | 0x40000000 | (instr & 0x3FFFFF); | |
329 | put_unaligned_be32(instr, buf + i); | |
330 | } | |
331 | } | |
332 | ||
333 | return i; | |
334 | } | |
335 | #endif | |
336 | ||
337 | /* | |
338 | * Apply the selected BCJ filter. Update *pos and s->pos to match the amount | |
339 | * of data that got filtered. | |
340 | * | |
341 | * NOTE: This is implemented as a switch statement to avoid using function | |
342 | * pointers, which could be problematic in the kernel boot code, which must | |
343 | * avoid pointers to static data (at least on x86). | |
344 | */ | |
345 | static void bcj_apply(struct xz_dec_bcj *s, | |
346 | uint8_t *buf, size_t *pos, size_t size) | |
347 | { | |
348 | size_t filtered; | |
349 | ||
350 | buf += *pos; | |
351 | size -= *pos; | |
352 | ||
353 | switch (s->type) { | |
354 | #ifdef XZ_DEC_X86 | |
355 | case BCJ_X86: | |
356 | filtered = bcj_x86(s, buf, size); | |
357 | break; | |
358 | #endif | |
359 | #ifdef XZ_DEC_POWERPC | |
360 | case BCJ_POWERPC: | |
361 | filtered = bcj_powerpc(s, buf, size); | |
362 | break; | |
363 | #endif | |
364 | #ifdef XZ_DEC_IA64 | |
365 | case BCJ_IA64: | |
366 | filtered = bcj_ia64(s, buf, size); | |
367 | break; | |
368 | #endif | |
369 | #ifdef XZ_DEC_ARM | |
370 | case BCJ_ARM: | |
371 | filtered = bcj_arm(s, buf, size); | |
372 | break; | |
373 | #endif | |
374 | #ifdef XZ_DEC_ARMTHUMB | |
375 | case BCJ_ARMTHUMB: | |
376 | filtered = bcj_armthumb(s, buf, size); | |
377 | break; | |
378 | #endif | |
379 | #ifdef XZ_DEC_SPARC | |
380 | case BCJ_SPARC: | |
381 | filtered = bcj_sparc(s, buf, size); | |
382 | break; | |
383 | #endif | |
384 | default: | |
385 | /* Never reached but silence compiler warnings. */ | |
386 | filtered = 0; | |
387 | break; | |
388 | } | |
389 | ||
390 | *pos += filtered; | |
391 | s->pos += filtered; | |
392 | } | |
393 | ||
394 | /* | |
395 | * Flush pending filtered data from temp to the output buffer. | |
396 | * Move the remaining mixture of possibly filtered and unfiltered | |
397 | * data to the beginning of temp. | |
398 | */ | |
399 | static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) | |
400 | { | |
401 | size_t copy_size; | |
402 | ||
403 | copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos); | |
404 | memcpy(b->out + b->out_pos, s->temp.buf, copy_size); | |
405 | b->out_pos += copy_size; | |
406 | ||
407 | s->temp.filtered -= copy_size; | |
408 | s->temp.size -= copy_size; | |
409 | memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size); | |
410 | } | |
411 | ||
412 | /* | |
413 | * The BCJ filter functions are primitive in sense that they process the | |
414 | * data in chunks of 1-16 bytes. To hide this issue, this function does | |
415 | * some buffering. | |
416 | */ | |
417 | XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, | |
418 | struct xz_dec_lzma2 *lzma2, | |
419 | struct xz_buf *b) | |
420 | { | |
421 | size_t out_start; | |
422 | ||
423 | /* | |
424 | * Flush pending already filtered data to the output buffer. Return | |
425 | * immediatelly if we couldn't flush everything, or if the next | |
426 | * filter in the chain had already returned XZ_STREAM_END. | |
427 | */ | |
428 | if (s->temp.filtered > 0) { | |
429 | bcj_flush(s, b); | |
430 | if (s->temp.filtered > 0) | |
431 | return XZ_OK; | |
432 | ||
433 | if (s->ret == XZ_STREAM_END) | |
434 | return XZ_STREAM_END; | |
435 | } | |
436 | ||
437 | /* | |
438 | * If we have more output space than what is currently pending in | |
439 | * temp, copy the unfiltered data from temp to the output buffer | |
440 | * and try to fill the output buffer by decoding more data from the | |
441 | * next filter in the chain. Apply the BCJ filter on the new data | |
442 | * in the output buffer. If everything cannot be filtered, copy it | |
443 | * to temp and rewind the output buffer position accordingly. | |
9c1f8594 LC |
444 | * |
445 | * This needs to be always run when temp.size == 0 to handle a special | |
446 | * case where the output buffer is full and the next filter has no | |
447 | * more output coming but hasn't returned XZ_STREAM_END yet. | |
24fa0402 | 448 | */ |
9c1f8594 | 449 | if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) { |
24fa0402 LC |
450 | out_start = b->out_pos; |
451 | memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size); | |
452 | b->out_pos += s->temp.size; | |
453 | ||
454 | s->ret = xz_dec_lzma2_run(lzma2, b); | |
455 | if (s->ret != XZ_STREAM_END | |
456 | && (s->ret != XZ_OK || s->single_call)) | |
457 | return s->ret; | |
458 | ||
459 | bcj_apply(s, b->out, &out_start, b->out_pos); | |
460 | ||
461 | /* | |
462 | * As an exception, if the next filter returned XZ_STREAM_END, | |
463 | * we can do that too, since the last few bytes that remain | |
464 | * unfiltered are meant to remain unfiltered. | |
465 | */ | |
466 | if (s->ret == XZ_STREAM_END) | |
467 | return XZ_STREAM_END; | |
468 | ||
469 | s->temp.size = b->out_pos - out_start; | |
470 | b->out_pos -= s->temp.size; | |
471 | memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size); | |
9c1f8594 LC |
472 | |
473 | /* | |
474 | * If there wasn't enough input to the next filter to fill | |
475 | * the output buffer with unfiltered data, there's no point | |
476 | * to try decoding more data to temp. | |
477 | */ | |
478 | if (b->out_pos + s->temp.size < b->out_size) | |
479 | return XZ_OK; | |
24fa0402 LC |
480 | } |
481 | ||
482 | /* | |
9c1f8594 LC |
483 | * We have unfiltered data in temp. If the output buffer isn't full |
484 | * yet, try to fill the temp buffer by decoding more data from the | |
485 | * next filter. Apply the BCJ filter on temp. Then we hopefully can | |
486 | * fill the actual output buffer by copying filtered data from temp. | |
487 | * A mix of filtered and unfiltered data may be left in temp; it will | |
488 | * be taken care on the next call to this function. | |
24fa0402 | 489 | */ |
9c1f8594 | 490 | if (b->out_pos < b->out_size) { |
24fa0402 LC |
491 | /* Make b->out{,_pos,_size} temporarily point to s->temp. */ |
492 | s->out = b->out; | |
493 | s->out_pos = b->out_pos; | |
494 | s->out_size = b->out_size; | |
495 | b->out = s->temp.buf; | |
496 | b->out_pos = s->temp.size; | |
497 | b->out_size = sizeof(s->temp.buf); | |
498 | ||
499 | s->ret = xz_dec_lzma2_run(lzma2, b); | |
500 | ||
501 | s->temp.size = b->out_pos; | |
502 | b->out = s->out; | |
503 | b->out_pos = s->out_pos; | |
504 | b->out_size = s->out_size; | |
505 | ||
506 | if (s->ret != XZ_OK && s->ret != XZ_STREAM_END) | |
507 | return s->ret; | |
508 | ||
509 | bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size); | |
510 | ||
511 | /* | |
512 | * If the next filter returned XZ_STREAM_END, we mark that | |
513 | * everything is filtered, since the last unfiltered bytes | |
514 | * of the stream are meant to be left as is. | |
515 | */ | |
516 | if (s->ret == XZ_STREAM_END) | |
517 | s->temp.filtered = s->temp.size; | |
518 | ||
519 | bcj_flush(s, b); | |
520 | if (s->temp.filtered > 0) | |
521 | return XZ_OK; | |
522 | } | |
523 | ||
524 | return s->ret; | |
525 | } | |
526 | ||
527 | XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) | |
528 | { | |
529 | struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); | |
530 | if (s != NULL) | |
531 | s->single_call = single_call; | |
532 | ||
533 | return s; | |
534 | } | |
535 | ||
536 | XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) | |
537 | { | |
538 | switch (id) { | |
539 | #ifdef XZ_DEC_X86 | |
540 | case BCJ_X86: | |
541 | #endif | |
542 | #ifdef XZ_DEC_POWERPC | |
543 | case BCJ_POWERPC: | |
544 | #endif | |
545 | #ifdef XZ_DEC_IA64 | |
546 | case BCJ_IA64: | |
547 | #endif | |
548 | #ifdef XZ_DEC_ARM | |
549 | case BCJ_ARM: | |
550 | #endif | |
551 | #ifdef XZ_DEC_ARMTHUMB | |
552 | case BCJ_ARMTHUMB: | |
553 | #endif | |
554 | #ifdef XZ_DEC_SPARC | |
555 | case BCJ_SPARC: | |
556 | #endif | |
557 | break; | |
558 | ||
559 | default: | |
560 | /* Unsupported Filter ID */ | |
561 | return XZ_OPTIONS_ERROR; | |
562 | } | |
563 | ||
564 | s->type = id; | |
565 | s->ret = XZ_OK; | |
566 | s->pos = 0; | |
567 | s->x86_prev_mask = 0; | |
568 | s->temp.filtered = 0; | |
569 | s->temp.size = 0; | |
570 | ||
571 | return XZ_OK; | |
572 | } | |
573 | ||
574 | #endif |