1/*
2 * Branch/Call/Jump (BCJ) filter decoders
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12
13/*
14 * The rest of the file is inside this ifdef. It makes things a little more
15 * convenient when building without support for any BCJ filters.
16 */
17#ifdef XZ_DEC_BCJ
18
struct xz_dec_bcj {
	/* Type of the BCJ filter being used */
	enum {
		BCJ_X86 = 4,        /* x86 or x86-64 */
		BCJ_POWERPC = 5,    /* Big endian only */
		BCJ_IA64 = 6,       /* Big or little endian */
		BCJ_ARM = 7,        /* Little endian only */
		BCJ_ARMTHUMB = 8,   /* Little endian only */
		BCJ_SPARC = 9       /* Big or little endian */
	} type;

	/*
	 * Return value of the next filter in the chain. We need to preserve
	 * this information across calls, because we must not call the next
	 * filter anymore once it has returned XZ_STREAM_END.
	 */
	enum xz_ret ret;

	/* True if we are operating in single-call mode. */
	bool single_call;

	/*
	 * Absolute position relative to the beginning of the uncompressed
	 * data (in a single .xz Block). We care only about the lowest 32
	 * bits so this doesn't need to be uint64_t even with big files.
	 */
	uint32_t pos;

	/* x86 filter state (mask of recently seen 0xE8/0xE9 bytes) */
	uint32_t x86_prev_mask;

	/*
	 * Temporary space to hold the variables from struct xz_buf while
	 * xz_dec_bcj_run() redirects b->out to point into temp.buf below.
	 */
	uint8_t *out;
	size_t out_pos;
	size_t out_size;

	struct {
		/* Amount of already filtered data in the beginning of buf */
		size_t filtered;

		/* Total amount of data currently stored in buf */
		size_t size;

		/*
		 * Buffer to hold a mix of filtered and unfiltered data. This
		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
		 *
		 * Type	Alignment	Look-ahead
		 * x86	1		4
		 * PowerPC	4	0
		 * IA-64	16	0
		 * ARM	4		0
		 * ARM-Thumb	2	2
		 * SPARC	4	0
		 */
		uint8_t buf[16];
	} temp;
};
77
78#ifdef XZ_DEC_X86
79/*
80 * This is used to test the most significant byte of a memory address
81 * in an x86 instruction.
82 */
83static inline int bcj_x86_test_msbyte(uint8_t b)
84{
85 return b == 0x00 || b == 0xFF;
86}
87
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/*
	 * Indexed by the mask of 0xE8/0xE9 bytes seen in the last few
	 * positions: whether a conversion is still allowed here.
	 */
	static const bool mask_to_allowed_status[8]
		= { true, true, true, false, true, false, false, false };

	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

	size_t i;
	/* Position of the most recently seen 0xE8/0xE9 byte */
	size_t prev_pos = (size_t)-1;
	/* Occurrence mask carried over from the previous call */
	uint32_t prev_mask = s->x86_prev_mask;
	uint32_t src;
	uint32_t dest;
	uint32_t j;
	uint8_t b;

	/* Each opcode needs four address bytes of look-ahead. */
	if (size <= 4)
		return 0;

	size -= 4;
	for (i = 0; i < size; ++i) {
		/* Look for CALL (0xE8) or JMP (0xE9) opcodes. */
		if ((buf[i] & 0xFE) != 0xE8)
			continue;

		prev_pos = i - prev_pos;
		if (prev_pos > 3) {
			/* Previous occurrence is too far away to matter. */
			prev_mask = 0;
		} else {
			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
			if (prev_mask != 0) {
				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
				if (!mask_to_allowed_status[prev_mask]
						|| bcj_x86_test_msbyte(b)) {
					/* Skip this one; remember it in
					 * the mask. */
					prev_pos = i;
					prev_mask = (prev_mask << 1) | 1;
					continue;
				}
			}
		}

		prev_pos = i;

		if (bcj_x86_test_msbyte(buf[i + 4])) {
			src = get_unaligned_le32(buf + i + 1);
			while (true) {
				/* Absolute address back to relative. */
				dest = src - (s->pos + (uint32_t)i + 5);
				if (prev_mask == 0)
					break;

				j = mask_to_bit_num[prev_mask] * 8;
				b = (uint8_t)(dest >> (24 - j));
				if (!bcj_x86_test_msbyte(b))
					break;

				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
			}

			/* Sign-extend bit 24 over the top byte. */
			dest &= 0x01FFFFFF;
			dest |= (uint32_t)0 - (dest & 0x01000000);
			put_unaligned_le32(dest, buf + i + 1);
			i += 4;
		} else {
			prev_mask = (prev_mask << 1) | 1;
		}
	}

	/* Save the occurrence mask for the next call. */
	prev_pos = i - prev_pos;
	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
	return i;
}
157#endif
158
159#ifdef XZ_DEC_POWERPC
160static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
161{
162 size_t i;
163 uint32_t instr;
164
165 for (i = 0; i + 4 <= size; i += 4) {
166 instr = get_unaligned_be32(buf + i);
167 if ((instr & 0xFC000003) == 0x48000001) {
168 instr &= 0x03FFFFFC;
169 instr -= s->pos + (uint32_t)i;
170 instr &= 0x03FFFFFC;
171 instr |= 0x48000001;
172 put_unaligned_be32(instr, buf + i);
173 }
174 }
175
176 return i;
177}
178#endif
179
180#ifdef XZ_DEC_IA64
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/*
	 * Mask of instruction slots to inspect, indexed by the template
	 * field (the five lowest bits of the first byte of the bundle).
	 */
	static const uint8_t branch_table[32] = {
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		4, 4, 6, 6, 0, 0, 7, 7,
		4, 4, 0, 0, 4, 4, 0, 0
	};

	/*
	 * The local variables take a little bit stack space, but it's less
	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
	 * stack usage here without doing that for the LZMA2 decoder too.
	 */

	/* Loop counters */
	size_t i;
	size_t j;

	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
	uint32_t slot;

	/* Bitwise offset of the instruction indicated by slot */
	uint32_t bit_pos;

	/* bit_pos split into byte and bit parts */
	uint32_t byte_pos;
	uint32_t bit_res;

	/* Address part of an instruction */
	uint32_t addr;

	/* Mask used to detect which instructions to convert */
	uint32_t mask;

	/* 41-bit instruction stored somewhere in the lowest 48 bits */
	uint64_t instr;

	/* Instruction normalized with bit_res for easier manipulation */
	uint64_t norm;

	/* Process complete 16-byte bundles only. */
	for (i = 0; i + 16 <= size; i += 16) {
		mask = branch_table[buf[i] & 0x1F];
		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
			if (((mask >> slot) & 1) == 0)
				continue;

			byte_pos = bit_pos >> 3;
			bit_res = bit_pos & 7;

			/* Load six little endian bytes covering the
			 * 41-bit instruction. */
			instr = 0;
			for (j = 0; j < 6; ++j)
				instr |= (uint64_t)(buf[i + j + byte_pos])
						<< (8 * j);

			norm = instr >> bit_res;

			/* NOTE(review): 0x05 appears to select the branch
			 * opcodes this filter converts — confirm against
			 * the IA-64 instruction reference. */
			if (((norm >> 37) & 0x0F) == 0x05
					&& ((norm >> 9) & 0x07) == 0) {
				addr = (norm >> 13) & 0x0FFFFF;
				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
				addr <<= 4;
				addr -= s->pos + (uint32_t)i;
				addr >>= 4;

				/* Write the converted address back into
				 * the immediate fields. */
				norm &= ~((uint64_t)0x8FFFFF << 13);
				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
				norm |= (uint64_t)(addr & 0x100000)
						<< (36 - 20);

				instr &= (1 << bit_res) - 1;
				instr |= norm << bit_res;

				for (j = 0; j < 6; j++)
					buf[i + j + byte_pos]
						= (uint8_t)(instr >> (8 * j));
			}
		}
	}

	return i;
}
262#endif
263
264#ifdef XZ_DEC_ARM
265static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
266{
267 size_t i;
268 uint32_t addr;
269
270 for (i = 0; i + 4 <= size; i += 4) {
271 if (buf[i + 3] == 0xEB) {
272 addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
273 | ((uint32_t)buf[i + 2] << 16);
274 addr <<= 2;
275 addr -= s->pos + (uint32_t)i + 8;
276 addr >>= 2;
277 buf[i] = (uint8_t)addr;
278 buf[i + 1] = (uint8_t)(addr >> 8);
279 buf[i + 2] = (uint8_t)(addr >> 16);
280 }
281 }
282
283 return i;
284}
285#endif
286
287#ifdef XZ_DEC_ARMTHUMB
288static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
289{
290 size_t i;
291 uint32_t addr;
292
293 for (i = 0; i + 4 <= size; i += 2) {
294 if ((buf[i + 1] & 0xF8) == 0xF0
295 && (buf[i + 3] & 0xF8) == 0xF8) {
296 addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
297 | ((uint32_t)buf[i] << 11)
298 | (((uint32_t)buf[i + 3] & 0x07) << 8)
299 | (uint32_t)buf[i + 2];
300 addr <<= 1;
301 addr -= s->pos + (uint32_t)i + 4;
302 addr >>= 1;
303 buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
304 buf[i] = (uint8_t)(addr >> 11);
305 buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
306 buf[i + 2] = (uint8_t)addr;
307 i += 2;
308 }
309 }
310
311 return i;
312}
313#endif
314
315#ifdef XZ_DEC_SPARC
316static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
317{
318 size_t i;
319 uint32_t instr;
320
321 for (i = 0; i + 4 <= size; i += 4) {
322 instr = get_unaligned_be32(buf + i);
323 if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
324 instr <<= 2;
325 instr -= s->pos + (uint32_t)i;
326 instr >>= 2;
327 instr = ((uint32_t)0x40000000 - (instr & 0x400000))
328 | 0x40000000 | (instr & 0x3FFFFF);
329 put_unaligned_be32(instr, buf + i);
330 }
331 }
332
333 return i;
334}
335#endif
336
337/*
338 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
339 * of data that got filtered.
340 *
341 * NOTE: This is implemented as a switch statement to avoid using function
342 * pointers, which could be problematic in the kernel boot code, which must
343 * avoid pointers to static data (at least on x86).
344 */
static void bcj_apply(struct xz_dec_bcj *s,
		      uint8_t *buf, size_t *pos, size_t size)
{
	/* Number of bytes the filter function actually processed */
	size_t filtered;

	/* Skip the part of the buffer that was already filtered. */
	buf += *pos;
	size -= *pos;

	switch (s->type) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
		filtered = bcj_x86(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
		filtered = bcj_powerpc(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
		filtered = bcj_ia64(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
		filtered = bcj_arm(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
		filtered = bcj_armthumb(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
		filtered = bcj_sparc(s, buf, size);
		break;
#endif
	default:
		/* Never reached but silence compiler warnings. */
		filtered = 0;
		break;
	}

	/* Advance both the caller's position and the absolute position. */
	*pos += filtered;
	s->pos += filtered;
}
393
394/*
395 * Flush pending filtered data from temp to the output buffer.
396 * Move the remaining mixture of possibly filtered and unfiltered
397 * data to the beginning of temp.
398 */
399static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
400{
401 size_t copy_size;
402
403 copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
404 memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
405 b->out_pos += copy_size;
406
407 s->temp.filtered -= copy_size;
408 s->temp.size -= copy_size;
409 memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
410}
411
412/*
413 * The BCJ filter functions are primitive in sense that they process the
414 * data in chunks of 1-16 bytes. To hide this issue, this function does
415 * some buffering.
416 */
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
				     struct xz_dec_lzma2 *lzma2,
				     struct xz_buf *b)
{
	size_t out_start;

	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned XZ_STREAM_END.
	 */
	if (s->temp.filtered > 0) {
		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;

		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;
	}

	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If everything cannot be filtered, copy it
	 * to temp and rewind the output buffer position accordingly.
	 *
	 * This needs to be always run when temp.size == 0 to handle a special
	 * case where the output buffer is full and the next filter has no
	 * more output coming but hasn't returned XZ_STREAM_END yet.
	 */
	if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
		out_start = b->out_pos;
		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
		b->out_pos += s->temp.size;

		s->ret = xz_dec_lzma2_run(lzma2, b);
		if (s->ret != XZ_STREAM_END
				&& (s->ret != XZ_OK || s->single_call))
			return s->ret;

		/* Filter as much of the new data as possible. */
		bcj_apply(s, b->out, &out_start, b->out_pos);

		/*
		 * As an exception, if the next filter returned XZ_STREAM_END,
		 * we can do that too, since the last few bytes that remain
		 * unfiltered are meant to remain unfiltered.
		 */
		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;

		/* Hide the unfiltered tail from the caller by moving it
		 * into temp until it has been filtered. */
		s->temp.size = b->out_pos - out_start;
		b->out_pos -= s->temp.size;
		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);

		/*
		 * If there wasn't enough input to the next filter to fill
		 * the output buffer with unfiltered data, there's no point
		 * to try decoding more data to temp.
		 */
		if (b->out_pos + s->temp.size < b->out_size)
			return XZ_OK;
	}

	/*
	 * We have unfiltered data in temp. If the output buffer isn't full
	 * yet, try to fill the temp buffer by decoding more data from the
	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
	 * fill the actual output buffer by copying filtered data from temp.
	 * A mix of filtered and unfiltered data may be left in temp; it will
	 * be taken care on the next call to this function.
	 */
	if (b->out_pos < b->out_size) {
		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
		s->out = b->out;
		s->out_pos = b->out_pos;
		s->out_size = b->out_size;
		b->out = s->temp.buf;
		b->out_pos = s->temp.size;
		b->out_size = sizeof(s->temp.buf);

		s->ret = xz_dec_lzma2_run(lzma2, b);

		/* Restore the caller's buffer. */
		s->temp.size = b->out_pos;
		b->out = s->out;
		b->out_pos = s->out_pos;
		b->out_size = s->out_size;

		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
			return s->ret;

		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

		/*
		 * If the next filter returned XZ_STREAM_END, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if (s->ret == XZ_STREAM_END)
			s->temp.filtered = s->temp.size;

		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;
	}

	return s->ret;
}
526
527XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
528{
529 struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
530 if (s != NULL)
531 s->single_call = single_call;
532
533 return s;
534}
535
XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
{
	/*
	 * All Filter IDs supported by this build fall through to the
	 * break; anything else is rejected.
	 */
	switch (id) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
#endif
		break;

	default:
		/* Unsupported Filter ID */
		return XZ_OPTIONS_ERROR;
	}

	/* Start a new Block with a fresh filter state. */
	s->type = id;
	s->ret = XZ_OK;
	s->pos = 0;
	s->x86_prev_mask = 0;
	s->temp.filtered = 0;
	s->temp.size = 0;

	return XZ_OK;
}
573
574#endif
1// SPDX-License-Identifier: 0BSD
2
3/*
4 * Branch/Call/Jump (BCJ) filter decoders
5 *
6 * Authors: Lasse Collin <lasse.collin@tukaani.org>
7 * Igor Pavlov <https://7-zip.org/>
8 */
9
10#include "xz_private.h"
11
12/*
13 * The rest of the file is inside this ifdef. It makes things a little more
14 * convenient when building without support for any BCJ filters.
15 */
16#ifdef XZ_DEC_BCJ
17
struct xz_dec_bcj {
	/* Type of the BCJ filter being used */
	enum {
		BCJ_X86 = 4,        /* x86 or x86-64 */
		BCJ_POWERPC = 5,    /* Big endian only */
		BCJ_IA64 = 6,       /* Big or little endian */
		BCJ_ARM = 7,        /* Little endian only */
		BCJ_ARMTHUMB = 8,   /* Little endian only */
		BCJ_SPARC = 9,      /* Big or little endian */
		BCJ_ARM64 = 10,     /* AArch64 */
		BCJ_RISCV = 11      /* RV32GQC_Zfh, RV64GQC_Zfh */
	} type;

	/*
	 * Return value of the next filter in the chain. We need to preserve
	 * this information across calls, because we must not call the next
	 * filter anymore once it has returned XZ_STREAM_END.
	 */
	enum xz_ret ret;

	/* True if we are operating in single-call mode. */
	bool single_call;

	/*
	 * Absolute position relative to the beginning of the uncompressed
	 * data (in a single .xz Block). We care only about the lowest 32
	 * bits so this doesn't need to be uint64_t even with big files.
	 */
	uint32_t pos;

	/* x86 filter state (mask of recently seen 0xE8/0xE9 bytes) */
	uint32_t x86_prev_mask;

	/* Temporary space to hold the variables from struct xz_buf */
	uint8_t *out;
	size_t out_pos;
	size_t out_size;

	struct {
		/* Amount of already filtered data in the beginning of buf */
		size_t filtered;

		/* Total amount of data currently stored in buf */
		size_t size;

		/*
		 * Buffer to hold a mix of filtered and unfiltered data. This
		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
		 *
		 * Type	Alignment	Look-ahead
		 * x86	1		4
		 * PowerPC	4	0
		 * IA-64	16	0
		 * ARM	4		0
		 * ARM-Thumb	2	2
		 * SPARC	4	0
		 * ARM64	4	0
		 * RISC-V	2	6
		 */
		uint8_t buf[16];
	} temp;
};
78
79#ifdef XZ_DEC_X86
80/*
81 * This is used to test the most significant byte of a memory address
82 * in an x86 instruction.
83 */
84static inline int bcj_x86_test_msbyte(uint8_t b)
85{
86 return b == 0x00 || b == 0xFF;
87}
88
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/*
	 * Indexed by the mask of 0xE8/0xE9 bytes seen in the last few
	 * positions: whether a conversion is still allowed here.
	 */
	static const bool mask_to_allowed_status[8]
		= { true, true, true, false, true, false, false, false };

	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

	size_t i;
	/* Position of the most recently seen 0xE8/0xE9 byte */
	size_t prev_pos = (size_t)-1;
	/* Occurrence mask carried over from the previous call */
	uint32_t prev_mask = s->x86_prev_mask;
	uint32_t src;
	uint32_t dest;
	uint32_t j;
	uint8_t b;

	/* Each opcode needs four address bytes of look-ahead. */
	if (size <= 4)
		return 0;

	size -= 4;
	for (i = 0; i < size; ++i) {
		/* Look for CALL (0xE8) or JMP (0xE9) opcodes. */
		if ((buf[i] & 0xFE) != 0xE8)
			continue;

		prev_pos = i - prev_pos;
		if (prev_pos > 3) {
			/* Previous occurrence is too far away to matter. */
			prev_mask = 0;
		} else {
			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
			if (prev_mask != 0) {
				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
				if (!mask_to_allowed_status[prev_mask]
						|| bcj_x86_test_msbyte(b)) {
					/* Skip this one; remember it in
					 * the mask. */
					prev_pos = i;
					prev_mask = (prev_mask << 1) | 1;
					continue;
				}
			}
		}

		prev_pos = i;

		if (bcj_x86_test_msbyte(buf[i + 4])) {
			src = get_unaligned_le32(buf + i + 1);
			while (true) {
				/* Absolute address back to relative. */
				dest = src - (s->pos + (uint32_t)i + 5);
				if (prev_mask == 0)
					break;

				j = mask_to_bit_num[prev_mask] * 8;
				b = (uint8_t)(dest >> (24 - j));
				if (!bcj_x86_test_msbyte(b))
					break;

				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
			}

			/* Sign-extend bit 24 over the top byte. */
			dest &= 0x01FFFFFF;
			dest |= (uint32_t)0 - (dest & 0x01000000);
			put_unaligned_le32(dest, buf + i + 1);
			i += 4;
		} else {
			prev_mask = (prev_mask << 1) | 1;
		}
	}

	/* Save the occurrence mask for the next call. */
	prev_pos = i - prev_pos;
	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
	return i;
}
158#endif
159
160#ifdef XZ_DEC_POWERPC
161static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
162{
163 size_t i;
164 uint32_t instr;
165
166 size &= ~(size_t)3;
167
168 for (i = 0; i < size; i += 4) {
169 instr = get_unaligned_be32(buf + i);
170 if ((instr & 0xFC000003) == 0x48000001) {
171 instr &= 0x03FFFFFC;
172 instr -= s->pos + (uint32_t)i;
173 instr &= 0x03FFFFFC;
174 instr |= 0x48000001;
175 put_unaligned_be32(instr, buf + i);
176 }
177 }
178
179 return i;
180}
181#endif
182
183#ifdef XZ_DEC_IA64
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/*
	 * Mask of instruction slots to inspect, indexed by the template
	 * field (the five lowest bits of the first byte of the bundle).
	 */
	static const uint8_t branch_table[32] = {
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		4, 4, 6, 6, 0, 0, 7, 7,
		4, 4, 0, 0, 4, 4, 0, 0
	};

	/*
	 * The local variables take a little bit stack space, but it's less
	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
	 * stack usage here without doing that for the LZMA2 decoder too.
	 */

	/* Loop counters */
	size_t i;
	size_t j;

	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
	uint32_t slot;

	/* Bitwise offset of the instruction indicated by slot */
	uint32_t bit_pos;

	/* bit_pos split into byte and bit parts */
	uint32_t byte_pos;
	uint32_t bit_res;

	/* Address part of an instruction */
	uint32_t addr;

	/* Mask used to detect which instructions to convert */
	uint32_t mask;

	/* 41-bit instruction stored somewhere in the lowest 48 bits */
	uint64_t instr;

	/* Instruction normalized with bit_res for easier manipulation */
	uint64_t norm;

	/* Process complete 16-byte bundles only. */
	size &= ~(size_t)15;

	for (i = 0; i < size; i += 16) {
		mask = branch_table[buf[i] & 0x1F];
		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
			if (((mask >> slot) & 1) == 0)
				continue;

			byte_pos = bit_pos >> 3;
			bit_res = bit_pos & 7;

			/* Load six little endian bytes covering the
			 * 41-bit instruction. */
			instr = 0;
			for (j = 0; j < 6; ++j)
				instr |= (uint64_t)(buf[i + j + byte_pos])
						<< (8 * j);

			norm = instr >> bit_res;

			/* NOTE(review): 0x05 appears to select the branch
			 * opcodes this filter converts — confirm against
			 * the IA-64 instruction reference. */
			if (((norm >> 37) & 0x0F) == 0x05
					&& ((norm >> 9) & 0x07) == 0) {
				addr = (norm >> 13) & 0x0FFFFF;
				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
				addr <<= 4;
				addr -= s->pos + (uint32_t)i;
				addr >>= 4;

				/* Write the converted address back into
				 * the immediate fields. */
				norm &= ~((uint64_t)0x8FFFFF << 13);
				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
				norm |= (uint64_t)(addr & 0x100000)
						<< (36 - 20);

				instr &= (1 << bit_res) - 1;
				instr |= norm << bit_res;

				for (j = 0; j < 6; j++)
					buf[i + j + byte_pos]
						= (uint8_t)(instr >> (8 * j));
			}
		}
	}

	return i;
}
267#endif
268
269#ifdef XZ_DEC_ARM
270static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
271{
272 size_t i;
273 uint32_t addr;
274
275 size &= ~(size_t)3;
276
277 for (i = 0; i < size; i += 4) {
278 if (buf[i + 3] == 0xEB) {
279 addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
280 | ((uint32_t)buf[i + 2] << 16);
281 addr <<= 2;
282 addr -= s->pos + (uint32_t)i + 8;
283 addr >>= 2;
284 buf[i] = (uint8_t)addr;
285 buf[i + 1] = (uint8_t)(addr >> 8);
286 buf[i + 2] = (uint8_t)(addr >> 16);
287 }
288 }
289
290 return i;
291}
292#endif
293
294#ifdef XZ_DEC_ARMTHUMB
295static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
296{
297 size_t i;
298 uint32_t addr;
299
300 if (size < 4)
301 return 0;
302
303 size -= 4;
304
305 for (i = 0; i <= size; i += 2) {
306 if ((buf[i + 1] & 0xF8) == 0xF0
307 && (buf[i + 3] & 0xF8) == 0xF8) {
308 addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
309 | ((uint32_t)buf[i] << 11)
310 | (((uint32_t)buf[i + 3] & 0x07) << 8)
311 | (uint32_t)buf[i + 2];
312 addr <<= 1;
313 addr -= s->pos + (uint32_t)i + 4;
314 addr >>= 1;
315 buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
316 buf[i] = (uint8_t)(addr >> 11);
317 buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
318 buf[i + 2] = (uint8_t)addr;
319 i += 2;
320 }
321 }
322
323 return i;
324}
325#endif
326
327#ifdef XZ_DEC_SPARC
328static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
329{
330 size_t i;
331 uint32_t instr;
332
333 size &= ~(size_t)3;
334
335 for (i = 0; i < size; i += 4) {
336 instr = get_unaligned_be32(buf + i);
337 if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
338 instr <<= 2;
339 instr -= s->pos + (uint32_t)i;
340 instr >>= 2;
341 instr = ((uint32_t)0x40000000 - (instr & 0x400000))
342 | 0x40000000 | (instr & 0x3FFFFF);
343 put_unaligned_be32(instr, buf + i);
344 }
345 }
346
347 return i;
348}
349#endif
350
351#ifdef XZ_DEC_ARM64
352static size_t bcj_arm64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
353{
354 size_t i;
355 uint32_t instr;
356 uint32_t addr;
357
358 size &= ~(size_t)3;
359
360 for (i = 0; i < size; i += 4) {
361 instr = get_unaligned_le32(buf + i);
362
363 if ((instr >> 26) == 0x25) {
364 /* BL instruction */
365 addr = instr - ((s->pos + (uint32_t)i) >> 2);
366 instr = 0x94000000 | (addr & 0x03FFFFFF);
367 put_unaligned_le32(instr, buf + i);
368
369 } else if ((instr & 0x9F000000) == 0x90000000) {
370 /* ADRP instruction */
371 addr = ((instr >> 29) & 3) | ((instr >> 3) & 0x1FFFFC);
372
373 /* Only convert values in the range +/-512 MiB. */
374 if ((addr + 0x020000) & 0x1C0000)
375 continue;
376
377 addr -= (s->pos + (uint32_t)i) >> 12;
378
379 instr &= 0x9000001F;
380 instr |= (addr & 3) << 29;
381 instr |= (addr & 0x03FFFC) << 3;
382 instr |= (0U - (addr & 0x020000)) & 0xE00000;
383
384 put_unaligned_le32(instr, buf + i);
385 }
386 }
387
388 return i;
389}
390#endif
391
392#ifdef XZ_DEC_RISCV
static size_t bcj_riscv(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t b1;
	uint32_t b2;
	uint32_t b3;
	uint32_t instr;
	uint32_t instr2;
	uint32_t instr2_rs1;
	uint32_t addr;

	/* A conversion may read up to 8 bytes starting at position i. */
	if (size < 8)
		return 0;

	size -= 8;

	/* Instructions may be 2-byte aligned (compressed extension). */
	for (i = 0; i <= size; i += 2) {
		instr = buf[i];

		if (instr == 0xEF) {
			/* JAL */
			b1 = buf[i + 1];

			/* NOTE(review): this rejects some immediate bit
			 * patterns before conversion; presumably only
			 * branches the encoder converted are accepted —
			 * confirm against the encoder. */
			if ((b1 & 0x0D) != 0)
				continue;

			b2 = buf[i + 2];
			b3 = buf[i + 3];

			/* Reassemble the address bits from the JAL
			 * immediate encoding, convert, and store back. */
			addr = ((b1 & 0xF0) << 13) | (b2 << 9) | (b3 << 1);
			addr -= s->pos + (uint32_t)i;

			buf[i + 1] = (uint8_t)((b1 & 0x0F)
					| ((addr >> 8) & 0xF0));

			buf[i + 2] = (uint8_t)(((addr >> 16) & 0x0F)
					| ((addr >> 7) & 0x10)
					| ((addr << 4) & 0xE0));

			buf[i + 3] = (uint8_t)(((addr >> 4) & 0x7F)
					| ((addr >> 13) & 0x80));

			/* The instruction is 4 bytes; the loop adds 2. */
			i += 4 - 2;

		} else if ((instr & 0x7F) == 0x17) {
			/* AUIPC */
			instr |= (uint32_t)buf[i + 1] << 8;
			instr |= (uint32_t)buf[i + 2] << 16;
			instr |= (uint32_t)buf[i + 3] << 24;

			if (instr & 0xE80) {
				/* AUIPC's rd doesn't equal x0 or x2. */

				/* The instruction paired with the AUIPC */
				instr2 = get_unaligned_le32(buf + i + 4);

				/* NOTE(review): verifies the pair matches
				 * the form the encoder produced; skip 6
				 * bytes if it doesn't. */
				if (((instr << 8) ^ (instr2 - 3)) & 0xF8003) {
					i += 6 - 2;
					continue;
				}

				addr = (instr & 0xFFFFF000) + (instr2 >> 20);

				instr = 0x17 | (2 << 7) | (instr2 << 12);
				instr2 = addr;
			} else {
				/* AUIPC's rd equals x0 or x2. */
				instr2_rs1 = instr >> 27;

				if ((uint32_t)((instr - 0x3117) << 18)
						>= (instr2_rs1 & 0x1D)) {
					i += 4 - 2;
					continue;
				}

				/* The absolute address follows in big
				 * endian order. */
				addr = get_unaligned_be32(buf + i + 4);
				addr -= s->pos + (uint32_t)i;

				instr2 = (instr >> 12) | (addr << 20);

				instr = 0x17 | (instr2_rs1 << 7)
					| ((addr + 0x800) & 0xFFFFF000);
			}

			put_unaligned_le32(instr, buf + i);
			put_unaligned_le32(instr2, buf + i + 4);

			/* The converted pair is 8 bytes; the loop adds 2. */
			i += 8 - 2;
		}
	}

	return i;
}
483#endif
484
485/*
486 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
487 * of data that got filtered.
488 *
489 * NOTE: This is implemented as a switch statement to avoid using function
490 * pointers, which could be problematic in the kernel boot code, which must
491 * avoid pointers to static data (at least on x86).
492 */
static void bcj_apply(struct xz_dec_bcj *s,
		      uint8_t *buf, size_t *pos, size_t size)
{
	/* Number of bytes the filter function actually processed */
	size_t filtered;

	/* Skip the part of the buffer that was already filtered. */
	buf += *pos;
	size -= *pos;

	switch (s->type) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
		filtered = bcj_x86(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
		filtered = bcj_powerpc(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
		filtered = bcj_ia64(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
		filtered = bcj_arm(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
		filtered = bcj_armthumb(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
		filtered = bcj_sparc(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARM64
	case BCJ_ARM64:
		filtered = bcj_arm64(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_RISCV
	case BCJ_RISCV:
		filtered = bcj_riscv(s, buf, size);
		break;
#endif
	default:
		/* Never reached but silence compiler warnings. */
		filtered = 0;
		break;
	}

	/* Advance both the caller's position and the absolute position. */
	*pos += filtered;
	s->pos += filtered;
}
551
552/*
553 * Flush pending filtered data from temp to the output buffer.
554 * Move the remaining mixture of possibly filtered and unfiltered
555 * data to the beginning of temp.
556 */
557static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
558{
559 size_t copy_size;
560
561 copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
562 memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
563 b->out_pos += copy_size;
564
565 s->temp.filtered -= copy_size;
566 s->temp.size -= copy_size;
567 memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
568}
569
570/*
571 * The BCJ filter functions are primitive in sense that they process the
572 * data in chunks of 1-16 bytes. To hide this issue, this function does
573 * some buffering.
574 */
enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2,
			   struct xz_buf *b)
{
	size_t out_start;

	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned XZ_STREAM_END.
	 */
	if (s->temp.filtered > 0) {
		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;

		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;
	}

	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If everything cannot be filtered, copy it
	 * to temp and rewind the output buffer position accordingly.
	 *
	 * This needs to be always run when temp.size == 0 to handle a special
	 * case where the output buffer is full and the next filter has no
	 * more output coming but hasn't returned XZ_STREAM_END yet.
	 */
	if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
		out_start = b->out_pos;
		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
		b->out_pos += s->temp.size;

		s->ret = xz_dec_lzma2_run(lzma2, b);
		if (s->ret != XZ_STREAM_END
				&& (s->ret != XZ_OK || s->single_call))
			return s->ret;

		/* Filter as much of the new data as possible. */
		bcj_apply(s, b->out, &out_start, b->out_pos);

		/*
		 * As an exception, if the next filter returned XZ_STREAM_END,
		 * we can do that too, since the last few bytes that remain
		 * unfiltered are meant to remain unfiltered.
		 */
		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;

		/* Hide the unfiltered tail from the caller by moving it
		 * into temp until it has been filtered. */
		s->temp.size = b->out_pos - out_start;
		b->out_pos -= s->temp.size;
		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);

		/*
		 * If there wasn't enough input to the next filter to fill
		 * the output buffer with unfiltered data, there's no point
		 * to try decoding more data to temp.
		 */
		if (b->out_pos + s->temp.size < b->out_size)
			return XZ_OK;
	}

	/*
	 * We have unfiltered data in temp. If the output buffer isn't full
	 * yet, try to fill the temp buffer by decoding more data from the
	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
	 * fill the actual output buffer by copying filtered data from temp.
	 * A mix of filtered and unfiltered data may be left in temp; it will
	 * be taken care on the next call to this function.
	 */
	if (b->out_pos < b->out_size) {
		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
		s->out = b->out;
		s->out_pos = b->out_pos;
		s->out_size = b->out_size;
		b->out = s->temp.buf;
		b->out_pos = s->temp.size;
		b->out_size = sizeof(s->temp.buf);

		s->ret = xz_dec_lzma2_run(lzma2, b);

		/* Restore the caller's buffer. */
		s->temp.size = b->out_pos;
		b->out = s->out;
		b->out_pos = s->out_pos;
		b->out_size = s->out_size;

		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
			return s->ret;

		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

		/*
		 * If the next filter returned XZ_STREAM_END, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if (s->ret == XZ_STREAM_END)
			s->temp.filtered = s->temp.size;

		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;
	}

	return s->ret;
}
683
684struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
685{
686 struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
687 if (s != NULL)
688 s->single_call = single_call;
689
690 return s;
691}
692
enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
{
	/*
	 * All Filter IDs supported by this build fall through to the
	 * break; anything else is rejected.
	 */
	switch (id) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
#endif
#ifdef XZ_DEC_ARM64
	case BCJ_ARM64:
#endif
#ifdef XZ_DEC_RISCV
	case BCJ_RISCV:
#endif
		break;

	default:
		/* Unsupported Filter ID */
		return XZ_OPTIONS_ERROR;
	}

	/* Start a new Block with a fresh filter state. */
	s->type = id;
	s->ret = XZ_OK;
	s->pos = 0;
	s->x86_prev_mask = 0;
	s->temp.filtered = 0;
	s->temp.size = 0;

	return XZ_OK;
}
736
737#endif