Linux Audio

Check our new training course

Yocto / OpenEmbedded training

Feb 10-13, 2025
Register
Loading...
v4.17
 
 
  1/*
  2 * Branch/Call/Jump (BCJ) filter decoders
  3 *
  4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
  5 *          Igor Pavlov <http://7-zip.org/>
  6 *
  7 * This file has been put into the public domain.
  8 * You can do whatever you want with this file.
  9 */
 10
 11#include "xz_private.h"
 12
 13/*
 14 * The rest of the file is inside this ifdef. It makes things a little more
 15 * convenient when building without support for any BCJ filters.
 16 */
 17#ifdef XZ_DEC_BCJ
 18
struct xz_dec_bcj {
	/* Type of the BCJ filter being used */
	enum {
		BCJ_X86 = 4,        /* x86 or x86-64 */
		BCJ_POWERPC = 5,    /* Big endian only */
		BCJ_IA64 = 6,       /* Big or little endian */
		BCJ_ARM = 7,        /* Little endian only */
		BCJ_ARMTHUMB = 8,   /* Little endian only */
		BCJ_SPARC = 9       /* Big or little endian */
	} type;

	/*
	 * Return value of the next filter in the chain. We need to preserve
	 * this information across calls, because we must not call the next
	 * filter anymore once it has returned XZ_STREAM_END.
	 */
	enum xz_ret ret;

	/* True if we are operating in single-call mode. */
	bool single_call;

	/*
	 * Absolute position relative to the beginning of the uncompressed
	 * data (in a single .xz Block). We care only about the lowest 32
	 * bits so this doesn't need to be uint64_t even with big files.
	 */
	uint32_t pos;

	/* x86 filter state: mask of recent 0xE8/0xE9 bytes kept across calls */
	uint32_t x86_prev_mask;

	/* Temporary space to hold the variables from struct xz_buf */
	uint8_t *out;
	size_t out_pos;
	size_t out_size;

	struct {
		/* Amount of already filtered data in the beginning of buf */
		size_t filtered;

		/* Total amount of data currently stored in buf */
		size_t size;

		/*
		 * Buffer to hold a mix of filtered and unfiltered data. This
		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
		 *
		 * Type         Alignment   Look-ahead
		 * x86              1           4
		 * PowerPC          4           0
		 * IA-64           16           0
		 * ARM              4           0
		 * ARM-Thumb        2           2
		 * SPARC            4           0
		 */
		uint8_t buf[16];
	} temp;
};
 77
 78#ifdef XZ_DEC_X86
 79/*
 80 * This is used to test the most significant byte of a memory address
 81 * in an x86 instruction.
 82 */
 83static inline int bcj_x86_test_msbyte(uint8_t b)
 84{
 85	return b == 0x00 || b == 0xFF;
 86}
 87
/*
 * x86 BCJ filter decoder: converts relative addresses back to absolute
 * in 0xE8/0xE9 (CALL/JMP rel32) instructions. Returns the number of
 * bytes that were fully processed; the caller keeps the remainder for
 * the next call.
 */
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/*
	 * Indexed by the 3-bit mask of recently seen 0xE8/0xE9 bytes:
	 * whether a conversion is allowed at all, and how many bytes back
	 * the most recent one was.
	 */
	static const bool mask_to_allowed_status[8]
		= { true, true, true, false, true, false, false, false };

	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

	size_t i;
	/* Position of the previous 0xE8/0xE9 byte; (size_t)-1 = none yet */
	size_t prev_pos = (size_t)-1;
	uint32_t prev_mask = s->x86_prev_mask;
	uint32_t src;
	uint32_t dest;
	uint32_t j;
	uint8_t b;

	/* Each conversion needs the opcode byte plus four bytes of look-ahead. */
	if (size <= 4)
		return 0;

	size -= 4;
	for (i = 0; i < size; ++i) {
		/* 0xFE mask matches exactly the opcode bytes 0xE8 and 0xE9. */
		if ((buf[i] & 0xFE) != 0xE8)
			continue;

		/* Distance from the previous opcode byte. */
		prev_pos = i - prev_pos;
		if (prev_pos > 3) {
			prev_mask = 0;
		} else {
			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
			if (prev_mask != 0) {
				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
				if (!mask_to_allowed_status[prev_mask]
						|| bcj_x86_test_msbyte(b)) {
					/* Skip: too close to a previous match. */
					prev_pos = i;
					prev_mask = (prev_mask << 1) | 1;
					continue;
				}
			}
		}

		prev_pos = i;

		if (bcj_x86_test_msbyte(buf[i + 4])) {
			src = get_unaligned_le32(buf + i + 1);
			while (true) {
				/* Relative -> absolute address conversion. */
				dest = src - (s->pos + (uint32_t)i + 5);
				if (prev_mask == 0)
					break;

				j = mask_to_bit_num[prev_mask] * 8;
				b = (uint8_t)(dest >> (24 - j));
				if (!bcj_x86_test_msbyte(b))
					break;

				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
			}

			/* Sign-extend the result from 25 bits to 32 bits. */
			dest &= 0x01FFFFFF;
			dest |= (uint32_t)0 - (dest & 0x01000000);
			put_unaligned_le32(dest, buf + i + 1);
			i += 4;
		} else {
			prev_mask = (prev_mask << 1) | 1;
		}
	}

	/* Save the opcode-byte mask for the next call. */
	prev_pos = i - prev_pos;
	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
	return i;
}
157#endif
158
159#ifdef XZ_DEC_POWERPC
160static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
161{
162	size_t i;
163	uint32_t instr;
164
165	for (i = 0; i + 4 <= size; i += 4) {
 
 
166		instr = get_unaligned_be32(buf + i);
167		if ((instr & 0xFC000003) == 0x48000001) {
168			instr &= 0x03FFFFFC;
169			instr -= s->pos + (uint32_t)i;
170			instr &= 0x03FFFFFC;
171			instr |= 0x48000001;
172			put_unaligned_be32(instr, buf + i);
173		}
174	}
175
176	return i;
177}
178#endif
179
180#ifdef XZ_DEC_IA64
/*
 * IA-64 BCJ filter decoder: converts branch instructions inside 16-byte
 * instruction bundles. Returns the number of bytes processed (a multiple
 * of 16).
 */
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/*
	 * Indexed by the low five bits of the first byte of a bundle
	 * (the template field). Each set bit selects an instruction slot
	 * that may hold a convertible branch.
	 */
	static const uint8_t branch_table[32] = {
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		4, 4, 6, 6, 0, 0, 7, 7,
		4, 4, 0, 0, 4, 4, 0, 0
	};

	/*
	 * The local variables take a little bit stack space, but it's less
	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
	 * stack usage here without doing that for the LZMA2 decoder too.
	 */

	/* Loop counters */
	size_t i;
	size_t j;

	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
	uint32_t slot;

	/* Bitwise offset of the instruction indicated by slot */
	uint32_t bit_pos;

	/* bit_pos split into byte and bit parts */
	uint32_t byte_pos;
	uint32_t bit_res;

	/* Address part of an instruction */
	uint32_t addr;

	/* Mask used to detect which instructions to convert */
	uint32_t mask;

	/* 41-bit instruction stored somewhere in the lowest 48 bits */
	uint64_t instr;

	/* Instruction normalized with bit_res for easier manipulation */
	uint64_t norm;

	for (i = 0; i + 16 <= size; i += 16) {
		mask = branch_table[buf[i] & 0x1F];
		/* Slots start at bit 5 and are 41 bits wide each. */
		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
			if (((mask >> slot) & 1) == 0)
				continue;

			byte_pos = bit_pos >> 3;
			bit_res = bit_pos & 7;
			/* Read six bytes (little endian) covering the slot. */
			instr = 0;
			for (j = 0; j < 6; ++j)
				instr |= (uint64_t)(buf[i + j + byte_pos])
						<< (8 * j);

			norm = instr >> bit_res;

			if (((norm >> 37) & 0x0F) == 0x05
					&& ((norm >> 9) & 0x07) == 0) {
				/* 21-bit address: 20 bits at 13 + sign at 36 */
				addr = (norm >> 13) & 0x0FFFFF;
				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
				addr <<= 4;
				addr -= s->pos + (uint32_t)i;
				addr >>= 4;

				/* Clear then rewrite the address fields. */
				norm &= ~((uint64_t)0x8FFFFF << 13);
				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
				norm |= (uint64_t)(addr & 0x100000)
						<< (36 - 20);

				instr &= (1 << bit_res) - 1;
				instr |= norm << bit_res;

				/* Write the six bytes back (little endian). */
				for (j = 0; j < 6; j++)
					buf[i + j + byte_pos]
						= (uint8_t)(instr >> (8 * j));
			}
		}
	}

	return i;
}
262#endif
263
264#ifdef XZ_DEC_ARM
265static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
266{
267	size_t i;
268	uint32_t addr;
269
270	for (i = 0; i + 4 <= size; i += 4) {
 
 
271		if (buf[i + 3] == 0xEB) {
272			addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
273					| ((uint32_t)buf[i + 2] << 16);
274			addr <<= 2;
275			addr -= s->pos + (uint32_t)i + 8;
276			addr >>= 2;
277			buf[i] = (uint8_t)addr;
278			buf[i + 1] = (uint8_t)(addr >> 8);
279			buf[i + 2] = (uint8_t)(addr >> 16);
280		}
281	}
282
283	return i;
284}
285#endif
286
287#ifdef XZ_DEC_ARMTHUMB
288static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
289{
290	size_t i;
291	uint32_t addr;
292
293	for (i = 0; i + 4 <= size; i += 2) {
 
 
 
 
 
294		if ((buf[i + 1] & 0xF8) == 0xF0
295				&& (buf[i + 3] & 0xF8) == 0xF8) {
296			addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
297					| ((uint32_t)buf[i] << 11)
298					| (((uint32_t)buf[i + 3] & 0x07) << 8)
299					| (uint32_t)buf[i + 2];
300			addr <<= 1;
301			addr -= s->pos + (uint32_t)i + 4;
302			addr >>= 1;
303			buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
304			buf[i] = (uint8_t)(addr >> 11);
305			buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
306			buf[i + 2] = (uint8_t)addr;
307			i += 2;
308		}
309	}
310
311	return i;
312}
313#endif
314
315#ifdef XZ_DEC_SPARC
316static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
317{
318	size_t i;
319	uint32_t instr;
320
321	for (i = 0; i + 4 <= size; i += 4) {
 
 
322		instr = get_unaligned_be32(buf + i);
323		if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
324			instr <<= 2;
325			instr -= s->pos + (uint32_t)i;
326			instr >>= 2;
327			instr = ((uint32_t)0x40000000 - (instr & 0x400000))
328					| 0x40000000 | (instr & 0x3FFFFF);
329			put_unaligned_be32(instr, buf + i);
330		}
331	}
332
333	return i;
334}
335#endif
336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 * of data that got filtered.
 *
 * NOTE: This is implemented as a switch statement to avoid using function
 * pointers, which could be problematic in the kernel boot code, which must
 * avoid pointers to static data (at least on x86).
 */
static void bcj_apply(struct xz_dec_bcj *s,
		      uint8_t *buf, size_t *pos, size_t size)
{
	size_t filtered;

	/* Filter only the not-yet-filtered tail of the buffer. */
	buf += *pos;
	size -= *pos;

	switch (s->type) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
		filtered = bcj_x86(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
		filtered = bcj_powerpc(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
		filtered = bcj_ia64(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
		filtered = bcj_arm(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
		filtered = bcj_armthumb(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
		filtered = bcj_sparc(s, buf, size);
		break;
#endif
	default:
		/* Never reached but silence compiler warnings. */
		filtered = 0;
		break;
	}

	*pos += filtered;
	s->pos += filtered;
}
393
394/*
395 * Flush pending filtered data from temp to the output buffer.
396 * Move the remaining mixture of possibly filtered and unfiltered
397 * data to the beginning of temp.
398 */
399static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
400{
401	size_t copy_size;
402
403	copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
404	memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
405	b->out_pos += copy_size;
406
407	s->temp.filtered -= copy_size;
408	s->temp.size -= copy_size;
409	memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
410}
411
/*
 * The BCJ filter functions are primitive in sense that they process the
 * data in chunks of 1-16 bytes. To hide this issue, this function does
 * some buffering.
 */
XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
				     struct xz_dec_lzma2 *lzma2,
				     struct xz_buf *b)
{
	size_t out_start;

	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned XZ_STREAM_END.
	 */
	if (s->temp.filtered > 0) {
		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;

		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;
	}

	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If everything cannot be filtered, copy it
	 * to temp and rewind the output buffer position accordingly.
	 *
	 * This needs to be always run when temp.size == 0 to handle a special
	 * case where the output buffer is full and the next filter has no
	 * more output coming but hasn't returned XZ_STREAM_END yet.
	 */
	if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
		out_start = b->out_pos;
		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
		b->out_pos += s->temp.size;

		s->ret = xz_dec_lzma2_run(lzma2, b);
		if (s->ret != XZ_STREAM_END
				&& (s->ret != XZ_OK || s->single_call))
			return s->ret;

		bcj_apply(s, b->out, &out_start, b->out_pos);

		/*
		 * As an exception, if the next filter returned XZ_STREAM_END,
		 * we can do that too, since the last few bytes that remain
		 * unfiltered are meant to remain unfiltered.
		 */
		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;

		/* Stash the unfiltered tail in temp for the next round. */
		s->temp.size = b->out_pos - out_start;
		b->out_pos -= s->temp.size;
		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);

		/*
		 * If there wasn't enough input to the next filter to fill
		 * the output buffer with unfiltered data, there's no point
		 * to try decoding more data to temp.
		 */
		if (b->out_pos + s->temp.size < b->out_size)
			return XZ_OK;
	}

	/*
	 * We have unfiltered data in temp. If the output buffer isn't full
	 * yet, try to fill the temp buffer by decoding more data from the
	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
	 * fill the actual output buffer by copying filtered data from temp.
	 * A mix of filtered and unfiltered data may be left in temp; it will
	 * be taken care on the next call to this function.
	 */
	if (b->out_pos < b->out_size) {
		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
		s->out = b->out;
		s->out_pos = b->out_pos;
		s->out_size = b->out_size;
		b->out = s->temp.buf;
		b->out_pos = s->temp.size;
		b->out_size = sizeof(s->temp.buf);

		s->ret = xz_dec_lzma2_run(lzma2, b);

		s->temp.size = b->out_pos;
		b->out = s->out;
		b->out_pos = s->out_pos;
		b->out_size = s->out_size;

		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
			return s->ret;

		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

		/*
		 * If the next filter returned XZ_STREAM_END, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if (s->ret == XZ_STREAM_END)
			s->temp.filtered = s->temp.size;

		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;
	}

	return s->ret;
}
526
527XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
528{
529	struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
530	if (s != NULL)
531		s->single_call = single_call;
532
533	return s;
534}
535
/*
 * Validate the Filter ID against the filters compiled in and reset the
 * decoder state for a new Block. Returns XZ_OPTIONS_ERROR for IDs that
 * are unknown or not enabled in this build.
 */
XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
{
	/* The empty cases intentionally fall through to the shared break. */
	switch (id) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
#endif
		break;

	default:
		/* Unsupported Filter ID */
		return XZ_OPTIONS_ERROR;
	}

	s->type = id;
	s->ret = XZ_OK;
	s->pos = 0;
	s->x86_prev_mask = 0;
	s->temp.filtered = 0;
	s->temp.size = 0;

	return XZ_OK;
}
573
574#endif
v6.13.7
  1// SPDX-License-Identifier: 0BSD
  2
  3/*
  4 * Branch/Call/Jump (BCJ) filter decoders
  5 *
  6 * Authors: Lasse Collin <lasse.collin@tukaani.org>
  7 *          Igor Pavlov <https://7-zip.org/>
 
 
 
  8 */
  9
 10#include "xz_private.h"
 11
 12/*
 13 * The rest of the file is inside this ifdef. It makes things a little more
 14 * convenient when building without support for any BCJ filters.
 15 */
 16#ifdef XZ_DEC_BCJ
 17
struct xz_dec_bcj {
	/* Type of the BCJ filter being used */
	enum {
		BCJ_X86 = 4,        /* x86 or x86-64 */
		BCJ_POWERPC = 5,    /* Big endian only */
		BCJ_IA64 = 6,       /* Big or little endian */
		BCJ_ARM = 7,        /* Little endian only */
		BCJ_ARMTHUMB = 8,   /* Little endian only */
		BCJ_SPARC = 9,      /* Big or little endian */
		BCJ_ARM64 = 10,     /* AArch64 */
		BCJ_RISCV = 11      /* RV32GQC_Zfh, RV64GQC_Zfh */
	} type;

	/*
	 * Return value of the next filter in the chain. We need to preserve
	 * this information across calls, because we must not call the next
	 * filter anymore once it has returned XZ_STREAM_END.
	 */
	enum xz_ret ret;

	/* True if we are operating in single-call mode. */
	bool single_call;

	/*
	 * Absolute position relative to the beginning of the uncompressed
	 * data (in a single .xz Block). We care only about the lowest 32
	 * bits so this doesn't need to be uint64_t even with big files.
	 */
	uint32_t pos;

	/* x86 filter state */
	uint32_t x86_prev_mask;

	/* Temporary space to hold the variables from struct xz_buf */
	uint8_t *out;
	size_t out_pos;
	size_t out_size;

	struct {
		/* Amount of already filtered data in the beginning of buf */
		size_t filtered;

		/* Total amount of data currently stored in buf */
		size_t size;

		/*
		 * Buffer to hold a mix of filtered and unfiltered data. This
		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
		 *
		 * Type         Alignment   Look-ahead
		 * x86              1           4
		 * PowerPC          4           0
		 * IA-64           16           0
		 * ARM              4           0
		 * ARM-Thumb        2           2
		 * SPARC            4           0
		 * ARM64            4           0
		 * RISC-V           2           6
		 */
		uint8_t buf[16];
	} temp;
};
 78
 79#ifdef XZ_DEC_X86
 80/*
 81 * This is used to test the most significant byte of a memory address
 82 * in an x86 instruction.
 83 */
 84static inline int bcj_x86_test_msbyte(uint8_t b)
 85{
 86	return b == 0x00 || b == 0xFF;
 87}
 88
/*
 * x86 BCJ filter decoder: converts relative addresses back to absolute
 * in 0xE8/0xE9 (CALL/JMP rel32) instructions. Returns the number of
 * bytes that were fully processed; the caller keeps the remainder for
 * the next call.
 */
static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/*
	 * Indexed by the 3-bit mask of recently seen 0xE8/0xE9 bytes:
	 * whether a conversion is allowed at all, and how many bytes back
	 * the most recent one was.
	 */
	static const bool mask_to_allowed_status[8]
		= { true, true, true, false, true, false, false, false };

	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

	size_t i;
	/* Position of the previous 0xE8/0xE9 byte; (size_t)-1 = none yet */
	size_t prev_pos = (size_t)-1;
	uint32_t prev_mask = s->x86_prev_mask;
	uint32_t src;
	uint32_t dest;
	uint32_t j;
	uint8_t b;

	/* Each conversion needs the opcode byte plus four bytes of look-ahead. */
	if (size <= 4)
		return 0;

	size -= 4;
	for (i = 0; i < size; ++i) {
		/* 0xFE mask matches exactly the opcode bytes 0xE8 and 0xE9. */
		if ((buf[i] & 0xFE) != 0xE8)
			continue;

		/* Distance from the previous opcode byte. */
		prev_pos = i - prev_pos;
		if (prev_pos > 3) {
			prev_mask = 0;
		} else {
			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
			if (prev_mask != 0) {
				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
				if (!mask_to_allowed_status[prev_mask]
						|| bcj_x86_test_msbyte(b)) {
					/* Skip: too close to a previous match. */
					prev_pos = i;
					prev_mask = (prev_mask << 1) | 1;
					continue;
				}
			}
		}

		prev_pos = i;

		if (bcj_x86_test_msbyte(buf[i + 4])) {
			src = get_unaligned_le32(buf + i + 1);
			while (true) {
				/* Relative -> absolute address conversion. */
				dest = src - (s->pos + (uint32_t)i + 5);
				if (prev_mask == 0)
					break;

				j = mask_to_bit_num[prev_mask] * 8;
				b = (uint8_t)(dest >> (24 - j));
				if (!bcj_x86_test_msbyte(b))
					break;

				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
			}

			/* Sign-extend the result from 25 bits to 32 bits. */
			dest &= 0x01FFFFFF;
			dest |= (uint32_t)0 - (dest & 0x01000000);
			put_unaligned_le32(dest, buf + i + 1);
			i += 4;
		} else {
			prev_mask = (prev_mask << 1) | 1;
		}
	}

	/* Save the opcode-byte mask for the next call. */
	prev_pos = i - prev_pos;
	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
	return i;
}
158#endif
159
160#ifdef XZ_DEC_POWERPC
161static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
162{
163	size_t i;
164	uint32_t instr;
165
166	size &= ~(size_t)3;
167
168	for (i = 0; i < size; i += 4) {
169		instr = get_unaligned_be32(buf + i);
170		if ((instr & 0xFC000003) == 0x48000001) {
171			instr &= 0x03FFFFFC;
172			instr -= s->pos + (uint32_t)i;
173			instr &= 0x03FFFFFC;
174			instr |= 0x48000001;
175			put_unaligned_be32(instr, buf + i);
176		}
177	}
178
179	return i;
180}
181#endif
182
183#ifdef XZ_DEC_IA64
/*
 * IA-64 BCJ filter decoder: converts branch instructions inside 16-byte
 * instruction bundles. Returns the number of bytes processed (a multiple
 * of 16).
 */
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	/*
	 * Indexed by the low five bits of the first byte of a bundle
	 * (the template field). Each set bit selects an instruction slot
	 * that may hold a convertible branch.
	 */
	static const uint8_t branch_table[32] = {
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		4, 4, 6, 6, 0, 0, 7, 7,
		4, 4, 0, 0, 4, 4, 0, 0
	};

	/*
	 * The local variables take a little bit stack space, but it's less
	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
	 * stack usage here without doing that for the LZMA2 decoder too.
	 */

	/* Loop counters */
	size_t i;
	size_t j;

	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
	uint32_t slot;

	/* Bitwise offset of the instruction indicated by slot */
	uint32_t bit_pos;

	/* bit_pos split into byte and bit parts */
	uint32_t byte_pos;
	uint32_t bit_res;

	/* Address part of an instruction */
	uint32_t addr;

	/* Mask used to detect which instructions to convert */
	uint32_t mask;

	/* 41-bit instruction stored somewhere in the lowest 48 bits */
	uint64_t instr;

	/* Instruction normalized with bit_res for easier manipulation */
	uint64_t norm;

	/* Process only whole 16-byte bundles. */
	size &= ~(size_t)15;

	for (i = 0; i < size; i += 16) {
		mask = branch_table[buf[i] & 0x1F];
		/* Slots start at bit 5 and are 41 bits wide each. */
		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
			if (((mask >> slot) & 1) == 0)
				continue;

			byte_pos = bit_pos >> 3;
			bit_res = bit_pos & 7;
			/* Read six bytes (little endian) covering the slot. */
			instr = 0;
			for (j = 0; j < 6; ++j)
				instr |= (uint64_t)(buf[i + j + byte_pos])
						<< (8 * j);

			norm = instr >> bit_res;

			if (((norm >> 37) & 0x0F) == 0x05
					&& ((norm >> 9) & 0x07) == 0) {
				/* 21-bit address: 20 bits at 13 + sign at 36 */
				addr = (norm >> 13) & 0x0FFFFF;
				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
				addr <<= 4;
				addr -= s->pos + (uint32_t)i;
				addr >>= 4;

				/* Clear then rewrite the address fields. */
				norm &= ~((uint64_t)0x8FFFFF << 13);
				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
				norm |= (uint64_t)(addr & 0x100000)
						<< (36 - 20);

				instr &= (1 << bit_res) - 1;
				instr |= norm << bit_res;

				/* Write the six bytes back (little endian). */
				for (j = 0; j < 6; j++)
					buf[i + j + byte_pos]
						= (uint8_t)(instr >> (8 * j));
			}
		}
	}

	return i;
}
267#endif
268
269#ifdef XZ_DEC_ARM
270static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
271{
272	size_t i;
273	uint32_t addr;
274
275	size &= ~(size_t)3;
276
277	for (i = 0; i < size; i += 4) {
278		if (buf[i + 3] == 0xEB) {
279			addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
280					| ((uint32_t)buf[i + 2] << 16);
281			addr <<= 2;
282			addr -= s->pos + (uint32_t)i + 8;
283			addr >>= 2;
284			buf[i] = (uint8_t)addr;
285			buf[i + 1] = (uint8_t)(addr >> 8);
286			buf[i + 2] = (uint8_t)(addr >> 16);
287		}
288	}
289
290	return i;
291}
292#endif
293
294#ifdef XZ_DEC_ARMTHUMB
295static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
296{
297	size_t i;
298	uint32_t addr;
299
300	if (size < 4)
301		return 0;
302
303	size -= 4;
304
305	for (i = 0; i <= size; i += 2) {
306		if ((buf[i + 1] & 0xF8) == 0xF0
307				&& (buf[i + 3] & 0xF8) == 0xF8) {
308			addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
309					| ((uint32_t)buf[i] << 11)
310					| (((uint32_t)buf[i + 3] & 0x07) << 8)
311					| (uint32_t)buf[i + 2];
312			addr <<= 1;
313			addr -= s->pos + (uint32_t)i + 4;
314			addr >>= 1;
315			buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
316			buf[i] = (uint8_t)(addr >> 11);
317			buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
318			buf[i + 2] = (uint8_t)addr;
319			i += 2;
320		}
321	}
322
323	return i;
324}
325#endif
326
327#ifdef XZ_DEC_SPARC
328static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
329{
330	size_t i;
331	uint32_t instr;
332
333	size &= ~(size_t)3;
334
335	for (i = 0; i < size; i += 4) {
336		instr = get_unaligned_be32(buf + i);
337		if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
338			instr <<= 2;
339			instr -= s->pos + (uint32_t)i;
340			instr >>= 2;
341			instr = ((uint32_t)0x40000000 - (instr & 0x400000))
342					| 0x40000000 | (instr & 0x3FFFFF);
343			put_unaligned_be32(instr, buf + i);
344		}
345	}
346
347	return i;
348}
349#endif
350
351#ifdef XZ_DEC_ARM64
/*
 * ARM64 (AArch64) BCJ filter decoder: restores absolute addresses in
 * BL and ADRP instructions. Returns the number of bytes processed
 * (a multiple of 4).
 */
static size_t bcj_arm64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t instr;
	uint32_t addr;

	/* Process only whole 32-bit instruction words. */
	size &= ~(size_t)3;

	for (i = 0; i < size; i += 4) {
		instr = get_unaligned_le32(buf + i);

		if ((instr >> 26) == 0x25) {
			/* BL instruction */
			addr = instr - ((s->pos + (uint32_t)i) >> 2);
			instr = 0x94000000 | (addr & 0x03FFFFFF);
			put_unaligned_le32(instr, buf + i);

		} else if ((instr & 0x9F000000) == 0x90000000) {
			/* ADRP instruction */
			addr = ((instr >> 29) & 3) | ((instr >> 3) & 0x1FFFFC);

			/* Only convert values in the range +/-512 MiB. */
			if ((addr + 0x020000) & 0x1C0000)
				continue;

			addr -= (s->pos + (uint32_t)i) >> 12;

			/* Keep opcode and rd; rewrite immlo and immhi. */
			instr &= 0x9000001F;
			instr |= (addr & 3) << 29;
			instr |= (addr & 0x03FFFC) << 3;
			instr |= (0U - (addr & 0x020000)) & 0xE00000;

			put_unaligned_le32(instr, buf + i);
		}
	}

	return i;
}
390#endif
391
392#ifdef XZ_DEC_RISCV
/*
 * RISC-V BCJ filter decoder: restores absolute addresses in JAL (rd=x1)
 * and AUIPC instruction pairs. Instructions are 16-bit aligned; up to
 * eight bytes of look-ahead are needed, so a tail of up to eight bytes
 * is left for the next call. Returns the number of bytes processed.
 */
static size_t bcj_riscv(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t b1;
	uint32_t b2;
	uint32_t b3;
	uint32_t instr;
	uint32_t instr2;
	uint32_t instr2_rs1;
	uint32_t addr;

	/* Need eight bytes of look-ahead for the AUIPC pair case. */
	if (size < 8)
		return 0;

	size -= 8;

	for (i = 0; i <= size; i += 2) {
		instr = buf[i];

		if (instr == 0xEF) {
			/* JAL */
			/*
			 * NOTE(review): b1 & 0x0D appears to reject encodings
			 * the encoder never produced -- confirm against the
			 * xz RISC-V filter spec.
			 */
			b1 = buf[i + 1];
			if ((b1 & 0x0D) != 0)
				continue;

			b2 = buf[i + 2];
			b3 = buf[i + 3];

			/* Reassemble, convert, and scatter the 21-bit target. */
			addr = ((b1 & 0xF0) << 13) | (b2 << 9) | (b3 << 1);
			addr -= s->pos + (uint32_t)i;

			buf[i + 1] = (uint8_t)((b1 & 0x0F)
					| ((addr >> 8) & 0xF0));

			buf[i + 2] = (uint8_t)(((addr >> 16) & 0x0F)
					| ((addr >> 7) & 0x10)
					| ((addr << 4) & 0xE0));

			buf[i + 3] = (uint8_t)(((addr >> 4) & 0x7F)
					| ((addr >> 13) & 0x80));

			i += 4 - 2;

		} else if ((instr & 0x7F) == 0x17) {
			/* AUIPC */
			instr |= (uint32_t)buf[i + 1] << 8;
			instr |= (uint32_t)buf[i + 2] << 16;
			instr |= (uint32_t)buf[i + 3] << 24;

			if (instr & 0xE80) {
				/* AUIPC's rd doesn't equal x0 or x2. */
				instr2 = get_unaligned_le32(buf + i + 4);

				/* Check the encoder's special pair encoding. */
				if (((instr << 8) ^ (instr2 - 3)) & 0xF8003) {
					i += 6 - 2;
					continue;
				}

				addr = (instr & 0xFFFFF000) + (instr2 >> 20);

				instr = 0x17 | (2 << 7) | (instr2 << 12);
				instr2 = addr;
			} else {
				/* AUIPC's rd equals x0 or x2. */
				instr2_rs1 = instr >> 27;

				if ((uint32_t)((instr - 0x3117) << 18)
						>= (instr2_rs1 & 0x1D)) {
					i += 4 - 2;
					continue;
				}

				/* Absolute address stored big endian here. */
				addr = get_unaligned_be32(buf + i + 4);
				addr -= s->pos + (uint32_t)i;

				instr2 = (instr >> 12) | (addr << 20);

				/* +0x800: round so the pair's offsets add up. */
				instr = 0x17 | (instr2_rs1 << 7)
					| ((addr + 0x800) & 0xFFFFF000);
			}

			put_unaligned_le32(instr, buf + i);
			put_unaligned_le32(instr2, buf + i + 4);

			i += 8 - 2;
		}
	}

	return i;
}
483#endif
484
/*
 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 * of data that got filtered.
 *
 * NOTE: This is implemented as a switch statement to avoid using function
 * pointers, which could be problematic in the kernel boot code, which must
 * avoid pointers to static data (at least on x86).
 */
static void bcj_apply(struct xz_dec_bcj *s,
		      uint8_t *buf, size_t *pos, size_t size)
{
	size_t filtered;

	/* Filter only the not-yet-filtered tail of the buffer. */
	buf += *pos;
	size -= *pos;

	switch (s->type) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
		filtered = bcj_x86(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
		filtered = bcj_powerpc(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
		filtered = bcj_ia64(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
		filtered = bcj_arm(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
		filtered = bcj_armthumb(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
		filtered = bcj_sparc(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARM64
	case BCJ_ARM64:
		filtered = bcj_arm64(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_RISCV
	case BCJ_RISCV:
		filtered = bcj_riscv(s, buf, size);
		break;
#endif
	default:
		/* Never reached but silence compiler warnings. */
		filtered = 0;
		break;
	}

	*pos += filtered;
	s->pos += filtered;
}
551
552/*
553 * Flush pending filtered data from temp to the output buffer.
554 * Move the remaining mixture of possibly filtered and unfiltered
555 * data to the beginning of temp.
556 */
557static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
558{
559	size_t copy_size;
560
561	copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
562	memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
563	b->out_pos += copy_size;
564
565	s->temp.filtered -= copy_size;
566	s->temp.size -= copy_size;
567	memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
568}
569
/*
 * The BCJ filter functions are primitive in sense that they process the
 * data in chunks of 1-16 bytes. To hide this issue, this function does
 * some buffering.
 */
enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2,
			   struct xz_buf *b)
{
	size_t out_start;

	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned XZ_STREAM_END.
	 */
	if (s->temp.filtered > 0) {
		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;

		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;
	}

	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If everything cannot be filtered, copy it
	 * to temp and rewind the output buffer position accordingly.
	 *
	 * This needs to be always run when temp.size == 0 to handle a special
	 * case where the output buffer is full and the next filter has no
	 * more output coming but hasn't returned XZ_STREAM_END yet.
	 */
	if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
		out_start = b->out_pos;
		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
		b->out_pos += s->temp.size;

		s->ret = xz_dec_lzma2_run(lzma2, b);
		if (s->ret != XZ_STREAM_END
				&& (s->ret != XZ_OK || s->single_call))
			return s->ret;

		bcj_apply(s, b->out, &out_start, b->out_pos);

		/*
		 * As an exception, if the next filter returned XZ_STREAM_END,
		 * we can do that too, since the last few bytes that remain
		 * unfiltered are meant to remain unfiltered.
		 */
		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;

		/* Stash the unfiltered tail in temp for the next round. */
		s->temp.size = b->out_pos - out_start;
		b->out_pos -= s->temp.size;
		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);

		/*
		 * If there wasn't enough input to the next filter to fill
		 * the output buffer with unfiltered data, there's no point
		 * to try decoding more data to temp.
		 */
		if (b->out_pos + s->temp.size < b->out_size)
			return XZ_OK;
	}

	/*
	 * We have unfiltered data in temp. If the output buffer isn't full
	 * yet, try to fill the temp buffer by decoding more data from the
	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
	 * fill the actual output buffer by copying filtered data from temp.
	 * A mix of filtered and unfiltered data may be left in temp; it will
	 * be taken care on the next call to this function.
	 */
	if (b->out_pos < b->out_size) {
		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
		s->out = b->out;
		s->out_pos = b->out_pos;
		s->out_size = b->out_size;
		b->out = s->temp.buf;
		b->out_pos = s->temp.size;
		b->out_size = sizeof(s->temp.buf);

		s->ret = xz_dec_lzma2_run(lzma2, b);

		s->temp.size = b->out_pos;
		b->out = s->out;
		b->out_pos = s->out_pos;
		b->out_size = s->out_size;

		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
			return s->ret;

		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

		/*
		 * If the next filter returned XZ_STREAM_END, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if (s->ret == XZ_STREAM_END)
			s->temp.filtered = s->temp.size;

		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;
	}

	return s->ret;
}
683
684struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
685{
686	struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
687	if (s != NULL)
688		s->single_call = single_call;
689
690	return s;
691}
692
/*
 * Validate the Filter ID against the filters compiled in and reset the
 * decoder state for a new Block. Returns XZ_OPTIONS_ERROR for IDs that
 * are unknown or not enabled in this build.
 */
enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
{
	/* The empty cases intentionally fall through to the shared break. */
	switch (id) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
#endif
#ifdef XZ_DEC_ARM64
	case BCJ_ARM64:
#endif
#ifdef XZ_DEC_RISCV
	case BCJ_RISCV:
#endif
		break;

	default:
		/* Unsupported Filter ID */
		return XZ_OPTIONS_ERROR;
	}

	s->type = id;
	s->ret = XZ_OK;
	s->pos = 0;
	s->x86_prev_mask = 0;
	s->temp.filtered = 0;
	s->temp.size = 0;

	return XZ_OK;
}
736
737#endif