v5.14.15
   1/*
   2 * LZMA2 decoder
   3 *
   4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
   5 *          Igor Pavlov <https://7-zip.org/>
   6 *
   7 * This file has been put into the public domain.
   8 * You can do whatever you want with this file.
   9 */
  10
  11#include "xz_private.h"
  12#include "xz_lzma2.h"
  13
  14/*
  15 * Range decoder initialization eats the first five bytes of each LZMA chunk.
  16 */
  17#define RC_INIT_BYTES 5
  18
  19/*
  20 * Minimum number of usable input bytes needed to safely decode one LZMA
  21 * symbol. The worst case is that we decode 22 bits using probabilities
  22 * and 26 direct bits. This may consume at most 20 bytes of input.
  23 * However, lzma_main() does an extra normalization before returning,
  24 * thus we need to put 21 here.
  25 */
  26#define LZMA_IN_REQUIRED 21
  27
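/*
 * A sketch of where those worst-case numbers come from, derived from the
 * decoder below: the longest symbol is a match with a maximal distance,
 * costing is_match (1) + is_rep (1) + choice/choice2 (2) + high length
 * tree (8) + dist_slot tree (6) + dist_align tree (4) = 22
 * probability-coded bits, plus (63 >> 1) - 1 - ALIGN_BITS = 26 direct
 * bits from rc_direct().
 */
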
  28/*
  29 * Dictionary (history buffer)
  30 *
  31 * These are always true:
  32 *    start <= pos <= full <= end
  33 *    pos <= limit <= end
  34 *
  35 * In multi-call mode, also these are true:
  36 *    end == size
  37 *    size <= size_max
  38 *    allocated <= size
  39 *
  40 * Most of these variables are size_t to support single-call mode,
  41 * in which the dictionary variables address the actual output
  42 * buffer directly.
  43 */
  44struct dictionary {
  45	/* Beginning of the history buffer */
  46	uint8_t *buf;
  47
  48	/* Old position in buf (before decoding more data) */
  49	size_t start;
  50
  51	/* Position in buf */
  52	size_t pos;
  53
  54	/*
  55	 * How full the dictionary is. This is used to detect corrupt input
  56	 * that would read beyond the beginning of the uncompressed stream.
  57	 */
  58	size_t full;
  59
  60	/* Write limit; we don't write to buf[limit] or later bytes. */
  61	size_t limit;
  62
  63	/*
  64	 * End of the dictionary buffer. In multi-call mode, this is
  65	 * the same as the dictionary size. In single-call mode, this
  66	 * indicates the size of the output buffer.
  67	 */
  68	size_t end;
  69
  70	/*
  71	 * Size of the dictionary as specified in Block Header. This is used
  72	 * together with "full" to detect corrupt input that would make us
  73	 * read beyond the beginning of the uncompressed stream.
  74	 */
  75	uint32_t size;
  76
  77	/*
  78	 * Maximum allowed dictionary size in multi-call mode.
  79	 * This is ignored in single-call mode.
  80	 */
  81	uint32_t size_max;
  82
  83	/*
  84	 * Amount of memory currently allocated for the dictionary.
  85	 * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
  86	 * size_max is always the same as the allocated size.)
  87	 */
  88	uint32_t allocated;
  89
  90	/* Operation mode */
  91	enum xz_mode mode;
  92};
  93
  94/* Range decoder */
  95struct rc_dec {
  96	uint32_t range;
  97	uint32_t code;
  98
  99	/*
 100	 * Number of initializing bytes remaining to be read
 101	 * by rc_read_init().
 102	 */
 103	uint32_t init_bytes_left;
 104
 105	/*
 106	 * Buffer from which we read our input. It can be either
 107	 * temp.buf or the caller-provided input buffer.
 108	 */
 109	const uint8_t *in;
 110	size_t in_pos;
 111	size_t in_limit;
 112};
 113
 114/* Probabilities for a length decoder. */
 115struct lzma_len_dec {
 116	/* Probability of match length being at least 10 */
 117	uint16_t choice;
 118
 119	/* Probability of match length being at least 18 */
 120	uint16_t choice2;
 121
 122	/* Probabilities for match lengths 2-9 */
 123	uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
 124
 125	/* Probabilities for match lengths 10-17 */
 126	uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
 127
 128	/* Probabilities for match lengths 18-273 */
 129	uint16_t high[LEN_HIGH_SYMBOLS];
 130};
 131
 132struct lzma_dec {
 133	/* Distances of latest four matches */
 134	uint32_t rep0;
 135	uint32_t rep1;
 136	uint32_t rep2;
 137	uint32_t rep3;
 138
 139	/* Types of the most recently seen LZMA symbols */
 140	enum lzma_state state;
 141
 142	/*
 143	 * Length of a match. This is updated so that dict_repeat can
 144	 * be called again to finish repeating the whole match.
 145	 */
 146	uint32_t len;
 147
 148	/*
 149	 * LZMA properties or related bit masks (number of literal
 150	 * context bits, a mask derived from the number of literal
 151	 * position bits, and a mask derived from the number of
 152	 * position bits)
 153	 */
 154	uint32_t lc;
 155	uint32_t literal_pos_mask; /* (1 << lp) - 1 */
 156	uint32_t pos_mask;         /* (1 << pb) - 1 */
 157
 158	/* If 1, it's a match. Otherwise it's a single 8-bit literal. */
 159	uint16_t is_match[STATES][POS_STATES_MAX];
 160
 161	/* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
 162	uint16_t is_rep[STATES];
 163
 164	/*
 165	 * If 0, distance of a repeated match is rep0.
 166	 * Otherwise check is_rep1.
 167	 */
 168	uint16_t is_rep0[STATES];
 169
 170	/*
 171	 * If 0, distance of a repeated match is rep1.
 172	 * Otherwise check is_rep2.
 173	 */
 174	uint16_t is_rep1[STATES];
 175
 176	/* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
 177	uint16_t is_rep2[STATES];
 178
 179	/*
 180	 * If 1, the repeated match has a length of one byte. Otherwise
 181	 * the length is decoded from rep_len_decoder.
 182	 */
 183	uint16_t is_rep0_long[STATES][POS_STATES_MAX];
 184
 185	/*
 186	 * Probability tree for the highest two bits of the match
 187	 * distance. There is a separate probability tree for match
 188	 * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
 189	 */
 190	uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
 191
 192	/*
 193	 * Probability trees for additional bits for match distance
 194	 * when the distance is in the range [4, 127].
 195	 */
 196	uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
 197
 198	/*
 199	 * Probability tree for the lowest four bits of a match
 200	 * distance that is equal to or greater than 128.
 201	 */
 202	uint16_t dist_align[ALIGN_SIZE];
 203
 204	/* Length of a normal match */
 205	struct lzma_len_dec match_len_dec;
 206
 207	/* Length of a repeated match */
 208	struct lzma_len_dec rep_len_dec;
 209
 210	/* Probabilities of literals */
 211	uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
 212};
 213
 214struct lzma2_dec {
 215	/* Position in xz_dec_lzma2_run(). */
 216	enum lzma2_seq {
 217		SEQ_CONTROL,
 218		SEQ_UNCOMPRESSED_1,
 219		SEQ_UNCOMPRESSED_2,
 220		SEQ_COMPRESSED_0,
 221		SEQ_COMPRESSED_1,
 222		SEQ_PROPERTIES,
 223		SEQ_LZMA_PREPARE,
 224		SEQ_LZMA_RUN,
 225		SEQ_COPY
 226	} sequence;
 227
 228	/* Next position after decoding the compressed size of the chunk. */
 229	enum lzma2_seq next_sequence;
 230
 231	/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
 232	uint32_t uncompressed;
 233
 234	/*
 235	 * Compressed size of LZMA chunk or compressed/uncompressed
 236	 * size of uncompressed chunk (64 KiB at maximum)
 237	 */
 238	uint32_t compressed;
 239
 240	/*
 241	 * True if dictionary reset is needed. This is false before
 242	 * the first chunk (LZMA or uncompressed).
 243	 */
 244	bool need_dict_reset;
 245
 246	/*
 247	 * True if new LZMA properties are needed. This is false
 248	 * before the first LZMA chunk.
 249	 */
 250	bool need_props;
 251};
 252
 253struct xz_dec_lzma2 {
 254	/*
 255	 * The order below is important on x86 to reduce code size and
 256	 * it shouldn't hurt on other platforms. Everything up to and
 257	 * including lzma.pos_mask are in the first 128 bytes on x86-32,
 258	 * which allows using smaller instructions to access those
 259	 * variables. On x86-64, fewer variables fit into the first 128
 260	 * bytes, but this is still the best order without sacrificing
 261	 * the readability by splitting the structures.
 262	 */
 263	struct rc_dec rc;
 264	struct dictionary dict;
 265	struct lzma2_dec lzma2;
 266	struct lzma_dec lzma;
 267
 268	/*
 269	 * Temporary buffer which holds a small number of input bytes between
 270	 * decoder calls. See lzma2_lzma() for details.
 271	 */
 272	struct {
 273		uint32_t size;
 274		uint8_t buf[3 * LZMA_IN_REQUIRED];
 275	} temp;
 276};
 277
 278/**************
 279 * Dictionary *
 280 **************/
 281
 282/*
 283 * Reset the dictionary state. When in single-call mode, set up the beginning
 284 * of the dictionary to point to the actual output buffer.
 285 */
 286static void dict_reset(struct dictionary *dict, struct xz_buf *b)
 287{
 288	if (DEC_IS_SINGLE(dict->mode)) {
 289		dict->buf = b->out + b->out_pos;
 290		dict->end = b->out_size - b->out_pos;
 291	}
 292
 293	dict->start = 0;
 294	dict->pos = 0;
 295	dict->limit = 0;
 296	dict->full = 0;
 297}
 298
 299/* Set dictionary write limit */
 300static void dict_limit(struct dictionary *dict, size_t out_max)
 301{
 302	if (dict->end - dict->pos <= out_max)
 303		dict->limit = dict->end;
 304	else
 305		dict->limit = dict->pos + out_max;
 306}
 307
 308/* Return true if at least one byte can be written into the dictionary. */
 309static inline bool dict_has_space(const struct dictionary *dict)
 310{
 311	return dict->pos < dict->limit;
 312}
 313
 314/*
 315 * Get a byte from the dictionary at the given distance. The distance is
 316 * assumed to be valid, or as a special case, zero when the dictionary is
 317 * still empty. This special case is needed for single-call decoding to
 318 * avoid writing a '\0' to the end of the destination buffer.
 319 */
 320static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
 321{
 322	size_t offset = dict->pos - dist - 1;
 323
 324	if (dist >= dict->pos)
 325		offset += dict->end;
 326
 327	return dict->full > 0 ? dict->buf[offset] : 0;
 328}
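
/*
 * For example, dist == 0 returns the most recently written byte,
 * dict->buf[dict->pos - 1]; lzma_literal_probs() below relies on this to
 * fetch the previous byte. With an empty dictionary (full == 0) the
 * special case returns 0 instead.
 */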
 329
 330/*
 331 * Put one byte into the dictionary. It is assumed that there is space for it.
 332 */
 333static inline void dict_put(struct dictionary *dict, uint8_t byte)
 334{
 335	dict->buf[dict->pos++] = byte;
 336
 337	if (dict->full < dict->pos)
 338		dict->full = dict->pos;
 339}
 340
 341/*
 342 * Repeat given number of bytes from the given distance. If the distance is
 343 * invalid, false is returned. On success, true is returned and *len is
 344 * updated to indicate how many bytes were left to be repeated.
 345 */
 346static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
 347{
 348	size_t back;
 349	uint32_t left;
 350
 351	if (dist >= dict->full || dist >= dict->size)
 352		return false;
 353
 354	left = min_t(size_t, dict->limit - dict->pos, *len);
 355	*len -= left;
 356
 357	back = dict->pos - dist - 1;
 358	if (dist >= dict->pos)
 359		back += dict->end;
 360
 361	do {
 362		dict->buf[dict->pos++] = dict->buf[back++];
 363		if (back == dict->end)
 364			back = 0;
 365	} while (--left > 0);
 366
 367	if (dict->full < dict->pos)
 368		dict->full = dict->pos;
 369
 370	return true;
 371}
 372
 373/* Copy uncompressed data as is from input to dictionary and output buffers. */
 374static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
 375			      uint32_t *left)
 376{
 377	size_t copy_size;
 378
 379	while (*left > 0 && b->in_pos < b->in_size
 380			&& b->out_pos < b->out_size) {
 381		copy_size = min(b->in_size - b->in_pos,
 382				b->out_size - b->out_pos);
 383		if (copy_size > dict->end - dict->pos)
 384			copy_size = dict->end - dict->pos;
 385		if (copy_size > *left)
 386			copy_size = *left;
 387
 388		*left -= copy_size;
 389
 390		memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
 391		dict->pos += copy_size;
 392
 393		if (dict->full < dict->pos)
 394			dict->full = dict->pos;
 395
 396		if (DEC_IS_MULTI(dict->mode)) {
 397			if (dict->pos == dict->end)
 398				dict->pos = 0;
 399
 400			memcpy(b->out + b->out_pos, b->in + b->in_pos,
 401					copy_size);
 402		}
 403
 404		dict->start = dict->pos;
 405
 406		b->out_pos += copy_size;
 407		b->in_pos += copy_size;
 408	}
 409}
 410
 411/*
 412 * Flush pending data from dictionary to b->out. It is assumed that there is
 413 * enough space in b->out. This is guaranteed because caller uses dict_limit()
 414 * before decoding data into the dictionary.
 415 */
 416static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
 417{
 418	size_t copy_size = dict->pos - dict->start;
 419
 420	if (DEC_IS_MULTI(dict->mode)) {
 421		if (dict->pos == dict->end)
 422			dict->pos = 0;
 423
 424		memcpy(b->out + b->out_pos, dict->buf + dict->start,
 425				copy_size);
 426	}
 427
 428	dict->start = dict->pos;
 429	b->out_pos += copy_size;
 430	return copy_size;
 431}
 432
 433/*****************
 434 * Range decoder *
 435 *****************/
 436
 437/* Reset the range decoder. */
 438static void rc_reset(struct rc_dec *rc)
 439{
 440	rc->range = (uint32_t)-1;
 441	rc->code = 0;
 442	rc->init_bytes_left = RC_INIT_BYTES;
 443}
 444
 445/*
 446 * Read the first five initial bytes into rc->code if they haven't been
 447 * read already. (Yes, the first byte gets completely ignored.)
 448 */
 449static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b)
 450{
 451	while (rc->init_bytes_left > 0) {
 452		if (b->in_pos == b->in_size)
 453			return false;
 454
 455		rc->code = (rc->code << 8) + b->in[b->in_pos++];
 456		--rc->init_bytes_left;
 457	}
 458
 459	return true;
 460}
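
/*
 * Note that five shifts of eight bits into the 32-bit rc->code mean the
 * first of the five bytes falls off the top, which is why it can be
 * ignored; the LZMA format always encodes that first byte as 0x00.
 */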
 461
 462/* Return true if there may not be enough input for the next decoding loop. */
 463static inline bool rc_limit_exceeded(const struct rc_dec *rc)
 464{
 465	return rc->in_pos > rc->in_limit;
 466}
 467
 468/*
 469 * Return true if it is possible (from the range decoder's point of view) that
 470 * we have reached the end of the LZMA chunk.
 471 */
 472static inline bool rc_is_finished(const struct rc_dec *rc)
 473{
 474	return rc->code == 0;
 475}
 476
 477/* Read the next input byte if needed. */
 478static __always_inline void rc_normalize(struct rc_dec *rc)
 479{
 480	if (rc->range < RC_TOP_VALUE) {
 481		rc->range <<= RC_SHIFT_BITS;
 482		rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
 483	}
 484}
 485
 486/*
 487 * Decode one bit. In some versions, this function has been split into three
 488 * functions so that the compiler is supposed to be able to more easily avoid
 489 * an extra branch. In this particular version of the LZMA decoder, this
 490 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
 491 * on x86). Using a non-split version results in nicer looking code too.
 492 *
 493 * NOTE: This must return an int. Do not make it return a bool or the speed
 494 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
 495 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
 496 */
 497static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
 498{
 499	uint32_t bound;
 500	int bit;
 501
 502	rc_normalize(rc);
 503	bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
 504	if (rc->code < bound) {
 505		rc->range = bound;
 506		*prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
 507		bit = 0;
 508	} else {
 509		rc->range -= bound;
 510		rc->code -= bound;
 511		*prob -= *prob >> RC_MOVE_BITS;
 512		bit = 1;
 513	}
 514
 515	return bit;
 516}
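
/*
 * A worked example, assuming the usual constants from xz_lzma2.h
 * (RC_BIT_MODEL_TOTAL == 2048, RC_MOVE_BITS == 5): with *prob at its
 * initial value of 1024, bound is (range >> 11) * 1024, about half of
 * range, so both bit values start out equally likely. Each decoded 0
 * then raises *prob by (2048 - *prob) >> 5 and each decoded 1 lowers it
 * by *prob >> 5, adapting the model to the data.
 */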
 517
 518/* Decode a bittree starting from the most significant bit. */
 519static __always_inline uint32_t rc_bittree(struct rc_dec *rc,
 520					   uint16_t *probs, uint32_t limit)
 521{
 522	uint32_t symbol = 1;
 523
 524	do {
 525		if (rc_bit(rc, &probs[symbol]))
 526			symbol = (symbol << 1) + 1;
 527		else
 528			symbol <<= 1;
 529	} while (symbol < limit);
 530
 531	return symbol;
 532}
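
/*
 * The return value lies in [limit, 2 * limit - 1]: with limit == 0x100
 * the eight decoded bits occupy bits 0-7 and bit 8 is always set, which
 * is why lzma_literal() truncates the result to uint8_t and lzma_len()
 * subtracts limit.
 */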
 533
 534/* Decode a bittree starting from the least significant bit. */
 535static __always_inline void rc_bittree_reverse(struct rc_dec *rc,
 536					       uint16_t *probs,
 537					       uint32_t *dest, uint32_t limit)
 538{
 539	uint32_t symbol = 1;
 540	uint32_t i = 0;
 541
 542	do {
 543		if (rc_bit(rc, &probs[symbol])) {
 544			symbol = (symbol << 1) + 1;
 545			*dest += 1 << i;
 546		} else {
 547			symbol <<= 1;
 548		}
 549	} while (++i < limit);
 550}
 551
 552/* Decode direct bits (fixed fifty-fifty probability) */
 553static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
 554{
 555	uint32_t mask;
 556
 557	do {
 558		rc_normalize(rc);
 559		rc->range >>= 1;
 560		rc->code -= rc->range;
 561		mask = (uint32_t)0 - (rc->code >> 31);
 562		rc->code += rc->range & mask;
 563		*dest = (*dest << 1) + (mask + 1);
 564	} while (--limit > 0);
 565}
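
/*
 * The mask above makes the loop branchless: after the subtraction,
 * rc->code has its top bit set exactly when the decoded bit is 0, so
 * mask becomes 0xFFFFFFFF and the subtraction is undone; otherwise mask
 * is 0 and (mask + 1) appends a 1 bit to *dest.
 */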
 566
 567/********
 568 * LZMA *
 569 ********/
 570
 571/* Get pointer to literal coder probability array. */
 572static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
 573{
 574	uint32_t prev_byte = dict_get(&s->dict, 0);
 575	uint32_t low = prev_byte >> (8 - s->lzma.lc);
 576	uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
 577	return s->lzma.literal[low + high];
 578}
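
/*
 * With the common properties lc=3, lp=0 (so literal_pos_mask == 0) this
 * reduces to low == prev_byte >> 5 and high == 0: the top three bits of
 * the previous byte select one of eight probability arrays.
 */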
 579
 580/* Decode a literal (one 8-bit byte) */
 581static void lzma_literal(struct xz_dec_lzma2 *s)
 582{
 583	uint16_t *probs;
 584	uint32_t symbol;
 585	uint32_t match_byte;
 586	uint32_t match_bit;
 587	uint32_t offset;
 588	uint32_t i;
 589
 590	probs = lzma_literal_probs(s);
 591
 592	if (lzma_state_is_literal(s->lzma.state)) {
 593		symbol = rc_bittree(&s->rc, probs, 0x100);
 594	} else {
 595		symbol = 1;
 596		match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
 597		offset = 0x100;
 598
 599		do {
 600			match_bit = match_byte & offset;
 601			match_byte <<= 1;
 602			i = offset + match_bit + symbol;
 603
 604			if (rc_bit(&s->rc, &probs[i])) {
 605				symbol = (symbol << 1) + 1;
 606				offset &= match_bit;
 607			} else {
 608				symbol <<= 1;
 609				offset &= ~match_bit;
 610			}
 611		} while (symbol < 0x100);
 612	}
 613
 614	dict_put(&s->dict, (uint8_t)symbol);
 615	lzma_state_literal(&s->lzma.state);
 616}
 617
 618/* Decode the length of the match into s->lzma.len. */
 619static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
 620		     uint32_t pos_state)
 621{
 622	uint16_t *probs;
 623	uint32_t limit;
 624
 625	if (!rc_bit(&s->rc, &l->choice)) {
 626		probs = l->low[pos_state];
 627		limit = LEN_LOW_SYMBOLS;
 628		s->lzma.len = MATCH_LEN_MIN;
 629	} else {
 630		if (!rc_bit(&s->rc, &l->choice2)) {
 631			probs = l->mid[pos_state];
 632			limit = LEN_MID_SYMBOLS;
 633			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
 634		} else {
 635			probs = l->high;
 636			limit = LEN_HIGH_SYMBOLS;
 637			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
 638					+ LEN_MID_SYMBOLS;
 639		}
 640	}
 641
 642	s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
 643}
 644
 645/* Decode a match. The distance will be stored in s->lzma.rep0. */
 646static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
 647{
 648	uint16_t *probs;
 649	uint32_t dist_slot;
 650	uint32_t limit;
 651
 652	lzma_state_match(&s->lzma.state);
 653
 654	s->lzma.rep3 = s->lzma.rep2;
 655	s->lzma.rep2 = s->lzma.rep1;
 656	s->lzma.rep1 = s->lzma.rep0;
 657
 658	lzma_len(s, &s->lzma.match_len_dec, pos_state);
 659
 660	probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
 661	dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
 662
 663	if (dist_slot < DIST_MODEL_START) {
 664		s->lzma.rep0 = dist_slot;
 665	} else {
 666		limit = (dist_slot >> 1) - 1;
 667		s->lzma.rep0 = 2 + (dist_slot & 1);
 668
 669		if (dist_slot < DIST_MODEL_END) {
 670			s->lzma.rep0 <<= limit;
 671			probs = s->lzma.dist_special + s->lzma.rep0
 672					- dist_slot - 1;
 673			rc_bittree_reverse(&s->rc, probs,
 674					&s->lzma.rep0, limit);
 675		} else {
 676			rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
 677			s->lzma.rep0 <<= ALIGN_BITS;
 678			rc_bittree_reverse(&s->rc, s->lzma.dist_align,
 679					&s->lzma.rep0, ALIGN_BITS);
 680		}
 681	}
 682}
 683
 684/*
 685 * Decode a repeated match. The distance is one of the four most recently
 686 * seen matches. The distance will be stored in s->lzma.rep0.
 687 */
 688static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
 689{
 690	uint32_t tmp;
 691
 692	if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
 693		if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
 694				s->lzma.state][pos_state])) {
 695			lzma_state_short_rep(&s->lzma.state);
 696			s->lzma.len = 1;
 697			return;
 698		}
 699	} else {
 700		if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
 701			tmp = s->lzma.rep1;
 702		} else {
 703			if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
 704				tmp = s->lzma.rep2;
 705			} else {
 706				tmp = s->lzma.rep3;
 707				s->lzma.rep3 = s->lzma.rep2;
 708			}
 709
 710			s->lzma.rep2 = s->lzma.rep1;
 711		}
 712
 713		s->lzma.rep1 = s->lzma.rep0;
 714		s->lzma.rep0 = tmp;
 715	}
 716
 717	lzma_state_long_rep(&s->lzma.state);
 718	lzma_len(s, &s->lzma.rep_len_dec, pos_state);
 719}
 720
 721/* LZMA decoder core */
 722static bool lzma_main(struct xz_dec_lzma2 *s)
 723{
 724	uint32_t pos_state;
 725
 726	/*
 727	 * If the dictionary limit was reached during the previous call, try to
 728	 * finish the possibly pending repeat in the dictionary.
 729	 */
 730	if (dict_has_space(&s->dict) && s->lzma.len > 0)
 731		dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
 732
 733	/*
 734	 * Decode more LZMA symbols. One iteration may consume up to
 735	 * LZMA_IN_REQUIRED - 1 bytes.
 736	 */
 737	while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
 738		pos_state = s->dict.pos & s->lzma.pos_mask;
 739
 740		if (!rc_bit(&s->rc, &s->lzma.is_match[
 741				s->lzma.state][pos_state])) {
 742			lzma_literal(s);
 743		} else {
 744			if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
 745				lzma_rep_match(s, pos_state);
 746			else
 747				lzma_match(s, pos_state);
 748
 749			if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
 750				return false;
 751		}
 752	}
 753
 754	/*
 755	 * Having the range decoder always normalized when we are outside
 756	 * this function makes it easier to correctly handle end of the chunk.
 757	 */
 758	rc_normalize(&s->rc);
 759
 760	return true;
 761}
 762
 763/*
 764 * Reset the LZMA decoder and range decoder state. Dictionary is not reset
 765 * here, because LZMA state may be reset without resetting the dictionary.
 766 */
 767static void lzma_reset(struct xz_dec_lzma2 *s)
 768{
 769	uint16_t *probs;
 770	size_t i;
 771
 772	s->lzma.state = STATE_LIT_LIT;
 773	s->lzma.rep0 = 0;
 774	s->lzma.rep1 = 0;
 775	s->lzma.rep2 = 0;
 776	s->lzma.rep3 = 0;
 777
 778	/*
 779	 * All probabilities are initialized to the same value. This hack
 780	 * makes the code smaller by avoiding a separate loop for each
 781	 * probability array.
 782	 *
 783	 * This could be optimized so that only the part of the literal
 784	 * probabilities that is actually required gets initialized. In
 785	 * the common case we would write 12 KiB less.
 786	 */
 787	probs = s->lzma.is_match[0];
 788	for (i = 0; i < PROBS_TOTAL; ++i)
 789		probs[i] = RC_BIT_MODEL_TOTAL / 2;
 790
 791	rc_reset(&s->rc);
 792}
 793
 794/*
 795 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
 796 * from the decoded lp and pb values. On success, the LZMA decoder state is
 797 * reset and true is returned.
 798 */
 799static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
 800{
 801	if (props > (4 * 5 + 4) * 9 + 8)
 802		return false;
 803
 804	s->lzma.pos_mask = 0;
 805	while (props >= 9 * 5) {
 806		props -= 9 * 5;
 807		++s->lzma.pos_mask;
 808	}
 809
 810	s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
 811
 812	s->lzma.literal_pos_mask = 0;
 813	while (props >= 9) {
 814		props -= 9;
 815		++s->lzma.literal_pos_mask;
 816	}
 817
 818	s->lzma.lc = props;
 819
 820	if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
 821		return false;
 822
 823	s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
 824
 825	lzma_reset(s);
 826
 827	return true;
 828}
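
/*
 * Worked example: the common property byte 93 (lc=3, lp=0, pb=2, encoded
 * as (2 * 5 + 0) * 9 + 3) passes the range check; the first loop
 * subtracts 45 twice leaving pos_mask == 0x3, the second loop leaves
 * literal_pos_mask == 0, and lc becomes 3.
 */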
 829
 830/*********
 831 * LZMA2 *
 832 *********/
 833
 834/*
 835 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
 836 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
 837 * wrapper function takes care of making the LZMA decoder's assumption safe.
 838 *
 839 * As long as there is plenty of input left to be decoded in the current LZMA
 840 * chunk, we decode directly from the caller-supplied input buffer until
 841 * there are LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
 842 * s->temp.buf, which (hopefully) gets filled on the next call to this
 843 * function. We decode a few bytes from the temporary buffer so that we can
 844 * continue decoding from the caller-supplied input buffer again.
 845 */
 846static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
 847{
 848	size_t in_avail;
 849	uint32_t tmp;
 850
 851	in_avail = b->in_size - b->in_pos;
 852	if (s->temp.size > 0 || s->lzma2.compressed == 0) {
 853		tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
 854		if (tmp > s->lzma2.compressed - s->temp.size)
 855			tmp = s->lzma2.compressed - s->temp.size;
 856		if (tmp > in_avail)
 857			tmp = in_avail;
 858
 859		memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
 860
 861		if (s->temp.size + tmp == s->lzma2.compressed) {
 862			memzero(s->temp.buf + s->temp.size + tmp,
 863					sizeof(s->temp.buf)
 864						- s->temp.size - tmp);
 865			s->rc.in_limit = s->temp.size + tmp;
 866		} else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
 867			s->temp.size += tmp;
 868			b->in_pos += tmp;
 869			return true;
 870		} else {
 871			s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
 872		}
 873
 874		s->rc.in = s->temp.buf;
 875		s->rc.in_pos = 0;
 876
 877		if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
 878			return false;
 879
 880		s->lzma2.compressed -= s->rc.in_pos;
 881
 882		if (s->rc.in_pos < s->temp.size) {
 883			s->temp.size -= s->rc.in_pos;
 884			memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
 885					s->temp.size);
 886			return true;
 887		}
 888
 889		b->in_pos += s->rc.in_pos - s->temp.size;
 890		s->temp.size = 0;
 891	}
 892
 893	in_avail = b->in_size - b->in_pos;
 894	if (in_avail >= LZMA_IN_REQUIRED) {
 895		s->rc.in = b->in;
 896		s->rc.in_pos = b->in_pos;
 897
 898		if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
 899			s->rc.in_limit = b->in_pos + s->lzma2.compressed;
 900		else
 901			s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
 902
 903		if (!lzma_main(s))
 904			return false;
 905
 906		in_avail = s->rc.in_pos - b->in_pos;
 907		if (in_avail > s->lzma2.compressed)
 908			return false;
 909
 910		s->lzma2.compressed -= in_avail;
 911		b->in_pos = s->rc.in_pos;
 912	}
 913
 914	in_avail = b->in_size - b->in_pos;
 915	if (in_avail < LZMA_IN_REQUIRED) {
 916		if (in_avail > s->lzma2.compressed)
 917			in_avail = s->lzma2.compressed;
 918
 919		memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
 920		s->temp.size = in_avail;
 921		b->in_pos += in_avail;
 922	}
 923
 924	return true;
 925}
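
/*
 * A concrete walk-through of temp.buf: suppose a call ends with 15 bytes
 * of a large chunk unread. Since 15 < LZMA_IN_REQUIRED, the final block
 * above copies them into temp.buf. On the next call the first block tops
 * temp.buf up toward 2 * LZMA_IN_REQUIRED (42) bytes and decodes from it;
 * once s->rc.in_pos has passed the old temp.size, the accounting at the
 * end of that block switches decoding back to the caller's buffer.
 */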
 926
 927/*
 928 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
 929 * decoding or copying of uncompressed chunks to other functions.
 930 */
 931XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
 932				       struct xz_buf *b)
 933{
 934	uint32_t tmp;
 935
 936	while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
 937		switch (s->lzma2.sequence) {
 938		case SEQ_CONTROL:
 939			/*
 940			 * LZMA2 control byte
 941			 *
 942			 * Exact values:
 943			 *   0x00   End marker
 944			 *   0x01   Dictionary reset followed by
 945			 *          an uncompressed chunk
 946			 *   0x02   Uncompressed chunk (no dictionary reset)
 947			 *
 948			 * Highest three bits (s->control & 0xE0):
 949			 *   0xE0   Dictionary reset, new properties and state
 950			 *          reset, followed by LZMA compressed chunk
 951			 *   0xC0   New properties and state reset, followed
 952			 *          by LZMA compressed chunk (no dictionary
 953			 *          reset)
 954			 *   0xA0   State reset using old properties,
 955			 *          followed by LZMA compressed chunk (no
 956			 *          dictionary reset)
 957			 *   0x80   LZMA chunk (no dictionary or state reset)
 958			 *
 959			 * For LZMA compressed chunks, the lowest five bits
 960			 * (s->control & 0x1F) are the highest bits of the
 961			 * uncompressed size (bits 16-20).
 962			 *
 963			 * A new LZMA2 stream must begin with a dictionary
 964			 * reset. The first LZMA chunk must set new
 965			 * properties and reset the LZMA state.
 966			 *
 967			 * Values that don't match anything described above
 968			 * are invalid and we return XZ_DATA_ERROR.
 969			 */
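			/*
			 * Example: control byte 0xE3 requests a dictionary
			 * reset with new properties, and contributes
			 * (0xE3 & 0x1F) << 16 == 3 << 16 to the uncompressed
			 * size completed in SEQ_UNCOMPRESSED_1/2 below.
			 */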
 970			tmp = b->in[b->in_pos++];
 971
 972			if (tmp == 0x00)
 973				return XZ_STREAM_END;
 974
 975			if (tmp >= 0xE0 || tmp == 0x01) {
 976				s->lzma2.need_props = true;
 977				s->lzma2.need_dict_reset = false;
 978				dict_reset(&s->dict, b);
 979			} else if (s->lzma2.need_dict_reset) {
 980				return XZ_DATA_ERROR;
 981			}
 982
 983			if (tmp >= 0x80) {
 984				s->lzma2.uncompressed = (tmp & 0x1F) << 16;
 985				s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
 986
 987				if (tmp >= 0xC0) {
 988					/*
 989					 * When there are new properties,
 990					 * state reset is done at
 991					 * SEQ_PROPERTIES.
 992					 */
 993					s->lzma2.need_props = false;
 994					s->lzma2.next_sequence
 995							= SEQ_PROPERTIES;
 996
 997				} else if (s->lzma2.need_props) {
 998					return XZ_DATA_ERROR;
 999
1000				} else {
1001					s->lzma2.next_sequence
1002							= SEQ_LZMA_PREPARE;
1003					if (tmp >= 0xA0)
1004						lzma_reset(s);
1005				}
1006			} else {
1007				if (tmp > 0x02)
1008					return XZ_DATA_ERROR;
1009
1010				s->lzma2.sequence = SEQ_COMPRESSED_0;
1011				s->lzma2.next_sequence = SEQ_COPY;
1012			}
1013
1014			break;
1015
1016		case SEQ_UNCOMPRESSED_1:
1017			s->lzma2.uncompressed
1018					+= (uint32_t)b->in[b->in_pos++] << 8;
1019			s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
1020			break;
1021
1022		case SEQ_UNCOMPRESSED_2:
1023			s->lzma2.uncompressed
1024					+= (uint32_t)b->in[b->in_pos++] + 1;
1025			s->lzma2.sequence = SEQ_COMPRESSED_0;
1026			break;
1027
1028		case SEQ_COMPRESSED_0:
1029			s->lzma2.compressed
1030					= (uint32_t)b->in[b->in_pos++] << 8;
1031			s->lzma2.sequence = SEQ_COMPRESSED_1;
1032			break;
1033
1034		case SEQ_COMPRESSED_1:
1035			s->lzma2.compressed
1036					+= (uint32_t)b->in[b->in_pos++] + 1;
1037			s->lzma2.sequence = s->lzma2.next_sequence;
1038			break;
1039
1040		case SEQ_PROPERTIES:
1041			if (!lzma_props(s, b->in[b->in_pos++]))
1042				return XZ_DATA_ERROR;
1043
1044			s->lzma2.sequence = SEQ_LZMA_PREPARE;
1045
1046			fallthrough;
1047
1048		case SEQ_LZMA_PREPARE:
1049			if (s->lzma2.compressed < RC_INIT_BYTES)
1050				return XZ_DATA_ERROR;
1051
1052			if (!rc_read_init(&s->rc, b))
1053				return XZ_OK;
1054
1055			s->lzma2.compressed -= RC_INIT_BYTES;
1056			s->lzma2.sequence = SEQ_LZMA_RUN;
1057
1058			fallthrough;
1059
1060		case SEQ_LZMA_RUN:
1061			/*
1062			 * Set the dictionary limit to indicate how much we
1063			 * want to decode at maximum. Decode new data into the
1064			 * dictionary. Flush the new data from dictionary to
1065			 * b->out. Check if we finished decoding this chunk.
1066			 * In case the dictionary got full but we didn't fill
1067			 * the output buffer yet, we may run this loop
1068			 * multiple times without changing s->lzma2.sequence.
1069			 */
1070			dict_limit(&s->dict, min_t(size_t,
1071					b->out_size - b->out_pos,
1072					s->lzma2.uncompressed));
1073			if (!lzma2_lzma(s, b))
1074				return XZ_DATA_ERROR;
1075
1076			s->lzma2.uncompressed -= dict_flush(&s->dict, b);
1077
1078			if (s->lzma2.uncompressed == 0) {
1079				if (s->lzma2.compressed > 0 || s->lzma.len > 0
1080						|| !rc_is_finished(&s->rc))
1081					return XZ_DATA_ERROR;
1082
1083				rc_reset(&s->rc);
1084				s->lzma2.sequence = SEQ_CONTROL;
1085
1086			} else if (b->out_pos == b->out_size
1087					|| (b->in_pos == b->in_size
1088						&& s->temp.size
1089						< s->lzma2.compressed)) {
1090				return XZ_OK;
1091			}
1092
1093			break;
1094
1095		case SEQ_COPY:
1096			dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
1097			if (s->lzma2.compressed > 0)
1098				return XZ_OK;
1099
1100			s->lzma2.sequence = SEQ_CONTROL;
1101			break;
1102		}
1103	}
1104
1105	return XZ_OK;
1106}
1107
1108XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
1109						   uint32_t dict_max)
1110{
1111	struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL);
1112	if (s == NULL)
1113		return NULL;
1114
1115	s->dict.mode = mode;
1116	s->dict.size_max = dict_max;
1117
1118	if (DEC_IS_PREALLOC(mode)) {
1119		s->dict.buf = vmalloc(dict_max);
1120		if (s->dict.buf == NULL) {
1121			kfree(s);
1122			return NULL;
1123		}
1124	} else if (DEC_IS_DYNALLOC(mode)) {
1125		s->dict.buf = NULL;
1126		s->dict.allocated = 0;
1127	}
1128
1129	return s;
1130}
1131
1132XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
1133{
1134	/* This limits dictionary size to 3 GiB to keep parsing simpler. */
1135	if (props > 39)
1136		return XZ_OPTIONS_ERROR;
1137
1138	s->dict.size = 2 + (props & 1);
1139	s->dict.size <<= (props >> 1) + 11;
1140
1141	if (DEC_IS_MULTI(s->dict.mode)) {
1142		if (s->dict.size > s->dict.size_max)
1143			return XZ_MEMLIMIT_ERROR;
1144
1145		s->dict.end = s->dict.size;
1146
1147		if (DEC_IS_DYNALLOC(s->dict.mode)) {
1148			if (s->dict.allocated < s->dict.size) {
1149				s->dict.allocated = s->dict.size;
1150				vfree(s->dict.buf);
1151				s->dict.buf = vmalloc(s->dict.size);
1152				if (s->dict.buf == NULL) {
1153					s->dict.allocated = 0;
1154					return XZ_MEM_ERROR;
1155				}
1156			}
1157		}
1158	}
1159
1160	s->lzma.len = 0;
1161
1162	s->lzma2.sequence = SEQ_CONTROL;
1163	s->lzma2.need_dict_reset = true;
1164
1165	s->temp.size = 0;
1166
1167	return XZ_OK;
1168}
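
/*
 * Worked example for the dictionary size decoding above: props == 22
 * gives (2 + 0) << (11 + 11) == 8 MiB, props == 23 gives 3 << 22 ==
 * 12 MiB, and the maximum accepted value 39 gives 3 << 30 == 3 GiB.
 */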
1169
1170XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
1171{
1172	if (DEC_IS_MULTI(s->dict.mode))
1173		vfree(s->dict.buf);
1174
1175	kfree(s);
1176}
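
/*
 * A hedged usage sketch (not part of the original file; the function name
 * is illustrative): driving this decoder directly in multi-call mode.
 * Normally these entry points are driven by the higher-level xz_dec_*
 * stream API rather than called like this.
 */
#if 0
static enum xz_ret example_decode_lzma2(const uint8_t *in, size_t in_size,
					uint8_t *out, size_t out_size)
{
	struct xz_buf b = {
		.in = in, .in_pos = 0, .in_size = in_size,
		.out = out, .out_pos = 0, .out_size = out_size
	};
	/* Allow dictionaries up to 8 MiB. */
	struct xz_dec_lzma2 *s = xz_dec_lzma2_create(XZ_PREALLOC, 8 << 20);
	enum xz_ret ret;

	if (s == NULL)
		return XZ_MEM_ERROR;

	/* props 22 selects exactly an 8 MiB dictionary (2 << 22). */
	ret = xz_dec_lzma2_reset(s, 22);
	if (ret == XZ_OK)
		ret = xz_dec_lzma2_run(s, &b);

	xz_dec_lzma2_end(s);
	return ret;	/* XZ_STREAM_END after a complete LZMA2 stream */
}
#endif
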
v6.2
   1/*
   2 * LZMA2 decoder
   3 *
   4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
   5 *          Igor Pavlov <https://7-zip.org/>
   6 *
   7 * This file has been put into the public domain.
   8 * You can do whatever you want with this file.
   9 */
  10
  11#include "xz_private.h"
  12#include "xz_lzma2.h"
  13
  14/*
  15 * Range decoder initialization eats the first five bytes of each LZMA chunk.
  16 */
  17#define RC_INIT_BYTES 5
  18
  19/*
  20 * Minimum number of usable input bytes needed to safely decode one LZMA
  21 * symbol. The worst case is that we decode 22 bits using probabilities
  22 * and 26 direct bits. This may consume at most 20 bytes of input.
  23 * However, lzma_main() does an extra normalization before returning,
  24 * thus we need to put 21 here.
  25 */
  26#define LZMA_IN_REQUIRED 21
  27
  28/*
  29 * Dictionary (history buffer)
  30 *
  31 * These are always true:
  32 *    start <= pos <= full <= end
  33 *    pos <= limit <= end
  34 *
  35 * In multi-call mode, also these are true:
  36 *    end == size
  37 *    size <= size_max
  38 *    allocated <= size
  39 *
  40 * Most of these variables are size_t to support single-call mode,
  41 * in which the dictionary variables address the actual output
  42 * buffer directly.
  43 */
  44struct dictionary {
  45	/* Beginning of the history buffer */
  46	uint8_t *buf;
  47
  48	/* Old position in buf (before decoding more data) */
  49	size_t start;
  50
  51	/* Position in buf */
  52	size_t pos;
  53
  54	/*
  55	 * How full the dictionary is. This is used to detect corrupt input
  56	 * that would read beyond the beginning of the uncompressed stream.
  57	 */
  58	size_t full;
  59
  60	/* Write limit; we don't write to buf[limit] or later bytes. */
  61	size_t limit;
  62
  63	/*
  64	 * End of the dictionary buffer. In multi-call mode, this is
  65	 * the same as the dictionary size. In single-call mode, this
  66	 * indicates the size of the output buffer.
  67	 */
  68	size_t end;
  69
  70	/*
  71	 * Size of the dictionary as specified in Block Header. This is used
  72	 * together with "full" to detect corrupt input that would make us
  73	 * read beyond the beginning of the uncompressed stream.
  74	 */
  75	uint32_t size;
  76
  77	/*
  78	 * Maximum allowed dictionary size in multi-call mode.
  79	 * This is ignored in single-call mode.
  80	 */
  81	uint32_t size_max;
  82
  83	/*
  84	 * Amount of memory currently allocated for the dictionary.
  85	 * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
  86	 * size_max is always the same as the allocated size.)
  87	 */
  88	uint32_t allocated;
  89
  90	/* Operation mode */
  91	enum xz_mode mode;
  92};
  93
  94/* Range decoder */
  95struct rc_dec {
  96	uint32_t range;
  97	uint32_t code;
  98
  99	/*
 100	 * Number of initializing bytes remaining to be read
 101	 * by rc_read_init().
 102	 */
 103	uint32_t init_bytes_left;
 104
 105	/*
 106	 * Buffer from which we read our input. It can be either
 107	 * temp.buf or the caller-provided input buffer.
 108	 */
 109	const uint8_t *in;
 110	size_t in_pos;
 111	size_t in_limit;
 112};
 113
 114/* Probabilities for a length decoder. */
 115struct lzma_len_dec {
 116	/* Probability of match length being at least 10 */
 117	uint16_t choice;
 118
 119	/* Probability of match length being at least 18 */
 120	uint16_t choice2;
 121
 122	/* Probabilities for match lengths 2-9 */
 123	uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
 124
 125	/* Probabilities for match lengths 10-17 */
 126	uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
 127
 128	/* Probabilities for match lengths 18-273 */
 129	uint16_t high[LEN_HIGH_SYMBOLS];
 130};
 131
 132struct lzma_dec {
 133	/* Distances of latest four matches */
 134	uint32_t rep0;
 135	uint32_t rep1;
 136	uint32_t rep2;
 137	uint32_t rep3;
 138
 139	/* Types of the most recently seen LZMA symbols */
 140	enum lzma_state state;
 141
 142	/*
 143	 * Length of a match. This is updated so that dict_repeat can
 144	 * be called again to finish repeating the whole match.
 145	 */
 146	uint32_t len;
 147
 148	/*
 149	 * LZMA properties or related bit masks (number of literal
 150	 * context bits, a mask derived from the number of literal
 151	 * position bits, and a mask derived from the number of
 152	 * position bits)
 153	 */
 154	uint32_t lc;
 155	uint32_t literal_pos_mask; /* (1 << lp) - 1 */
 156	uint32_t pos_mask;         /* (1 << pb) - 1 */
 157
 158	/* If 1, it's a match. Otherwise it's a single 8-bit literal. */
 159	uint16_t is_match[STATES][POS_STATES_MAX];
 160
 161	/* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
 162	uint16_t is_rep[STATES];
 163
 164	/*
 165	 * If 0, distance of a repeated match is rep0.
 166	 * Otherwise check is_rep1.
 167	 */
 168	uint16_t is_rep0[STATES];
 169
 170	/*
 171	 * If 0, distance of a repeated match is rep1.
 172	 * Otherwise check is_rep2.
 173	 */
 174	uint16_t is_rep1[STATES];
 175
 176	/* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
 177	uint16_t is_rep2[STATES];
 178
 179	/*
 180	 * If 1, the repeated match has a length of one byte. Otherwise
 181	 * the length is decoded from rep_len_decoder.
 182	 */
 183	uint16_t is_rep0_long[STATES][POS_STATES_MAX];
 184
 185	/*
 186	 * Probability tree for the highest two bits of the match
 187	 * distance. There is a separate probability tree for match
 188	 * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
 189	 */
 190	uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
 191
 192	/*
 193	 * Probability trees for additional bits for match distance
 194	 * when the distance is in the range [4, 127].
 195	 */
 196	uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
 197
 198	/*
 199	 * Probability tree for the lowest four bits of a match
 200	 * distance that is equal to or greater than 128.
 201	 */
 202	uint16_t dist_align[ALIGN_SIZE];
 203
 204	/* Length of a normal match */
 205	struct lzma_len_dec match_len_dec;
 206
 207	/* Length of a repeated match */
 208	struct lzma_len_dec rep_len_dec;
 209
 210	/* Probabilities of literals */
 211	uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
 212};
 213
 214struct lzma2_dec {
 215	/* Position in xz_dec_lzma2_run(). */
 216	enum lzma2_seq {
 217		SEQ_CONTROL,
 218		SEQ_UNCOMPRESSED_1,
 219		SEQ_UNCOMPRESSED_2,
 220		SEQ_COMPRESSED_0,
 221		SEQ_COMPRESSED_1,
 222		SEQ_PROPERTIES,
 223		SEQ_LZMA_PREPARE,
 224		SEQ_LZMA_RUN,
 225		SEQ_COPY
 226	} sequence;
 227
 228	/* Next position after decoding the compressed size of the chunk. */
 229	enum lzma2_seq next_sequence;
 230
 231	/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
 232	uint32_t uncompressed;
 233
 234	/*
 235	 * Compressed size of LZMA chunk or compressed/uncompressed
 236	 * size of uncompressed chunk (64 KiB at maximum)
 237	 */
 238	uint32_t compressed;
 239
 240	/*
 241	 * True if dictionary reset is needed. This is false before
 242	 * the first chunk (LZMA or uncompressed).
 243	 */
 244	bool need_dict_reset;
 245
 246	/*
 247	 * True if new LZMA properties are needed. This is false
 248	 * before the first LZMA chunk.
 249	 */
 250	bool need_props;
 251
 252#ifdef XZ_DEC_MICROLZMA
 253	bool pedantic_microlzma;
 254#endif
 255};
 256
 257struct xz_dec_lzma2 {
 258	/*
 259	 * The order below is important on x86 to reduce code size and
 260	 * it shouldn't hurt on other platforms. Everything up to and
 261	 * including lzma.pos_mask are in the first 128 bytes on x86-32,
 262	 * which allows using smaller instructions to access those
 263	 * variables. On x86-64, fewer variables fit into the first 128
 264	 * bytes, but this is still the best order without sacrificing
 265	 * the readability by splitting the structures.
 266	 */
 267	struct rc_dec rc;
 268	struct dictionary dict;
 269	struct lzma2_dec lzma2;
 270	struct lzma_dec lzma;
 271
 272	/*
 273	 * Temporary buffer which holds a small number of input bytes between
 274	 * decoder calls. See lzma2_lzma() for details.
 275	 */
 276	struct {
 277		uint32_t size;
 278		uint8_t buf[3 * LZMA_IN_REQUIRED];
 279	} temp;
 280};
 281
 282/**************
 283 * Dictionary *
 284 **************/
 285
 286/*
 287 * Reset the dictionary state. When in single-call mode, set up the beginning
 288 * of the dictionary to point to the actual output buffer.
 289 */
 290static void dict_reset(struct dictionary *dict, struct xz_buf *b)
 291{
 292	if (DEC_IS_SINGLE(dict->mode)) {
 293		dict->buf = b->out + b->out_pos;
 294		dict->end = b->out_size - b->out_pos;
 295	}
 296
 297	dict->start = 0;
 298	dict->pos = 0;
 299	dict->limit = 0;
 300	dict->full = 0;
 301}
 302
 303/* Set dictionary write limit */
 304static void dict_limit(struct dictionary *dict, size_t out_max)
 305{
 306	if (dict->end - dict->pos <= out_max)
 307		dict->limit = dict->end;
 308	else
 309		dict->limit = dict->pos + out_max;
 310}
 311
 312/* Return true if at least one byte can be written into the dictionary. */
 313static inline bool dict_has_space(const struct dictionary *dict)
 314{
 315	return dict->pos < dict->limit;
 316}
 317
 318/*
 319 * Get a byte from the dictionary at the given distance. The distance is
 320 * assumed to be valid, or as a special case, zero when the dictionary is
 321 * still empty. This special case is needed for single-call decoding to
 322 * avoid writing a '\0' to the end of the destination buffer.
 323 */
 324static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
 325{
 326	size_t offset = dict->pos - dist - 1;
 327
 328	if (dist >= dict->pos)
 329		offset += dict->end;
 330
 331	return dict->full > 0 ? dict->buf[offset] : 0;
 332}
 333
 334/*
 335 * Put one byte into the dictionary. It is assumed that there is space for it.
 336 */
 337static inline void dict_put(struct dictionary *dict, uint8_t byte)
 338{
 339	dict->buf[dict->pos++] = byte;
 340
 341	if (dict->full < dict->pos)
 342		dict->full = dict->pos;
 343}
 344
 345/*
 346 * Repeat given number of bytes from the given distance. If the distance is
 347 * invalid, false is returned. On success, true is returned and *len is
 348 * updated to indicate how many bytes were left to be repeated.
 349 */
 350static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
 351{
 352	size_t back;
 353	uint32_t left;
 354
 355	if (dist >= dict->full || dist >= dict->size)
 356		return false;
 357
 358	left = min_t(size_t, dict->limit - dict->pos, *len);
 359	*len -= left;
 360
 361	back = dict->pos - dist - 1;
 362	if (dist >= dict->pos)
 363		back += dict->end;
 364
 365	do {
 366		dict->buf[dict->pos++] = dict->buf[back++];
 367		if (back == dict->end)
 368			back = 0;
 369	} while (--left > 0);
 370
 371	if (dict->full < dict->pos)
 372		dict->full = dict->pos;
 373
 374	return true;
 375}
 376
 377/* Copy uncompressed data as is from input to dictionary and output buffers. */
 378static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
 379			      uint32_t *left)
 380{
 381	size_t copy_size;
 382
 383	while (*left > 0 && b->in_pos < b->in_size
 384			&& b->out_pos < b->out_size) {
 385		copy_size = min(b->in_size - b->in_pos,
 386				b->out_size - b->out_pos);
 387		if (copy_size > dict->end - dict->pos)
 388			copy_size = dict->end - dict->pos;
 389		if (copy_size > *left)
 390			copy_size = *left;
 391
 392		*left -= copy_size;
 393
 394		/*
 395		 * If doing in-place decompression in single-call mode and the
 396		 * uncompressed size of the file is larger than the caller
 397		 * thought (i.e. it is invalid input!), the buffers below may
 398		 * overlap and cause undefined behavior with memcpy().
 399		 * With valid inputs memcpy() would be fine here.
 400		 */
 401		memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
 402		dict->pos += copy_size;
 403
 404		if (dict->full < dict->pos)
 405			dict->full = dict->pos;
 406
 407		if (DEC_IS_MULTI(dict->mode)) {
 408			if (dict->pos == dict->end)
 409				dict->pos = 0;
 410
 411			/*
 412			 * Like above but for multi-call mode: use memmove()
 413			 * to avoid undefined behavior with invalid input.
 414			 */
 415			memmove(b->out + b->out_pos, b->in + b->in_pos,
 416					copy_size);
 417		}
 418
 419		dict->start = dict->pos;
 420
 421		b->out_pos += copy_size;
 422		b->in_pos += copy_size;
 423	}
 424}
 425
 426#ifdef XZ_DEC_MICROLZMA
 427#	define DICT_FLUSH_SUPPORTS_SKIPPING true
 428#else
 429#	define DICT_FLUSH_SUPPORTS_SKIPPING false
 430#endif
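
/*
 * When XZ_DEC_MICROLZMA is disabled, this constant false lets the
 * compiler drop the b->out NULL check in dict_flush() below entirely.
 */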
 431
 432/*
 433 * Flush pending data from dictionary to b->out. It is assumed that there is
 434 * enough space in b->out. This is guaranteed because caller uses dict_limit()
 435 * before decoding data into the dictionary.
 436 */
 437static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
 438{
 439	size_t copy_size = dict->pos - dict->start;
 440
 441	if (DEC_IS_MULTI(dict->mode)) {
 442		if (dict->pos == dict->end)
 443			dict->pos = 0;
 444
 445		/*
 446		 * These buffers cannot overlap even if doing in-place
 447		 * decompression because in multi-call mode dict->buf
 448		 * has been allocated by us in this file; it's not
 449		 * provided by the caller like in single-call mode.
 450		 *
 451		 * With MicroLZMA, b->out can be NULL to skip bytes that
 452		 * the caller doesn't need. This cannot be done with XZ
 453		 * because it would break BCJ filters.
 454		 */
 455		if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL)
 456			memcpy(b->out + b->out_pos, dict->buf + dict->start,
 457					copy_size);
 458	}
 459
 460	dict->start = dict->pos;
 461	b->out_pos += copy_size;
 462	return copy_size;
 463}
 464
 465/*****************
 466 * Range decoder *
 467 *****************/
 468
 469/* Reset the range decoder. */
 470static void rc_reset(struct rc_dec *rc)
 471{
 472	rc->range = (uint32_t)-1;
 473	rc->code = 0;
 474	rc->init_bytes_left = RC_INIT_BYTES;
 475}
 476
 477/*
 478 * Read the first five initial bytes into rc->code if they haven't been
 479 * read already. (Yes, the first byte gets completely ignored.)
 480 */
 481static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b)
 482{
 483	while (rc->init_bytes_left > 0) {
 484		if (b->in_pos == b->in_size)
 485			return false;
 486
 487		rc->code = (rc->code << 8) + b->in[b->in_pos++];
 488		--rc->init_bytes_left;
 489	}
 490
 491	return true;
 492}
 493
 494/* Return true if there may not be enough input for the next decoding loop. */
 495static inline bool rc_limit_exceeded(const struct rc_dec *rc)
 496{
 497	return rc->in_pos > rc->in_limit;
 498}
 499
 500/*
 501 * Return true if it is possible (from the range decoder's point of view) that
 502 * we have reached the end of the LZMA chunk.
 503 */
 504static inline bool rc_is_finished(const struct rc_dec *rc)
 505{
 506	return rc->code == 0;
 507}
 508
 509/* Read the next input byte if needed. */
 510static __always_inline void rc_normalize(struct rc_dec *rc)
 511{
 512	if (rc->range < RC_TOP_VALUE) {
 513		rc->range <<= RC_SHIFT_BITS;
 514		rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
 515	}
 516}
 517
 518/*
 519 * Decode one bit. In some versions, this function has been split into three
 520 * functions so that the compiler is supposed to be able to more easily avoid
 521 * an extra branch. In this particular version of the LZMA decoder, this
 522 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
 523 * on x86). Using a non-split version results in nicer looking code too.
 524 *
 525 * NOTE: This must return an int. Do not make it return a bool or the speed
 526 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
 527 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
 528 */
 529static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
 530{
 531	uint32_t bound;
 532	int bit;
 533
 534	rc_normalize(rc);
 535	bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
 536	if (rc->code < bound) {
 537		rc->range = bound;
 538		*prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
 539		bit = 0;
 540	} else {
 541		rc->range -= bound;
 542		rc->code -= bound;
 543		*prob -= *prob >> RC_MOVE_BITS;
 544		bit = 1;
 545	}
 546
 547	return bit;
 548}
 549
 550/* Decode a bittree starting from the most significant bit. */
 551static __always_inline uint32_t rc_bittree(struct rc_dec *rc,
 552					   uint16_t *probs, uint32_t limit)
 553{
 554	uint32_t symbol = 1;
 555
 556	do {
 557		if (rc_bit(rc, &probs[symbol]))
 558			symbol = (symbol << 1) + 1;
 559		else
 560			symbol <<= 1;
 561	} while (symbol < limit);
 562
 563	return symbol;
 564}
 565
 566/* Decode a bittree starting from the least significant bit. */
 567static __always_inline void rc_bittree_reverse(struct rc_dec *rc,
 568					       uint16_t *probs,
 569					       uint32_t *dest, uint32_t limit)
 570{
 571	uint32_t symbol = 1;
 572	uint32_t i = 0;
 573
 574	do {
 575		if (rc_bit(rc, &probs[symbol])) {
 576			symbol = (symbol << 1) + 1;
 577			*dest += 1 << i;
 578		} else {
 579			symbol <<= 1;
 580		}
 581	} while (++i < limit);
 582}
 583
 584/* Decode direct bits (fixed fifty-fifty probability) */
 585static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
 586{
 587	uint32_t mask;
 588
 589	do {
 590		rc_normalize(rc);
 591		rc->range >>= 1;
 592		rc->code -= rc->range;
 593		mask = (uint32_t)0 - (rc->code >> 31);
 594		rc->code += rc->range & mask;
 595		*dest = (*dest << 1) + (mask + 1);
 596	} while (--limit > 0);
 597}
 598
 599/********
 600 * LZMA *
 601 ********/
 602
 603/* Get pointer to literal coder probability array. */
 604static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
 605{
 606	uint32_t prev_byte = dict_get(&s->dict, 0);
 607	uint32_t low = prev_byte >> (8 - s->lzma.lc);
 608	uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
 609	return s->lzma.literal[low + high];
 610}
 611
 612/* Decode a literal (one 8-bit byte) */
 613static void lzma_literal(struct xz_dec_lzma2 *s)
 614{
 615	uint16_t *probs;
 616	uint32_t symbol;
 617	uint32_t match_byte;
 618	uint32_t match_bit;
 619	uint32_t offset;
 620	uint32_t i;
 621
 622	probs = lzma_literal_probs(s);
 623
 624	if (lzma_state_is_literal(s->lzma.state)) {
 625		symbol = rc_bittree(&s->rc, probs, 0x100);
 626	} else {
 627		symbol = 1;
 628		match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
 629		offset = 0x100;
 630
 631		do {
 632			match_bit = match_byte & offset;
 633			match_byte <<= 1;
 634			i = offset + match_bit + symbol;
 635
 636			if (rc_bit(&s->rc, &probs[i])) {
 637				symbol = (symbol << 1) + 1;
 638				offset &= match_bit;
 639			} else {
 640				symbol <<= 1;
 641				offset &= ~match_bit;
 642			}
 643		} while (symbol < 0x100);
 644	}
 645
 646	dict_put(&s->dict, (uint8_t)symbol);
 647	lzma_state_literal(&s->lzma.state);
 648}
 649
 650/* Decode the length of the match into s->lzma.len. */
 651static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
 652		     uint32_t pos_state)
 653{
 654	uint16_t *probs;
 655	uint32_t limit;
 656
 657	if (!rc_bit(&s->rc, &l->choice)) {
 658		probs = l->low[pos_state];
 659		limit = LEN_LOW_SYMBOLS;
 660		s->lzma.len = MATCH_LEN_MIN;
 661	} else {
 662		if (!rc_bit(&s->rc, &l->choice2)) {
 663			probs = l->mid[pos_state];
 664			limit = LEN_MID_SYMBOLS;
 665			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
 666		} else {
 667			probs = l->high;
 668			limit = LEN_HIGH_SYMBOLS;
 669			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
 670					+ LEN_MID_SYMBOLS;
 671		}
 672	}
 673
 674	s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
 675}
 676
 677/* Decode a match. The distance will be stored in s->lzma.rep0. */
 678static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
 679{
 680	uint16_t *probs;
 681	uint32_t dist_slot;
 682	uint32_t limit;
 683
 684	lzma_state_match(&s->lzma.state);
 685
 686	s->lzma.rep3 = s->lzma.rep2;
 687	s->lzma.rep2 = s->lzma.rep1;
 688	s->lzma.rep1 = s->lzma.rep0;
 689
 690	lzma_len(s, &s->lzma.match_len_dec, pos_state);
 691
 692	probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
 693	dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
 694
 695	if (dist_slot < DIST_MODEL_START) {
 696		s->lzma.rep0 = dist_slot;
 697	} else {
 698		limit = (dist_slot >> 1) - 1;
 699		s->lzma.rep0 = 2 + (dist_slot & 1);
 700
 701		if (dist_slot < DIST_MODEL_END) {
 702			s->lzma.rep0 <<= limit;
 703			probs = s->lzma.dist_special + s->lzma.rep0
 704					- dist_slot - 1;
 705			rc_bittree_reverse(&s->rc, probs,
 706					&s->lzma.rep0, limit);
 707		} else {
 708			rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
 709			s->lzma.rep0 <<= ALIGN_BITS;
 710			rc_bittree_reverse(&s->rc, s->lzma.dist_align,
 711					&s->lzma.rep0, ALIGN_BITS);
 712		}
 713	}
 714}
 715
 716/*
 717 * Decode a repeated match. The distance is one of the four most recently
 718 * seen matches. The distance will be stored in s->lzma.rep0.
 719 */
 720static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
 721{
 722	uint32_t tmp;
 723
 724	if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
 725		if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
 726				s->lzma.state][pos_state])) {
 727			lzma_state_short_rep(&s->lzma.state);
 728			s->lzma.len = 1;
 729			return;
 730		}
 731	} else {
 732		if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
 733			tmp = s->lzma.rep1;
 734		} else {
 735			if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
 736				tmp = s->lzma.rep2;
 737			} else {
 738				tmp = s->lzma.rep3;
 739				s->lzma.rep3 = s->lzma.rep2;
 740			}
 741
 742			s->lzma.rep2 = s->lzma.rep1;
 743		}
 744
 745		s->lzma.rep1 = s->lzma.rep0;
 746		s->lzma.rep0 = tmp;
 747	}
 748
 749	lzma_state_long_rep(&s->lzma.state);
 750	lzma_len(s, &s->lzma.rep_len_dec, pos_state);
 751}
 752
 753/* LZMA decoder core */
 754static bool lzma_main(struct xz_dec_lzma2 *s)
 755{
 756	uint32_t pos_state;
 757
 758	/*
 759	 * If the dictionary limit was reached during the previous call, try to
 760	 * finish the possibly pending repeat in the dictionary.
 761	 */
 762	if (dict_has_space(&s->dict) && s->lzma.len > 0)
 763		dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
 764
 765	/*
 766	 * Decode more LZMA symbols. One iteration may consume up to
 767	 * LZMA_IN_REQUIRED - 1 bytes.
 768	 */
 769	while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
 770		pos_state = s->dict.pos & s->lzma.pos_mask;
 771
 772		if (!rc_bit(&s->rc, &s->lzma.is_match[
 773				s->lzma.state][pos_state])) {
 774			lzma_literal(s);
 775		} else {
 776			if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
 777				lzma_rep_match(s, pos_state);
 778			else
 779				lzma_match(s, pos_state);
 780
 781			if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
 782				return false;
 783		}
 784	}
 785
 786	/*
 787	 * Having the range decoder always normalized when we are outside
 788	 * this function makes it easier to correctly handle end of the chunk.
 789	 */
 790	rc_normalize(&s->rc);
 791
 792	return true;
 793}
 794
 795/*
 796 * Reset the LZMA decoder and range decoder state. Dictionary is not reset
 797 * here, because LZMA state may be reset without resetting the dictionary.
 798 */
 799static void lzma_reset(struct xz_dec_lzma2 *s)
 800{
 801	uint16_t *probs;
 802	size_t i;
 803
 804	s->lzma.state = STATE_LIT_LIT;
 805	s->lzma.rep0 = 0;
 806	s->lzma.rep1 = 0;
 807	s->lzma.rep2 = 0;
 808	s->lzma.rep3 = 0;
 809	s->lzma.len = 0;
 810
 811	/*
 812	 * All probabilities are initialized to the same value. This hack
 813	 * makes the code smaller by avoiding a separate loop for each
 814	 * probability array.
 815	 *
 816	 * This could be optimized so that only the part of the literal
 817	 * probabilities that is actually required would be initialized.
 818	 * In the common case we would write 12 KiB less.
 819	 */
 820	probs = s->lzma.is_match[0];
 821	for (i = 0; i < PROBS_TOTAL; ++i)
 822		probs[i] = RC_BIT_MODEL_TOTAL / 2;
 823
 824	rc_reset(&s->rc);
 825}
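
    /*
     * RC_BIT_MODEL_TOTAL is 1 << 11, so every probability above starts
     * at 1024, i.e. both bit values are initially considered equally
     * likely (p == 0.5).
     */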
 826
 827/*
 828 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
 829 * from the decoded lp and pb values. On success, the LZMA decoder state is
 830 * reset and true is returned.
 831 */
 832static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
 833{
 834	if (props > (4 * 5 + 4) * 9 + 8)
 835		return false;
 836
 837	s->lzma.pos_mask = 0;
 838	while (props >= 9 * 5) {
 839		props -= 9 * 5;
 840		++s->lzma.pos_mask;
 841	}
 842
 843	s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
 844
 845	s->lzma.literal_pos_mask = 0;
 846	while (props >= 9) {
 847		props -= 9;
 848		++s->lzma.literal_pos_mask;
 849	}
 850
 851	s->lzma.lc = props;
 852
 853	if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
 854		return false;
 855
 856	s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
 857
 858	lzma_reset(s);
 859
 860	return true;
 861}
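
    /*
     * Worked example: the props byte is pb * 45 + lp * 9 + lc. The
     * common value 93 (0x5D: lc=3, lp=0, pb=2) passes the limit check
     * (93 <= 224), the first loop subtracts 45 twice giving
     * pos_mask == 3, the second loop leaves props == 3 < 9 giving
     * literal_pos_mask == 0, and thus lc == 3.
     */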
 862
 863/*********
 864 * LZMA2 *
 865 *********/
 866
 867/*
 868 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
 869 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
 870 * wrapper function takes care of making the LZMA decoder's assumption safe.
 871 *
 872 * As long as there is plenty of input left to be decoded in the current LZMA
 873 * chunk, we decode directly from the caller-supplied input buffer until
 874 * there are LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
 875 * s->temp.buf, which (hopefully) gets filled on the next call to this
 876 * function. We decode a few bytes from the temporary buffer so that we can
 877 * continue decoding from the caller-supplied input buffer again.
 878 */
 879static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
 880{
 881	size_t in_avail;
 882	uint32_t tmp;
 883
 884	in_avail = b->in_size - b->in_pos;
 885	if (s->temp.size > 0 || s->lzma2.compressed == 0) {
 886		tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
 887		if (tmp > s->lzma2.compressed - s->temp.size)
 888			tmp = s->lzma2.compressed - s->temp.size;
 889		if (tmp > in_avail)
 890			tmp = in_avail;
 891
 892		memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
 893
 894		if (s->temp.size + tmp == s->lzma2.compressed) {
 895			memzero(s->temp.buf + s->temp.size + tmp,
 896					sizeof(s->temp.buf)
 897						- s->temp.size - tmp);
 898			s->rc.in_limit = s->temp.size + tmp;
 899		} else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
 900			s->temp.size += tmp;
 901			b->in_pos += tmp;
 902			return true;
 903		} else {
 904			s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
 905		}
 906
 907		s->rc.in = s->temp.buf;
 908		s->rc.in_pos = 0;
 909
 910		if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
 911			return false;
 912
 913		s->lzma2.compressed -= s->rc.in_pos;
 914
 915		if (s->rc.in_pos < s->temp.size) {
 916			s->temp.size -= s->rc.in_pos;
 917			memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
 918					s->temp.size);
 919			return true;
 920		}
 921
 922		b->in_pos += s->rc.in_pos - s->temp.size;
 923		s->temp.size = 0;
 924	}
 925
 926	in_avail = b->in_size - b->in_pos;
 927	if (in_avail >= LZMA_IN_REQUIRED) {
 928		s->rc.in = b->in;
 929		s->rc.in_pos = b->in_pos;
 930
 931		if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
 932			s->rc.in_limit = b->in_pos + s->lzma2.compressed;
 933		else
 934			s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
 935
 936		if (!lzma_main(s))
 937			return false;
 938
 939		in_avail = s->rc.in_pos - b->in_pos;
 940		if (in_avail > s->lzma2.compressed)
 941			return false;
 942
 943		s->lzma2.compressed -= in_avail;
 944		b->in_pos = s->rc.in_pos;
 945	}
 946
 947	in_avail = b->in_size - b->in_pos;
 948	if (in_avail < LZMA_IN_REQUIRED) {
 949		if (in_avail > s->lzma2.compressed)
 950			in_avail = s->lzma2.compressed;
 951
 952		memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
 953		s->temp.size = in_avail;
 954		b->in_pos += in_avail;
 955	}
 956
 957	return true;
 958}
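
    /*
     * Numeric example of the wrapper above: if the chunk still has
     * 1000 compressed bytes but the caller provided only 100, the
     * decoder runs on the caller's buffer with in_limit set
     * LZMA_IN_REQUIRED bytes short of the end. Assuming the dictionary
     * limit isn't hit first, the remaining bytes (fewer than
     * LZMA_IN_REQUIRED) are stashed in s->temp.buf, and on the next
     * call temp.buf is topped up to at most 2 * LZMA_IN_REQUIRED bytes
     * so that decoding can resume from there before switching back to
     * the caller's buffer.
     */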
 959
 960/*
 961 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
 962 * decoding or copying of uncompressed chunks to other functions.
 963 */
 964XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
 965				       struct xz_buf *b)
 966{
 967	uint32_t tmp;
 968
 969	while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
 970		switch (s->lzma2.sequence) {
 971		case SEQ_CONTROL:
 972			/*
 973			 * LZMA2 control byte
 974			 *
 975			 * Exact values:
 976			 *   0x00   End marker
 977			 *   0x01   Dictionary reset followed by
 978			 *          an uncompressed chunk
 979			 *   0x02   Uncompressed chunk (no dictionary reset)
 980			 *
 981			 * Highest three bits (s->control & 0xE0):
 982			 *   0xE0   Dictionary reset, new properties and state
 983			 *          reset, followed by LZMA compressed chunk
 984			 *   0xC0   New properties and state reset, followed
 985			 *          by LZMA compressed chunk (no dictionary
 986			 *          reset)
 987			 *   0xA0   State reset using old properties,
 988			 *          followed by LZMA compressed chunk (no
 989			 *          dictionary reset)
 990			 *   0x80   LZMA chunk (no dictionary or state reset)
 991			 *
 992			 * For LZMA compressed chunks, the lowest five bits
 993			 * (s->control & 0x1F) are the highest bits of the
 994			 * uncompressed size (bits 16-20).
 995			 *
 996			 * A new LZMA2 stream must begin with a dictionary
 997			 * reset. The first LZMA chunk must set new
 998			 * properties and reset the LZMA state.
 999			 *
1000			 * Values that don't match anything described above
1001			 * are invalid and we return XZ_DATA_ERROR.
1002			 */
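    			/*
    			 * For example, the chunk header 0xE0 0x00 0xFF
    			 * 0x01 0xFF 0x5D means: dictionary reset, state
    			 * reset and new properties 0x5D (lc=3, lp=0,
    			 * pb=2), uncompressed size (0 << 16) + 0x00FF
    			 * + 1 = 256 bytes, and compressed size
    			 * 0x01FF + 1 = 512 bytes.
    			 */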
1003			tmp = b->in[b->in_pos++];
1004
1005			if (tmp == 0x00)
1006				return XZ_STREAM_END;
1007
1008			if (tmp >= 0xE0 || tmp == 0x01) {
1009				s->lzma2.need_props = true;
1010				s->lzma2.need_dict_reset = false;
1011				dict_reset(&s->dict, b);
1012			} else if (s->lzma2.need_dict_reset) {
1013				return XZ_DATA_ERROR;
1014			}
1015
1016			if (tmp >= 0x80) {
1017				s->lzma2.uncompressed = (tmp & 0x1F) << 16;
1018				s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
1019
1020				if (tmp >= 0xC0) {
1021					/*
1022					 * When there are new properties,
1023					 * state reset is done at
1024					 * SEQ_PROPERTIES.
1025					 */
1026					s->lzma2.need_props = false;
1027					s->lzma2.next_sequence
1028							= SEQ_PROPERTIES;
1029
1030				} else if (s->lzma2.need_props) {
1031					return XZ_DATA_ERROR;
1032
1033				} else {
1034					s->lzma2.next_sequence
1035							= SEQ_LZMA_PREPARE;
1036					if (tmp >= 0xA0)
1037						lzma_reset(s);
1038				}
1039			} else {
1040				if (tmp > 0x02)
1041					return XZ_DATA_ERROR;
1042
1043				s->lzma2.sequence = SEQ_COMPRESSED_0;
1044				s->lzma2.next_sequence = SEQ_COPY;
1045			}
1046
1047			break;
1048
1049		case SEQ_UNCOMPRESSED_1:
1050			s->lzma2.uncompressed
1051					+= (uint32_t)b->in[b->in_pos++] << 8;
1052			s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
1053			break;
1054
1055		case SEQ_UNCOMPRESSED_2:
1056			s->lzma2.uncompressed
1057					+= (uint32_t)b->in[b->in_pos++] + 1;
1058			s->lzma2.sequence = SEQ_COMPRESSED_0;
1059			break;
1060
1061		case SEQ_COMPRESSED_0:
1062			s->lzma2.compressed
1063					= (uint32_t)b->in[b->in_pos++] << 8;
1064			s->lzma2.sequence = SEQ_COMPRESSED_1;
1065			break;
1066
1067		case SEQ_COMPRESSED_1:
1068			s->lzma2.compressed
1069					+= (uint32_t)b->in[b->in_pos++] + 1;
1070			s->lzma2.sequence = s->lzma2.next_sequence;
1071			break;
1072
1073		case SEQ_PROPERTIES:
1074			if (!lzma_props(s, b->in[b->in_pos++]))
1075				return XZ_DATA_ERROR;
1076
1077			s->lzma2.sequence = SEQ_LZMA_PREPARE;
1078
1079			fallthrough;
1080
1081		case SEQ_LZMA_PREPARE:
1082			if (s->lzma2.compressed < RC_INIT_BYTES)
1083				return XZ_DATA_ERROR;
1084
1085			if (!rc_read_init(&s->rc, b))
1086				return XZ_OK;
1087
1088			s->lzma2.compressed -= RC_INIT_BYTES;
1089			s->lzma2.sequence = SEQ_LZMA_RUN;
1090
1091			fallthrough;
1092
1093		case SEQ_LZMA_RUN:
1094			/*
1095			 * Set the dictionary limit to indicate how much we
1096			 * want decoded at most. Decode new data into the
1097			 * dictionary. Flush the new data from dictionary to
1098			 * b->out. Check if we finished decoding this chunk.
1099			 * In case the dictionary got full but we didn't fill
1100			 * the output buffer yet, we may run this loop
1101			 * multiple times without changing s->lzma2.sequence.
1102			 */
1103			dict_limit(&s->dict, min_t(size_t,
1104					b->out_size - b->out_pos,
1105					s->lzma2.uncompressed));
1106			if (!lzma2_lzma(s, b))
1107				return XZ_DATA_ERROR;
1108
1109			s->lzma2.uncompressed -= dict_flush(&s->dict, b);
1110
1111			if (s->lzma2.uncompressed == 0) {
1112				if (s->lzma2.compressed > 0 || s->lzma.len > 0
1113						|| !rc_is_finished(&s->rc))
1114					return XZ_DATA_ERROR;
1115
1116				rc_reset(&s->rc);
1117				s->lzma2.sequence = SEQ_CONTROL;
1118
1119			} else if (b->out_pos == b->out_size
1120					|| (b->in_pos == b->in_size
1121						&& s->temp.size
1122						< s->lzma2.compressed)) {
1123				return XZ_OK;
1124			}
1125
1126			break;
1127
1128		case SEQ_COPY:
1129			dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
1130			if (s->lzma2.compressed > 0)
1131				return XZ_OK;
1132
1133			s->lzma2.sequence = SEQ_CONTROL;
1134			break;
1135		}
1136	}
1137
1138	return XZ_OK;
1139}
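
    /*
     * To summarize the sequencing above: an LZMA chunk goes through
     * SEQ_CONTROL -> SEQ_UNCOMPRESSED_1 -> SEQ_UNCOMPRESSED_2 ->
     * SEQ_COMPRESSED_0 -> SEQ_COMPRESSED_1 -> (SEQ_PROPERTIES ->)
     * SEQ_LZMA_PREPARE -> SEQ_LZMA_RUN, while an uncompressed chunk
     * goes SEQ_CONTROL -> SEQ_COMPRESSED_0 -> SEQ_COMPRESSED_1 ->
     * SEQ_COPY.
     */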
1140
1141XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
1142						   uint32_t dict_max)
1143{
1144	struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL);
1145	if (s == NULL)
1146		return NULL;
1147
1148	s->dict.mode = mode;
1149	s->dict.size_max = dict_max;
1150
1151	if (DEC_IS_PREALLOC(mode)) {
1152		s->dict.buf = vmalloc(dict_max);
1153		if (s->dict.buf == NULL) {
1154			kfree(s);
1155			return NULL;
1156		}
1157	} else if (DEC_IS_DYNALLOC(mode)) {
1158		s->dict.buf = NULL;
1159		s->dict.allocated = 0;
1160	}
1161
1162	return s;
1163}
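
    /*
     * Note the difference between the modes: with XZ_PREALLOC the whole
     * dict_max is vmalloc'ed here, while with XZ_DYNALLOC the allocation
     * is deferred to xz_dec_lzma2_reset() where the actual dictionary
     * size of the Block is known.
     */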
1164
1165XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
1166{
1167	/* This limits dictionary size to 3 GiB to keep parsing simpler. */
1168	if (props > 39)
1169		return XZ_OPTIONS_ERROR;
1170
1171	s->dict.size = 2 + (props & 1);
1172	s->dict.size <<= (props >> 1) + 11;
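    	/*
    	 * For example, props == 0 gives 2 << 11 = 4 KiB, props == 1 gives
    	 * 3 << 11 = 6 KiB, and the maximum props == 39 gives 3 << 30 = 3 GiB.
    	 */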
1173
1174	if (DEC_IS_MULTI(s->dict.mode)) {
1175		if (s->dict.size > s->dict.size_max)
1176			return XZ_MEMLIMIT_ERROR;
1177
1178		s->dict.end = s->dict.size;
1179
1180		if (DEC_IS_DYNALLOC(s->dict.mode)) {
1181			if (s->dict.allocated < s->dict.size) {
1182				s->dict.allocated = s->dict.size;
1183				vfree(s->dict.buf);
1184				s->dict.buf = vmalloc(s->dict.size);
1185				if (s->dict.buf == NULL) {
1186					s->dict.allocated = 0;
1187					return XZ_MEM_ERROR;
1188				}
1189			}
1190		}
1191	}
1192
1193	s->lzma2.sequence = SEQ_CONTROL;
1194	s->lzma2.need_dict_reset = true;
1195
1196	s->temp.size = 0;
1197
1198	return XZ_OK;
1199}
1200
1201XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
1202{
1203	if (DEC_IS_MULTI(s->dict.mode))
1204		vfree(s->dict.buf);
1205
1206	kfree(s);
1207}
1208
1209#ifdef XZ_DEC_MICROLZMA
1210/* This is a wrapper struct to have a nice struct name in the public API. */
1211struct xz_dec_microlzma {
1212	struct xz_dec_lzma2 s;
1213};
1214
1215enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
1216				 struct xz_buf *b)
1217{
1218	struct xz_dec_lzma2 *s = &s_ptr->s;
1219
1220	/*
1221	 * sequence is SEQ_PROPERTIES before the first input byte,
1222	 * SEQ_LZMA_PREPARE until a total of five bytes have been read,
1223	 * and SEQ_LZMA_RUN for the rest of the input stream.
1224	 */
1225	if (s->lzma2.sequence != SEQ_LZMA_RUN) {
1226		if (s->lzma2.sequence == SEQ_PROPERTIES) {
1227			/* One byte is needed for the props. */
1228			if (b->in_pos >= b->in_size)
1229				return XZ_OK;
1230
1231			/*
1232			 * Don't increment b->in_pos here. The same byte is
1233			 * also passed to rc_read_init() which will ignore it.
1234			 */
1235			if (!lzma_props(s, ~b->in[b->in_pos]))
1236				return XZ_DATA_ERROR;
1237
1238			s->lzma2.sequence = SEQ_LZMA_PREPARE;
1239		}
1240
1241		/*
1242		 * xz_dec_microlzma_reset() doesn't validate the compressed
1243		 * size so we do it here. We have to limit the maximum size
1244		 * to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice
1245		 * round number and much more than users of this code should
1246		 * ever need.
1247		 */
1248		if (s->lzma2.compressed < RC_INIT_BYTES
1249				|| s->lzma2.compressed > (3U << 30))
1250			return XZ_DATA_ERROR;
1251
1252		if (!rc_read_init(&s->rc, b))
1253			return XZ_OK;
1254
1255		s->lzma2.compressed -= RC_INIT_BYTES;
1256		s->lzma2.sequence = SEQ_LZMA_RUN;
1257
1258		dict_reset(&s->dict, b);
1259	}
1260
1261	/* This is to allow increasing b->out_size between calls. */
1262	if (DEC_IS_SINGLE(s->dict.mode))
1263		s->dict.end = b->out_size - b->out_pos;
1264
1265	while (true) {
1266		dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos,
1267					   s->lzma2.uncompressed));
1268
1269		if (!lzma2_lzma(s, b))
1270			return XZ_DATA_ERROR;
1271
1272		s->lzma2.uncompressed -= dict_flush(&s->dict, b);
1273
1274		if (s->lzma2.uncompressed == 0) {
1275			if (s->lzma2.pedantic_microlzma) {
1276				if (s->lzma2.compressed > 0 || s->lzma.len > 0
1277						|| !rc_is_finished(&s->rc))
1278					return XZ_DATA_ERROR;
1279			}
1280
1281			return XZ_STREAM_END;
1282		}
1283
1284		if (b->out_pos == b->out_size)
1285			return XZ_OK;
1286
1287		if (b->in_pos == b->in_size
1288				&& s->temp.size < s->lzma2.compressed)
1289			return XZ_OK;
1290	}
1291}
1292
1293struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
1294						uint32_t dict_size)
1295{
1296	struct xz_dec_microlzma *s;
1297
1298	/* Restrict dict_size to the same range as in the LZMA2 code. */
1299	if (dict_size < 4096 || dict_size > (3U << 30))
1300		return NULL;
1301
1302	s = kmalloc(sizeof(*s), GFP_KERNEL);
1303	if (s == NULL)
1304		return NULL;
1305
1306	s->s.dict.mode = mode;
1307	s->s.dict.size = dict_size;
1308
1309	if (DEC_IS_MULTI(mode)) {
1310		s->s.dict.end = dict_size;
1311
1312		s->s.dict.buf = vmalloc(dict_size);
1313		if (s->s.dict.buf == NULL) {
1314			kfree(s);
1315			return NULL;
1316		}
1317	}
1318
1319	return s;
1320}
1321
1322void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
1323			    uint32_t uncomp_size, int uncomp_size_is_exact)
1324{
1325	/*
1326	 * comp_size is validated in xz_dec_microlzma_run().
1327	 * uncomp_size can safely be anything.
1328	 */
1329	s->s.lzma2.compressed = comp_size;
1330	s->s.lzma2.uncompressed = uncomp_size;
1331	s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact;
1332
1333	s->s.lzma2.sequence = SEQ_PROPERTIES;
1334	s->s.temp.size = 0;
1335}
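
    /*
     * A minimal single-call usage sketch (illustrative only; the buffer
     * names and sizes are hypothetical and error handling is omitted).
     * In XZ_SINGLE mode the output buffer doubles as the dictionary, so
     * it must have room for all of the uncompressed data at once:
     *
     *	struct xz_dec_microlzma *dec;
     *	struct xz_buf b = {
     *		.in = comp_buf, .in_pos = 0, .in_size = comp_size,
     *		.out = uncomp_buf, .out_pos = 0, .out_size = uncomp_size
     *	};
     *
     *	dec = xz_dec_microlzma_alloc(XZ_SINGLE, 65536);
     *	if (dec != NULL) {
     *		xz_dec_microlzma_reset(dec, comp_size, uncomp_size, true);
     *		ret = xz_dec_microlzma_run(dec, &b);
     *		xz_dec_microlzma_end(dec);
     *	}
     *
     * On success ret is XZ_STREAM_END and b.out holds the uncompressed
     * data.
     */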
1336
1337void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
1338{
1339	if (DEC_IS_MULTI(s->s.dict.mode))
1340		vfree(s->s.dict.buf);
1341
1342	kfree(s);
1343}
1344#endif