Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.13.7.
  1/*
  2 * Huffman decoder, part of New Generation Entropy library
  3 * Copyright (C) 2013-2016, Yann Collet.
  4 *
  5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  6 *
  7 * Redistribution and use in source and binary forms, with or without
  8 * modification, are permitted provided that the following conditions are
  9 * met:
 10 *
 11 *   * Redistributions of source code must retain the above copyright
 12 * notice, this list of conditions and the following disclaimer.
 13 *   * Redistributions in binary form must reproduce the above
 14 * copyright notice, this list of conditions and the following disclaimer
 15 * in the documentation and/or other materials provided with the
 16 * distribution.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 *
 30 * This program is free software; you can redistribute it and/or modify it under
 31 * the terms of the GNU General Public License version 2 as published by the
 32 * Free Software Foundation. This program is dual-licensed; you may select
 33 * either version 2 of the GNU General Public License ("GPL") or BSD license
 34 * ("BSD").
 35 *
 36 * You can contact the author at :
 37 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 38 */
 39
 40/* **************************************************************
 41*  Compiler specifics
 42****************************************************************/
 43#define FORCE_INLINE static __always_inline
 44
 45/* **************************************************************
 46*  Dependencies
 47****************************************************************/
 48#include "bitstream.h" /* BIT_* */
 49#include "fse.h"       /* header compression */
 50#include "huf.h"
 51#include <linux/compiler.h>
 52#include <linux/kernel.h>
 53#include <linux/string.h> /* memcpy, memset */
 54
 55/* **************************************************************
 56*  Error Management
 57****************************************************************/
 58#define HUF_STATIC_ASSERT(c)                                   \
 59	{                                                      \
 60		enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
 61	} /* use only *after* variable declarations */
 62
 63/*-***************************/
 64/*  generic DTableDesc       */
 65/*-***************************/
 66
 67typedef struct {
 68	BYTE maxTableLog;
 69	BYTE tableType;
 70	BYTE tableLog;
 71	BYTE reserved;
 72} DTableDesc;
 73
 74static DTableDesc HUF_getDTableDesc(const HUF_DTable *table)
 75{
 76	DTableDesc dtd;
 77	memcpy(&dtd, table, sizeof(dtd));
 78	return dtd;
 79}
 80
 81/*-***************************/
 82/*  single-symbol decoding   */
 83/*-***************************/
 84
 85typedef struct {
 86	BYTE byte;
 87	BYTE nbBits;
 88} HUF_DEltX2; /* single-symbol decoding */
 89
 90size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
 91{
 92	U32 tableLog = 0;
 93	U32 nbSymbols = 0;
 94	size_t iSize;
 95	void *const dtPtr = DTable + 1;
 96	HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr;
 97
 98	U32 *rankVal;
 99	BYTE *huffWeight;
100	size_t spaceUsed32 = 0;
101
102	rankVal = (U32 *)workspace + spaceUsed32;
103	spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
104	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
105	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
106
107	if ((spaceUsed32 << 2) > workspaceSize)
108		return ERROR(tableLog_tooLarge);
109	workspace = (U32 *)workspace + spaceUsed32;
110	workspaceSize -= (spaceUsed32 << 2);
111
112	HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
113	/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
114
115	iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
116	if (HUF_isError(iSize))
117		return iSize;
118
119	/* Table header */
120	{
121		DTableDesc dtd = HUF_getDTableDesc(DTable);
122		if (tableLog > (U32)(dtd.maxTableLog + 1))
123			return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
124		dtd.tableType = 0;
125		dtd.tableLog = (BYTE)tableLog;
126		memcpy(DTable, &dtd, sizeof(dtd));
127	}
128
129	/* Calculate starting value for each rank */
130	{
131		U32 n, nextRankStart = 0;
132		for (n = 1; n < tableLog + 1; n++) {
133			U32 const curr = nextRankStart;
134			nextRankStart += (rankVal[n] << (n - 1));
135			rankVal[n] = curr;
136		}
137	}
138
139	/* fill DTable */
140	{
141		U32 n;
142		for (n = 0; n < nbSymbols; n++) {
143			U32 const w = huffWeight[n];
144			U32 const length = (1 << w) >> 1;
145			U32 u;
146			HUF_DEltX2 D;
147			D.byte = (BYTE)n;
148			D.nbBits = (BYTE)(tableLog + 1 - w);
149			for (u = rankVal[w]; u < rankVal[w] + length; u++)
150				dt[u] = D;
151			rankVal[w] += length;
152		}
153	}
154
155	return iSize;
156}
157
158static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog)
159{
160	size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
161	BYTE const c = dt[val].byte;
162	BIT_skipBits(Dstream, dt[val].nbBits);
163	return c;
164}
165
166#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
167
168#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)         \
169	if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
170	HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
171
172#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
173	if (ZSTD_64bits())                     \
174	HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
175
176FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog)
177{
178	BYTE *const pStart = p;
179
180	/* up to 4 symbols at a time */
181	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) {
182		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
183		HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
184		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
185		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
186	}
187
188	/* closer to the end */
189	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
190		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
191
192	/* no more data to retrieve from bitstream, hence no need to reload */
193	while (p < pEnd)
194		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
195
196	return pEnd - pStart;
197}
198
199static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
200{
201	BYTE *op = (BYTE *)dst;
202	BYTE *const oend = op + dstSize;
203	const void *dtPtr = DTable + 1;
204	const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
205	BIT_DStream_t bitD;
206	DTableDesc const dtd = HUF_getDTableDesc(DTable);
207	U32 const dtLog = dtd.tableLog;
208
209	{
210		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
211		if (HUF_isError(errorCode))
212			return errorCode;
213	}
214
215	HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
216
217	/* check */
218	if (!BIT_endOfDStream(&bitD))
219		return ERROR(corruption_detected);
220
221	return dstSize;
222}
223
224size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
225{
226	DTableDesc dtd = HUF_getDTableDesc(DTable);
227	if (dtd.tableType != 0)
228		return ERROR(GENERIC);
229	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
230}
231
232size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
233{
234	const BYTE *ip = (const BYTE *)cSrc;
235
236	size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
237	if (HUF_isError(hSize))
238		return hSize;
239	if (hSize >= cSrcSize)
240		return ERROR(srcSize_wrong);
241	ip += hSize;
242	cSrcSize -= hSize;
243
244	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
245}
246
247static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
248{
249	/* Check */
250	if (cSrcSize < 10)
251		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
252
253	{
254		const BYTE *const istart = (const BYTE *)cSrc;
255		BYTE *const ostart = (BYTE *)dst;
256		BYTE *const oend = ostart + dstSize;
257		const void *const dtPtr = DTable + 1;
258		const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
259
260		/* Init */
261		BIT_DStream_t bitD1;
262		BIT_DStream_t bitD2;
263		BIT_DStream_t bitD3;
264		BIT_DStream_t bitD4;
265		size_t const length1 = ZSTD_readLE16(istart);
266		size_t const length2 = ZSTD_readLE16(istart + 2);
267		size_t const length3 = ZSTD_readLE16(istart + 4);
268		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
269		const BYTE *const istart1 = istart + 6; /* jumpTable */
270		const BYTE *const istart2 = istart1 + length1;
271		const BYTE *const istart3 = istart2 + length2;
272		const BYTE *const istart4 = istart3 + length3;
273		const size_t segmentSize = (dstSize + 3) / 4;
274		BYTE *const opStart2 = ostart + segmentSize;
275		BYTE *const opStart3 = opStart2 + segmentSize;
276		BYTE *const opStart4 = opStart3 + segmentSize;
277		BYTE *op1 = ostart;
278		BYTE *op2 = opStart2;
279		BYTE *op3 = opStart3;
280		BYTE *op4 = opStart4;
281		U32 endSignal;
282		DTableDesc const dtd = HUF_getDTableDesc(DTable);
283		U32 const dtLog = dtd.tableLog;
284
285		if (length4 > cSrcSize)
286			return ERROR(corruption_detected); /* overflow */
287		{
288			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
289			if (HUF_isError(errorCode))
290				return errorCode;
291		}
292		{
293			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
294			if (HUF_isError(errorCode))
295				return errorCode;
296		}
297		{
298			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
299			if (HUF_isError(errorCode))
300				return errorCode;
301		}
302		{
303			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
304			if (HUF_isError(errorCode))
305				return errorCode;
306		}
307
308		/* 16-32 symbols per loop (4-8 symbols per stream) */
309		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
310		for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) {
311			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
312			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
313			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
314			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
315			HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
316			HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
317			HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
318			HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
319			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
320			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
321			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
322			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
323			HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
324			HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
325			HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
326			HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
327			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
328		}
329
330		/* check corruption */
331		if (op1 > opStart2)
332			return ERROR(corruption_detected);
333		if (op2 > opStart3)
334			return ERROR(corruption_detected);
335		if (op3 > opStart4)
336			return ERROR(corruption_detected);
337		/* note : op4 supposed already verified within main loop */
338
339		/* finish bitStreams one by one */
340		HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
341		HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
342		HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
343		HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
344
345		/* check */
346		endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
347		if (!endSignal)
348			return ERROR(corruption_detected);
349
350		/* decoded size */
351		return dstSize;
352	}
353}
354
355size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
356{
357	DTableDesc dtd = HUF_getDTableDesc(DTable);
358	if (dtd.tableType != 0)
359		return ERROR(GENERIC);
360	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
361}
362
363size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
364{
365	const BYTE *ip = (const BYTE *)cSrc;
366
367	size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
368	if (HUF_isError(hSize))
369		return hSize;
370	if (hSize >= cSrcSize)
371		return ERROR(srcSize_wrong);
372	ip += hSize;
373	cSrcSize -= hSize;
374
375	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
376}
377
378/* *************************/
379/* double-symbols decoding */
380/* *************************/
381typedef struct {
382	U16 sequence;
383	BYTE nbBits;
384	BYTE length;
385} HUF_DEltX4; /* double-symbols decoding */
386
387typedef struct {
388	BYTE symbol;
389	BYTE weight;
390} sortedSymbol_t;
391
392/* HUF_fillDTableX4Level2() :
393 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
394static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight,
395				   const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq)
396{
397	HUF_DEltX4 DElt;
398	U32 rankVal[HUF_TABLELOG_MAX + 1];
399
400	/* get pre-calculated rankVal */
401	memcpy(rankVal, rankValOrigin, sizeof(rankVal));
402
403	/* fill skipped values */
404	if (minWeight > 1) {
405		U32 i, skipSize = rankVal[minWeight];
406		ZSTD_writeLE16(&(DElt.sequence), baseSeq);
407		DElt.nbBits = (BYTE)(consumed);
408		DElt.length = 1;
409		for (i = 0; i < skipSize; i++)
410			DTable[i] = DElt;
411	}
412
413	/* fill DTable */
414	{
415		U32 s;
416		for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */
417			const U32 symbol = sortedSymbols[s].symbol;
418			const U32 weight = sortedSymbols[s].weight;
419			const U32 nbBits = nbBitsBaseline - weight;
420			const U32 length = 1 << (sizeLog - nbBits);
421			const U32 start = rankVal[weight];
422			U32 i = start;
423			const U32 end = start + length;
424
425			ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
426			DElt.nbBits = (BYTE)(nbBits + consumed);
427			DElt.length = 2;
428			do {
429				DTable[i++] = DElt;
430			} while (i < end); /* since length >= 1 */
431
432			rankVal[weight] += length;
433		}
434	}
435}
436
437typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
438typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
439
440static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart,
441			     rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline)
442{
443	U32 rankVal[HUF_TABLELOG_MAX + 1];
444	const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
445	const U32 minBits = nbBitsBaseline - maxWeight;
446	U32 s;
447
448	memcpy(rankVal, rankValOrigin, sizeof(rankVal));
449
450	/* fill DTable */
451	for (s = 0; s < sortedListSize; s++) {
452		const U16 symbol = sortedList[s].symbol;
453		const U32 weight = sortedList[s].weight;
454		const U32 nbBits = nbBitsBaseline - weight;
455		const U32 start = rankVal[weight];
456		const U32 length = 1 << (targetLog - nbBits);
457
458		if (targetLog - nbBits >= minBits) { /* enough room for a second symbol */
459			U32 sortedRank;
460			int minWeight = nbBits + scaleLog;
461			if (minWeight < 1)
462				minWeight = 1;
463			sortedRank = rankStart[minWeight];
464			HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank,
465					       sortedListSize - sortedRank, nbBitsBaseline, symbol);
466		} else {
467			HUF_DEltX4 DElt;
468			ZSTD_writeLE16(&(DElt.sequence), symbol);
469			DElt.nbBits = (BYTE)(nbBits);
470			DElt.length = 1;
471			{
472				U32 const end = start + length;
473				U32 u;
474				for (u = start; u < end; u++)
475					DTable[u] = DElt;
476			}
477		}
478		rankVal[weight] += length;
479	}
480}
481
482size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
483{
484	U32 tableLog, maxW, sizeOfSort, nbSymbols;
485	DTableDesc dtd = HUF_getDTableDesc(DTable);
486	U32 const maxTableLog = dtd.maxTableLog;
487	size_t iSize;
488	void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */
489	HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr;
490	U32 *rankStart;
491
492	rankValCol_t *rankVal;
493	U32 *rankStats;
494	U32 *rankStart0;
495	sortedSymbol_t *sortedSymbol;
496	BYTE *weightList;
497	size_t spaceUsed32 = 0;
498
499	HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0);
500
501	rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32);
502	spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
503	rankStats = (U32 *)workspace + spaceUsed32;
504	spaceUsed32 += HUF_TABLELOG_MAX + 1;
505	rankStart0 = (U32 *)workspace + spaceUsed32;
506	spaceUsed32 += HUF_TABLELOG_MAX + 2;
507	sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32);
508	spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
509	weightList = (BYTE *)((U32 *)workspace + spaceUsed32);
510	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
511
512	if ((spaceUsed32 << 2) > workspaceSize)
513		return ERROR(tableLog_tooLarge);
514	workspace = (U32 *)workspace + spaceUsed32;
515	workspaceSize -= (spaceUsed32 << 2);
516
517	rankStart = rankStart0 + 1;
518	memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
519
520	HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
521	if (maxTableLog > HUF_TABLELOG_MAX)
522		return ERROR(tableLog_tooLarge);
523	/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
524
525	iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
526	if (HUF_isError(iSize))
527		return iSize;
528
529	/* check result */
530	if (tableLog > maxTableLog)
531		return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
532
533	/* find maxWeight */
534	for (maxW = tableLog; rankStats[maxW] == 0; maxW--) {
535	} /* necessarily finds a solution before 0 */
536
537	/* Get start index of each weight */
538	{
539		U32 w, nextRankStart = 0;
540		for (w = 1; w < maxW + 1; w++) {
541			U32 curr = nextRankStart;
542			nextRankStart += rankStats[w];
543			rankStart[w] = curr;
544		}
545		rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
546		sizeOfSort = nextRankStart;
547	}
548
549	/* sort symbols by weight */
550	{
551		U32 s;
552		for (s = 0; s < nbSymbols; s++) {
553			U32 const w = weightList[s];
554			U32 const r = rankStart[w]++;
555			sortedSymbol[r].symbol = (BYTE)s;
556			sortedSymbol[r].weight = (BYTE)w;
557		}
558		rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
559	}
560
561	/* Build rankVal */
562	{
563		U32 *const rankVal0 = rankVal[0];
564		{
565			int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */
566			U32 nextRankVal = 0;
567			U32 w;
568			for (w = 1; w < maxW + 1; w++) {
569				U32 curr = nextRankVal;
570				nextRankVal += rankStats[w] << (w + rescale);
571				rankVal0[w] = curr;
572			}
573		}
574		{
575			U32 const minBits = tableLog + 1 - maxW;
576			U32 consumed;
577			for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
578				U32 *const rankValPtr = rankVal[consumed];
579				U32 w;
580				for (w = 1; w < maxW + 1; w++) {
581					rankValPtr[w] = rankVal0[w] >> consumed;
582				}
583			}
584		}
585	}
586
587	HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1);
588
589	dtd.tableLog = (BYTE)maxTableLog;
590	dtd.tableType = 1;
591	memcpy(DTable, &dtd, sizeof(dtd));
592	return iSize;
593}
594
595static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
596{
597	size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
598	memcpy(op, dt + val, 2);
599	BIT_skipBits(DStream, dt[val].nbBits);
600	return dt[val].length;
601}
602
603static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
604{
605	size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
606	memcpy(op, dt + val, 1);
607	if (dt[val].length == 1)
608		BIT_skipBits(DStream, dt[val].nbBits);
609	else {
610		if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) {
611			BIT_skipBits(DStream, dt[val].nbBits);
612			if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8))
613				/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
614				DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8);
615		}
616	}
617	return 1;
618}
619
620#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
621
622#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr)         \
623	if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
624	ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
625
626#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
627	if (ZSTD_64bits())                     \
628	ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
629
630FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog)
631{
632	BYTE *const pStart = p;
633
634	/* up to 8 symbols at a time */
635	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) {
636		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
637		HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
638		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
639		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
640	}
641
642	/* closer to end : up to 2 symbols at a time */
643	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2))
644		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
645
646	while (p <= pEnd - 2)
647		HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
648
649	if (p < pEnd)
650		p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
651
652	return p - pStart;
653}
654
655static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
656{
657	BIT_DStream_t bitD;
658
659	/* Init */
660	{
661		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
662		if (HUF_isError(errorCode))
663			return errorCode;
664	}
665
666	/* decode */
667	{
668		BYTE *const ostart = (BYTE *)dst;
669		BYTE *const oend = ostart + dstSize;
670		const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */
671		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
672		DTableDesc const dtd = HUF_getDTableDesc(DTable);
673		HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
674	}
675
676	/* check */
677	if (!BIT_endOfDStream(&bitD))
678		return ERROR(corruption_detected);
679
680	/* decoded size */
681	return dstSize;
682}
683
684size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
685{
686	DTableDesc dtd = HUF_getDTableDesc(DTable);
687	if (dtd.tableType != 1)
688		return ERROR(GENERIC);
689	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
690}
691
692size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
693{
694	const BYTE *ip = (const BYTE *)cSrc;
695
696	size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
697	if (HUF_isError(hSize))
698		return hSize;
699	if (hSize >= cSrcSize)
700		return ERROR(srcSize_wrong);
701	ip += hSize;
702	cSrcSize -= hSize;
703
704	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
705}
706
707static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
708{
709	if (cSrcSize < 10)
710		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
711
712	{
713		const BYTE *const istart = (const BYTE *)cSrc;
714		BYTE *const ostart = (BYTE *)dst;
715		BYTE *const oend = ostart + dstSize;
716		const void *const dtPtr = DTable + 1;
717		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
718
719		/* Init */
720		BIT_DStream_t bitD1;
721		BIT_DStream_t bitD2;
722		BIT_DStream_t bitD3;
723		BIT_DStream_t bitD4;
724		size_t const length1 = ZSTD_readLE16(istart);
725		size_t const length2 = ZSTD_readLE16(istart + 2);
726		size_t const length3 = ZSTD_readLE16(istart + 4);
727		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
728		const BYTE *const istart1 = istart + 6; /* jumpTable */
729		const BYTE *const istart2 = istart1 + length1;
730		const BYTE *const istart3 = istart2 + length2;
731		const BYTE *const istart4 = istart3 + length3;
732		size_t const segmentSize = (dstSize + 3) / 4;
733		BYTE *const opStart2 = ostart + segmentSize;
734		BYTE *const opStart3 = opStart2 + segmentSize;
735		BYTE *const opStart4 = opStart3 + segmentSize;
736		BYTE *op1 = ostart;
737		BYTE *op2 = opStart2;
738		BYTE *op3 = opStart3;
739		BYTE *op4 = opStart4;
740		U32 endSignal;
741		DTableDesc const dtd = HUF_getDTableDesc(DTable);
742		U32 const dtLog = dtd.tableLog;
743
744		if (length4 > cSrcSize)
745			return ERROR(corruption_detected); /* overflow */
746		{
747			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
748			if (HUF_isError(errorCode))
749				return errorCode;
750		}
751		{
752			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
753			if (HUF_isError(errorCode))
754				return errorCode;
755		}
756		{
757			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
758			if (HUF_isError(errorCode))
759				return errorCode;
760		}
761		{
762			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
763			if (HUF_isError(errorCode))
764				return errorCode;
765		}
766
767		/* 16-32 symbols per loop (4-8 symbols per stream) */
768		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
769		for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) {
770			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
771			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
772			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
773			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
774			HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
775			HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
776			HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
777			HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
778			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
779			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
780			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
781			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
782			HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
783			HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
784			HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
785			HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
786
787			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
788		}
789
790		/* check corruption */
791		if (op1 > opStart2)
792			return ERROR(corruption_detected);
793		if (op2 > opStart3)
794			return ERROR(corruption_detected);
795		if (op3 > opStart4)
796			return ERROR(corruption_detected);
797		/* note : op4 already verified within main loop */
798
799		/* finish bitStreams one by one */
800		HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
801		HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
802		HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
803		HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
804
805		/* check */
806		{
807			U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
808			if (!endCheck)
809				return ERROR(corruption_detected);
810		}
811
812		/* decoded size */
813		return dstSize;
814	}
815}
816
817size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
818{
819	DTableDesc dtd = HUF_getDTableDesc(DTable);
820	if (dtd.tableType != 1)
821		return ERROR(GENERIC);
822	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
823}
824
825size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
826{
827	const BYTE *ip = (const BYTE *)cSrc;
828
829	size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
830	if (HUF_isError(hSize))
831		return hSize;
832	if (hSize >= cSrcSize)
833		return ERROR(srcSize_wrong);
834	ip += hSize;
835	cSrcSize -= hSize;
836
837	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
838}
839
840/* ********************************/
841/* Generic decompression selector */
842/* ********************************/
843
844size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
845{
846	DTableDesc const dtd = HUF_getDTableDesc(DTable);
847	return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
848			     : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
849}
850
851size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
852{
853	DTableDesc const dtd = HUF_getDTableDesc(DTable);
854	return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
855			     : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
856}
857
858typedef struct {
859	U32 tableTime;
860	U32 decode256Time;
861} algo_time_t;
862static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = {
863    /* single, double, quad */
864    {{0, 0}, {1, 1}, {2, 2}},		     /* Q==0 : impossible */
865    {{0, 0}, {1, 1}, {2, 2}},		     /* Q==1 : impossible */
866    {{38, 130}, {1313, 74}, {2151, 38}},     /* Q == 2 : 12-18% */
867    {{448, 128}, {1353, 74}, {2238, 41}},    /* Q == 3 : 18-25% */
868    {{556, 128}, {1353, 74}, {2238, 47}},    /* Q == 4 : 25-32% */
869    {{714, 128}, {1418, 74}, {2436, 53}},    /* Q == 5 : 32-38% */
870    {{883, 128}, {1437, 74}, {2464, 61}},    /* Q == 6 : 38-44% */
871    {{897, 128}, {1515, 75}, {2622, 68}},    /* Q == 7 : 44-50% */
872    {{926, 128}, {1613, 75}, {2730, 75}},    /* Q == 8 : 50-56% */
873    {{947, 128}, {1729, 77}, {3359, 77}},    /* Q == 9 : 56-62% */
874    {{1107, 128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
875    {{1177, 128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
876    {{1242, 128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
877    {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
878    {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
879    {{722, 128}, {1891, 145}, {1936, 146}},  /* Q ==15 : 93-99% */
880};
881
882/** HUF_selectDecoder() :
883*   Tells which decoder is likely to decode faster,
884*   based on a set of pre-determined metrics.
885*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
886*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
887U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize)
888{
889	/* decoder timing evaluation */
890	U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
891	U32 const D256 = (U32)(dstSize >> 8);
892	U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
893	U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
894	DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
895
896	return DTime1 < DTime0;
897}
898
899typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize);
900
901size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
902{
903	/* validation checks */
904	if (dstSize == 0)
905		return ERROR(dstSize_tooSmall);
906	if (cSrcSize > dstSize)
907		return ERROR(corruption_detected); /* invalid */
908	if (cSrcSize == dstSize) {
909		memcpy(dst, cSrc, dstSize);
910		return dstSize;
911	} /* not compressed */
912	if (cSrcSize == 1) {
913		memset(dst, *(const BYTE *)cSrc, dstSize);
914		return dstSize;
915	} /* RLE */
916
917	{
918		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
919		return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
920			      : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
921	}
922}
923
924size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
925{
926	/* validation checks */
927	if (dstSize == 0)
928		return ERROR(dstSize_tooSmall);
929	if ((cSrcSize >= dstSize) || (cSrcSize <= 1))
930		return ERROR(corruption_detected); /* invalid */
931
932	{
933		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
934		return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
935			      : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
936	}
937}
938
939size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
940{
941	/* validation checks */
942	if (dstSize == 0)
943		return ERROR(dstSize_tooSmall);
944	if (cSrcSize > dstSize)
945		return ERROR(corruption_detected); /* invalid */
946	if (cSrcSize == dstSize) {
947		memcpy(dst, cSrc, dstSize);
948		return dstSize;
949	} /* not compressed */
950	if (cSrcSize == 1) {
951		memset(dst, *(const BYTE *)cSrc, dstSize);
952		return dstSize;
953	} /* RLE */
954
955	{
956		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
957		return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
958			      : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
959	}
960}