Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.13.7.
  1/*
  2 * FSE : Finite State Entropy encoder
  3 * Copyright (C) 2013-2015, Yann Collet.
  4 *
  5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  6 *
  7 * Redistribution and use in source and binary forms, with or without
  8 * modification, are permitted provided that the following conditions are
  9 * met:
 10 *
 11 *   * Redistributions of source code must retain the above copyright
 12 * notice, this list of conditions and the following disclaimer.
 13 *   * Redistributions in binary form must reproduce the above
 14 * copyright notice, this list of conditions and the following disclaimer
 15 * in the documentation and/or other materials provided with the
 16 * distribution.
 17 *
 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 *
 30 * This program is free software; you can redistribute it and/or modify it under
 31 * the terms of the GNU General Public License version 2 as published by the
 32 * Free Software Foundation. This program is dual-licensed; you may select
 33 * either version 2 of the GNU General Public License ("GPL") or BSD license
 34 * ("BSD").
 35 *
 36 * You can contact the author at :
 37 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 38 */
 39
 40/* **************************************************************
 41*  Compiler specifics
 42****************************************************************/
 43#define FORCE_INLINE static __always_inline
 44
 45/* **************************************************************
 46*  Includes
 47****************************************************************/
 48#include "bitstream.h"
 49#include "fse.h"
 50#include <linux/compiler.h>
 51#include <linux/kernel.h>
 52#include <linux/math64.h>
 53#include <linux/string.h> /* memcpy, memset */
 54
 55/* **************************************************************
 56*  Error Management
 57****************************************************************/
 58#define FSE_STATIC_ASSERT(c)                                   \
 59	{                                                      \
 60		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
 61	} /* use only *after* variable declarations */
 62
 63/* **************************************************************
 64*  Templates
 65****************************************************************/
 66/*
 67  designed to be included
 68  for type-specific functions (template emulation in C)
 69  Objective is to write these functions only once, for improved maintenance
 70*/
 71
 72/* safety checks */
 73#ifndef FSE_FUNCTION_EXTENSION
 74#error "FSE_FUNCTION_EXTENSION must be defined"
 75#endif
 76#ifndef FSE_FUNCTION_TYPE
 77#error "FSE_FUNCTION_TYPE must be defined"
 78#endif
 79
 80/* Function names */
 81#define FSE_CAT(X, Y) X##Y
 82#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
 83#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
 84
 85/* Function templates */
 86
 87/* FSE_buildCTable_wksp() :
 88 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
 89 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
 90 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
 91 */
 92size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
 93{
 94	U32 const tableSize = 1 << tableLog;
 95	U32 const tableMask = tableSize - 1;
 96	void *const ptr = ct;
 97	U16 *const tableU16 = ((U16 *)ptr) + 2;
 98	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
 99	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
100	U32 const step = FSE_TABLESTEP(tableSize);
101	U32 highThreshold = tableSize - 1;
102
103	U32 *cumul;
104	FSE_FUNCTION_TYPE *tableSymbol;
105	size_t spaceUsed32 = 0;
106
107	cumul = (U32 *)workspace + spaceUsed32;
108	spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2;
109	tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32);
110	spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2;
111
112	if ((spaceUsed32 << 2) > workspaceSize)
113		return ERROR(tableLog_tooLarge);
114	workspace = (U32 *)workspace + spaceUsed32;
115	workspaceSize -= (spaceUsed32 << 2);
116
117	/* CTable header */
118	tableU16[-2] = (U16)tableLog;
119	tableU16[-1] = (U16)maxSymbolValue;
120
121	/* For explanations on how to distribute symbol values over the table :
122	*  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
123
124	/* symbol start positions */
125	{
126		U32 u;
127		cumul[0] = 0;
128		for (u = 1; u <= maxSymbolValue + 1; u++) {
129			if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */
130				cumul[u] = cumul[u - 1] + 1;
131				tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1);
132			} else {
133				cumul[u] = cumul[u - 1] + normalizedCounter[u - 1];
134			}
135		}
136		cumul[maxSymbolValue + 1] = tableSize + 1;
137	}
138
139	/* Spread symbols */
140	{
141		U32 position = 0;
142		U32 symbol;
143		for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
144			int nbOccurences;
145			for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) {
146				tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
147				position = (position + step) & tableMask;
148				while (position > highThreshold)
149					position = (position + step) & tableMask; /* Low proba area */
150			}
151		}
152
153		if (position != 0)
154			return ERROR(GENERIC); /* Must have gone through all positions */
155	}
156
157	/* Build table */
158	{
159		U32 u;
160		for (u = 0; u < tableSize; u++) {
161			FSE_FUNCTION_TYPE s = tableSymbol[u];	/* note : static analyzer may not understand tableSymbol is properly initialized */
162			tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */
163		}
164	}
165
166	/* Build Symbol Transformation Table */
167	{
168		unsigned total = 0;
169		unsigned s;
170		for (s = 0; s <= maxSymbolValue; s++) {
171			switch (normalizedCounter[s]) {
172			case 0: break;
173
174			case -1:
175			case 1:
176				symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
177				symbolTT[s].deltaFindState = total - 1;
178				total++;
179				break;
180			default: {
181				U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1);
182				U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
183				symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
184				symbolTT[s].deltaFindState = total - normalizedCounter[s];
185				total += normalizedCounter[s];
186			}
187			}
188		}
189	}
190
191	return 0;
192}
193
194/*-**************************************************************
195*  FSE NCount encoding-decoding
196****************************************************************/
197size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
198{
199	size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
200	return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
201}
202
203static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
204				      unsigned writeIsSafe)
205{
206	BYTE *const ostart = (BYTE *)header;
207	BYTE *out = ostart;
208	BYTE *const oend = ostart + headerBufferSize;
209	int nbBits;
210	const int tableSize = 1 << tableLog;
211	int remaining;
212	int threshold;
213	U32 bitStream;
214	int bitCount;
215	unsigned charnum = 0;
216	int previous0 = 0;
217
218	bitStream = 0;
219	bitCount = 0;
220	/* Table Size */
221	bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount;
222	bitCount += 4;
223
224	/* Init */
225	remaining = tableSize + 1; /* +1 for extra accuracy */
226	threshold = tableSize;
227	nbBits = tableLog + 1;
228
229	while (remaining > 1) { /* stops at 1 */
230		if (previous0) {
231			unsigned start = charnum;
232			while (!normalizedCounter[charnum])
233				charnum++;
234			while (charnum >= start + 24) {
235				start += 24;
236				bitStream += 0xFFFFU << bitCount;
237				if ((!writeIsSafe) && (out > oend - 2))
238					return ERROR(dstSize_tooSmall); /* Buffer overflow */
239				out[0] = (BYTE)bitStream;
240				out[1] = (BYTE)(bitStream >> 8);
241				out += 2;
242				bitStream >>= 16;
243			}
244			while (charnum >= start + 3) {
245				start += 3;
246				bitStream += 3 << bitCount;
247				bitCount += 2;
248			}
249			bitStream += (charnum - start) << bitCount;
250			bitCount += 2;
251			if (bitCount > 16) {
252				if ((!writeIsSafe) && (out > oend - 2))
253					return ERROR(dstSize_tooSmall); /* Buffer overflow */
254				out[0] = (BYTE)bitStream;
255				out[1] = (BYTE)(bitStream >> 8);
256				out += 2;
257				bitStream >>= 16;
258				bitCount -= 16;
259			}
260		}
261		{
262			int count = normalizedCounter[charnum++];
263			int const max = (2 * threshold - 1) - remaining;
264			remaining -= count < 0 ? -count : count;
265			count++; /* +1 for extra accuracy */
266			if (count >= threshold)
267				count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
268			bitStream += count << bitCount;
269			bitCount += nbBits;
270			bitCount -= (count < max);
271			previous0 = (count == 1);
272			if (remaining < 1)
273				return ERROR(GENERIC);
274			while (remaining < threshold)
275				nbBits--, threshold >>= 1;
276		}
277		if (bitCount > 16) {
278			if ((!writeIsSafe) && (out > oend - 2))
279				return ERROR(dstSize_tooSmall); /* Buffer overflow */
280			out[0] = (BYTE)bitStream;
281			out[1] = (BYTE)(bitStream >> 8);
282			out += 2;
283			bitStream >>= 16;
284			bitCount -= 16;
285		}
286	}
287
288	/* flush remaining bitStream */
289	if ((!writeIsSafe) && (out > oend - 2))
290		return ERROR(dstSize_tooSmall); /* Buffer overflow */
291	out[0] = (BYTE)bitStream;
292	out[1] = (BYTE)(bitStream >> 8);
293	out += (bitCount + 7) / 8;
294
295	if (charnum > maxSymbolValue + 1)
296		return ERROR(GENERIC);
297
298	return (out - ostart);
299}
300
301size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
302{
303	if (tableLog > FSE_MAX_TABLELOG)
304		return ERROR(tableLog_tooLarge); /* Unsupported */
305	if (tableLog < FSE_MIN_TABLELOG)
306		return ERROR(GENERIC); /* Unsupported */
307
308	if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
309		return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
310
311	return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
312}
313
314/*-**************************************************************
315*  Counting histogram
316****************************************************************/
317/*! FSE_count_simple
318	This function counts byte values within `src`, and store the histogram into table `count`.
319	It doesn't use any additional memory.
320	But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
321	For this reason, prefer using a table `count` with 256 elements.
322	@return : count of most numerous element
323*/
324size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize)
325{
326	const BYTE *ip = (const BYTE *)src;
327	const BYTE *const end = ip + srcSize;
328	unsigned maxSymbolValue = *maxSymbolValuePtr;
329	unsigned max = 0;
330
331	memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
332	if (srcSize == 0) {
333		*maxSymbolValuePtr = 0;
334		return 0;
335	}
336
337	while (ip < end)
338		count[*ip++]++;
339
340	while (!count[maxSymbolValue])
341		maxSymbolValue--;
342	*maxSymbolValuePtr = maxSymbolValue;
343
344	{
345		U32 s;
346		for (s = 0; s <= maxSymbolValue; s++)
347			if (count[s] > max)
348				max = count[s];
349	}
350
351	return (size_t)max;
352}
353
354/* FSE_count_parallel_wksp() :
355 * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
356 * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
357static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax,
358				      unsigned *const workSpace)
359{
360	const BYTE *ip = (const BYTE *)source;
361	const BYTE *const iend = ip + sourceSize;
362	unsigned maxSymbolValue = *maxSymbolValuePtr;
363	unsigned max = 0;
364	U32 *const Counting1 = workSpace;
365	U32 *const Counting2 = Counting1 + 256;
366	U32 *const Counting3 = Counting2 + 256;
367	U32 *const Counting4 = Counting3 + 256;
368
369	memset(Counting1, 0, 4 * 256 * sizeof(unsigned));
370
371	/* safety checks */
372	if (!sourceSize) {
373		memset(count, 0, maxSymbolValue + 1);
374		*maxSymbolValuePtr = 0;
375		return 0;
376	}
377	if (!maxSymbolValue)
378		maxSymbolValue = 255; /* 0 == default */
379
380	/* by stripes of 16 bytes */
381	{
382		U32 cached = ZSTD_read32(ip);
383		ip += 4;
384		while (ip < iend - 15) {
385			U32 c = cached;
386			cached = ZSTD_read32(ip);
387			ip += 4;
388			Counting1[(BYTE)c]++;
389			Counting2[(BYTE)(c >> 8)]++;
390			Counting3[(BYTE)(c >> 16)]++;
391			Counting4[c >> 24]++;
392			c = cached;
393			cached = ZSTD_read32(ip);
394			ip += 4;
395			Counting1[(BYTE)c]++;
396			Counting2[(BYTE)(c >> 8)]++;
397			Counting3[(BYTE)(c >> 16)]++;
398			Counting4[c >> 24]++;
399			c = cached;
400			cached = ZSTD_read32(ip);
401			ip += 4;
402			Counting1[(BYTE)c]++;
403			Counting2[(BYTE)(c >> 8)]++;
404			Counting3[(BYTE)(c >> 16)]++;
405			Counting4[c >> 24]++;
406			c = cached;
407			cached = ZSTD_read32(ip);
408			ip += 4;
409			Counting1[(BYTE)c]++;
410			Counting2[(BYTE)(c >> 8)]++;
411			Counting3[(BYTE)(c >> 16)]++;
412			Counting4[c >> 24]++;
413		}
414		ip -= 4;
415	}
416
417	/* finish last symbols */
418	while (ip < iend)
419		Counting1[*ip++]++;
420
421	if (checkMax) { /* verify stats will fit into destination table */
422		U32 s;
423		for (s = 255; s > maxSymbolValue; s--) {
424			Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
425			if (Counting1[s])
426				return ERROR(maxSymbolValue_tooSmall);
427		}
428	}
429
430	{
431		U32 s;
432		for (s = 0; s <= maxSymbolValue; s++) {
433			count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
434			if (count[s] > max)
435				max = count[s];
436		}
437	}
438
439	while (!count[maxSymbolValue])
440		maxSymbolValue--;
441	*maxSymbolValuePtr = maxSymbolValue;
442	return (size_t)max;
443}
444
445/* FSE_countFast_wksp() :
446 * Same as FSE_countFast(), but using an externally provided scratch buffer.
447 * `workSpace` size must be table of >= `1024` unsigned */
448size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
449{
450	if (sourceSize < 1500)
451		return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
452	return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
453}
454
455/* FSE_count_wksp() :
456 * Same as FSE_count(), but using an externally provided scratch buffer.
457 * `workSpace` size must be table of >= `1024` unsigned */
458size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
459{
460	if (*maxSymbolValuePtr < 255)
461		return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
462	*maxSymbolValuePtr = 255;
463	return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
464}
465
466/*-**************************************************************
467*  FSE Compression Code
468****************************************************************/
469/*! FSE_sizeof_CTable() :
470	FSE_CTable is a variable size structure which contains :
471	`U16 tableLog;`
472	`U16 maxSymbolValue;`
473	`U16 nextStateNumber[1 << tableLog];`                         // This size is variable
474	`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
475Allocation is manual (C standard does not support variable-size structures).
476*/
477size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog)
478{
479	if (tableLog > FSE_MAX_TABLELOG)
480		return ERROR(tableLog_tooLarge);
481	return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32);
482}
483
484/* provides the minimum logSize to safely represent a distribution */
485static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
486{
487	U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
488	U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
489	U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
490	return minBits;
491}
492
493unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
494{
495	U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
496	U32 tableLog = maxTableLog;
497	U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
498	if (tableLog == 0)
499		tableLog = FSE_DEFAULT_TABLELOG;
500	if (maxBitsSrc < tableLog)
501		tableLog = maxBitsSrc; /* Accuracy can be reduced */
502	if (minBits > tableLog)
503		tableLog = minBits; /* Need a minimum to safely represent all symbol values */
504	if (tableLog < FSE_MIN_TABLELOG)
505		tableLog = FSE_MIN_TABLELOG;
506	if (tableLog > FSE_MAX_TABLELOG)
507		tableLog = FSE_MAX_TABLELOG;
508	return tableLog;
509}
510
511unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
512{
513	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
514}
515
516/* Secondary normalization method.
517   To be used when primary method fails. */
518
519static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue)
520{
521	short const NOT_YET_ASSIGNED = -2;
522	U32 s;
523	U32 distributed = 0;
524	U32 ToDistribute;
525
526	/* Init */
527	U32 const lowThreshold = (U32)(total >> tableLog);
528	U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
529
530	for (s = 0; s <= maxSymbolValue; s++) {
531		if (count[s] == 0) {
532			norm[s] = 0;
533			continue;
534		}
535		if (count[s] <= lowThreshold) {
536			norm[s] = -1;
537			distributed++;
538			total -= count[s];
539			continue;
540		}
541		if (count[s] <= lowOne) {
542			norm[s] = 1;
543			distributed++;
544			total -= count[s];
545			continue;
546		}
547
548		norm[s] = NOT_YET_ASSIGNED;
549	}
550	ToDistribute = (1 << tableLog) - distributed;
551
552	if ((total / ToDistribute) > lowOne) {
553		/* risk of rounding to zero */
554		lowOne = (U32)((total * 3) / (ToDistribute * 2));
555		for (s = 0; s <= maxSymbolValue; s++) {
556			if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
557				norm[s] = 1;
558				distributed++;
559				total -= count[s];
560				continue;
561			}
562		}
563		ToDistribute = (1 << tableLog) - distributed;
564	}
565
566	if (distributed == maxSymbolValue + 1) {
567		/* all values are pretty poor;
568		   probably incompressible data (should have already been detected);
569		   find max, then give all remaining points to max */
570		U32 maxV = 0, maxC = 0;
571		for (s = 0; s <= maxSymbolValue; s++)
572			if (count[s] > maxC)
573				maxV = s, maxC = count[s];
574		norm[maxV] += (short)ToDistribute;
575		return 0;
576	}
577
578	if (total == 0) {
579		/* all of the symbols were low enough for the lowOne or lowThreshold */
580		for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1))
581			if (norm[s] > 0)
582				ToDistribute--, norm[s]++;
583		return 0;
584	}
585
586	{
587		U64 const vStepLog = 62 - tableLog;
588		U64 const mid = (1ULL << (vStepLog - 1)) - 1;
589		U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
590		U64 tmpTotal = mid;
591		for (s = 0; s <= maxSymbolValue; s++) {
592			if (norm[s] == NOT_YET_ASSIGNED) {
593				U64 const end = tmpTotal + (count[s] * rStep);
594				U32 const sStart = (U32)(tmpTotal >> vStepLog);
595				U32 const sEnd = (U32)(end >> vStepLog);
596				U32 const weight = sEnd - sStart;
597				if (weight < 1)
598					return ERROR(GENERIC);
599				norm[s] = (short)weight;
600				tmpTotal = end;
601			}
602		}
603	}
604
605	return 0;
606}
607
608size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue)
609{
610	/* Sanity checks */
611	if (tableLog == 0)
612		tableLog = FSE_DEFAULT_TABLELOG;
613	if (tableLog < FSE_MIN_TABLELOG)
614		return ERROR(GENERIC); /* Unsupported size */
615	if (tableLog > FSE_MAX_TABLELOG)
616		return ERROR(tableLog_tooLarge); /* Unsupported size */
617	if (tableLog < FSE_minTableLog(total, maxSymbolValue))
618		return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
619
620	{
621		U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000};
622		U64 const scale = 62 - tableLog;
623		U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */
624		U64 const vStep = 1ULL << (scale - 20);
625		int stillToDistribute = 1 << tableLog;
626		unsigned s;
627		unsigned largest = 0;
628		short largestP = 0;
629		U32 lowThreshold = (U32)(total >> tableLog);
630
631		for (s = 0; s <= maxSymbolValue; s++) {
632			if (count[s] == total)
633				return 0; /* rle special case */
634			if (count[s] == 0) {
635				normalizedCounter[s] = 0;
636				continue;
637			}
638			if (count[s] <= lowThreshold) {
639				normalizedCounter[s] = -1;
640				stillToDistribute--;
641			} else {
642				short proba = (short)((count[s] * step) >> scale);
643				if (proba < 8) {
644					U64 restToBeat = vStep * rtbTable[proba];
645					proba += (count[s] * step) - ((U64)proba << scale) > restToBeat;
646				}
647				if (proba > largestP)
648					largestP = proba, largest = s;
649				normalizedCounter[s] = proba;
650				stillToDistribute -= proba;
651			}
652		}
653		if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
654			/* corner case, need another normalization method */
655			size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
656			if (FSE_isError(errorCode))
657				return errorCode;
658		} else
659			normalizedCounter[largest] += (short)stillToDistribute;
660	}
661
662	return tableLog;
663}
664
665/* fake FSE_CTable, for raw (uncompressed) input */
666size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits)
667{
668	const unsigned tableSize = 1 << nbBits;
669	const unsigned tableMask = tableSize - 1;
670	const unsigned maxSymbolValue = tableMask;
671	void *const ptr = ct;
672	U16 *const tableU16 = ((U16 *)ptr) + 2;
673	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */
674	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
675	unsigned s;
676
677	/* Sanity checks */
678	if (nbBits < 1)
679		return ERROR(GENERIC); /* min size */
680
681	/* header */
682	tableU16[-2] = (U16)nbBits;
683	tableU16[-1] = (U16)maxSymbolValue;
684
685	/* Build table */
686	for (s = 0; s < tableSize; s++)
687		tableU16[s] = (U16)(tableSize + s);
688
689	/* Build Symbol Transformation Table */
690	{
691		const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
692		for (s = 0; s <= maxSymbolValue; s++) {
693			symbolTT[s].deltaNbBits = deltaNbBits;
694			symbolTT[s].deltaFindState = s - 1;
695		}
696	}
697
698	return 0;
699}
700
701/* fake FSE_CTable, for rle input (always same symbol) */
702size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue)
703{
704	void *ptr = ct;
705	U16 *tableU16 = ((U16 *)ptr) + 2;
706	void *FSCTptr = (U32 *)ptr + 2;
707	FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr;
708
709	/* header */
710	tableU16[-2] = (U16)0;
711	tableU16[-1] = (U16)symbolValue;
712
713	/* Build table */
714	tableU16[0] = 0;
715	tableU16[1] = 0; /* just in case */
716
717	/* Build Symbol Transformation Table */
718	symbolTT[symbolValue].deltaNbBits = 0;
719	symbolTT[symbolValue].deltaFindState = 0;
720
721	return 0;
722}
723
724static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast)
725{
726	const BYTE *const istart = (const BYTE *)src;
727	const BYTE *const iend = istart + srcSize;
728	const BYTE *ip = iend;
729
730	BIT_CStream_t bitC;
731	FSE_CState_t CState1, CState2;
732
733	/* init */
734	if (srcSize <= 2)
735		return 0;
736	{
737		size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
738		if (FSE_isError(initError))
739			return 0; /* not enough space available to write a bitstream */
740	}
741
742#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
743
744	if (srcSize & 1) {
745		FSE_initCState2(&CState1, ct, *--ip);
746		FSE_initCState2(&CState2, ct, *--ip);
747		FSE_encodeSymbol(&bitC, &CState1, *--ip);
748		FSE_FLUSHBITS(&bitC);
749	} else {
750		FSE_initCState2(&CState2, ct, *--ip);
751		FSE_initCState2(&CState1, ct, *--ip);
752	}
753
754	/* join to mod 4 */
755	srcSize -= 2;
756	if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */
757		FSE_encodeSymbol(&bitC, &CState2, *--ip);
758		FSE_encodeSymbol(&bitC, &CState1, *--ip);
759		FSE_FLUSHBITS(&bitC);
760	}
761
762	/* 2 or 4 encoding per loop */
763	while (ip > istart) {
764
765		FSE_encodeSymbol(&bitC, &CState2, *--ip);
766
767		if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */
768			FSE_FLUSHBITS(&bitC);
769
770		FSE_encodeSymbol(&bitC, &CState1, *--ip);
771
772		if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */
773			FSE_encodeSymbol(&bitC, &CState2, *--ip);
774			FSE_encodeSymbol(&bitC, &CState1, *--ip);
775		}
776
777		FSE_FLUSHBITS(&bitC);
778	}
779
780	FSE_flushCState(&bitC, &CState2);
781	FSE_flushCState(&bitC, &CState1);
782	return BIT_closeCStream(&bitC);
783}
784
785size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct)
786{
787	unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
788
789	if (fast)
790		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
791	else
792		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
793}
794
795size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }