Linux Audio

Check our new training course

Loading...
v6.2
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * 842 Software Compression
  4 *
  5 * Copyright (C) 2015 Dan Streetman, IBM Corp
 
 
 
 
 
 
 
 
 
 
  6 *
  7 * See 842.h for details of the 842 compressed format.
  8 */
  9
 10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 11#define MODULE_NAME "842_compress"
 12
 13#include <linux/hashtable.h>
 14
 15#include "842.h"
 16#include "842_debugfs.h"
 17
 18#define SW842_HASHTABLE8_BITS	(10)
 19#define SW842_HASHTABLE4_BITS	(11)
 20#define SW842_HASHTABLE2_BITS	(10)
 21
 22/* By default, we allow compressing input buffers of any length, but we must
 23 * use the non-standard "short data" template so the decompressor can correctly
 24 * reproduce the uncompressed data buffer at the right length.  However the
 25 * hardware 842 compressor will not recognize the "short data" template, and
 26 * will fail to decompress any compressed buffer containing it (I have no idea
 27 * why anyone would want to use software to compress and hardware to decompress
 28 * but that's beside the point).  This parameter forces the compression
 29 * function to simply reject any input buffer that isn't a multiple of 8 bytes
 30 * long, instead of using the "short data" template, so that all compressed
 31 * buffers produced by this function will be decompressable by the 842 hardware
 32 * decompressor.  Unless you have a specific need for that, leave this disabled
 33 * so that any length buffer can be compressed.
 34 */
 35static bool sw842_strict;
 36module_param_named(strict, sw842_strict, bool, 0644);
 37
 38static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
 39	{ I8, N0, N0, N0, 0x19 }, /* 8 */
 40	{ I4, I4, N0, N0, 0x18 }, /* 18 */
 41	{ I4, I2, I2, N0, 0x17 }, /* 25 */
 42	{ I2, I2, I4, N0, 0x13 }, /* 25 */
 43	{ I2, I2, I2, I2, 0x12 }, /* 32 */
 44	{ I4, I2, D2, N0, 0x16 }, /* 33 */
 45	{ I4, D2, I2, N0, 0x15 }, /* 33 */
 46	{ I2, D2, I4, N0, 0x0e }, /* 33 */
 47	{ D2, I2, I4, N0, 0x09 }, /* 33 */
 48	{ I2, I2, I2, D2, 0x11 }, /* 40 */
 49	{ I2, I2, D2, I2, 0x10 }, /* 40 */
 50	{ I2, D2, I2, I2, 0x0d }, /* 40 */
 51	{ D2, I2, I2, I2, 0x08 }, /* 40 */
 52	{ I4, D4, N0, N0, 0x14 }, /* 41 */
 53	{ D4, I4, N0, N0, 0x04 }, /* 41 */
 54	{ I2, I2, D4, N0, 0x0f }, /* 48 */
 55	{ I2, D2, I2, D2, 0x0c }, /* 48 */
 56	{ I2, D4, I2, N0, 0x0b }, /* 48 */
 57	{ D2, I2, I2, D2, 0x07 }, /* 48 */
 58	{ D2, I2, D2, I2, 0x06 }, /* 48 */
 59	{ D4, I2, I2, N0, 0x03 }, /* 48 */
 60	{ I2, D2, D4, N0, 0x0a }, /* 56 */
 61	{ D2, I2, D4, N0, 0x05 }, /* 56 */
 62	{ D4, I2, D2, N0, 0x02 }, /* 56 */
 63	{ D4, D2, I2, N0, 0x01 }, /* 56 */
 64	{ D8, N0, N0, N0, 0x00 }, /* 64 */
 65};
 66
 67struct sw842_hlist_node8 {
 68	struct hlist_node node;
 69	u64 data;
 70	u8 index;
 71};
 72
 73struct sw842_hlist_node4 {
 74	struct hlist_node node;
 75	u32 data;
 76	u16 index;
 77};
 78
 79struct sw842_hlist_node2 {
 80	struct hlist_node node;
 81	u16 data;
 82	u8 index;
 83};
 84
 85#define INDEX_NOT_FOUND		(-1)
 86#define INDEX_NOT_CHECKED	(-2)
 87
 88struct sw842_param {
 89	u8 *in;
 90	u8 *instart;
 91	u64 ilen;
 92	u8 *out;
 93	u64 olen;
 94	u8 bit;
 95	u64 data8[1];
 96	u32 data4[2];
 97	u16 data2[4];
 98	int index8[1];
 99	int index4[2];
100	int index2[4];
101	DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
102	DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
103	DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
104	struct sw842_hlist_node8 node8[1 << I8_BITS];
105	struct sw842_hlist_node4 node4[1 << I4_BITS];
106	struct sw842_hlist_node2 node2[1 << I2_BITS];
107};
108
/* Read a big-endian @b-bit value located @o bytes past the current input
 * position of @p (no alignment required) and return it in CPU byte order.
 */
#define get_input_data(p, o, b)						\
	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
111
/* Reset the @b-byte hash table of @p: empty every bucket, then give each
 * backing entry its own slot number as index, zero data and an unhashed
 * list node.
 */
#define init_hashtable_nodes(p, b)	do {				\
	int _slot;							\
	hash_init((p)->htable##b);					\
	for (_slot = 0; _slot < ARRAY_SIZE((p)->node##b); _slot++) {	\
		(p)->node##b[_slot].index = _slot;			\
		(p)->node##b[_slot].data = 0;				\
		INIT_HLIST_NODE(&(p)->node##b[_slot].node);		\
	}								\
} while (0)
121
/* Look up the @n'th @b-byte value of the current block in the matching hash
 * table of @p.
 *
 * Stores the index of the first entry holding the same data in
 * (p)->index<b>[n], or INDEX_NOT_FOUND when there is none, and evaluates to
 * true when an entry was found.
 *
 * @p is parenthesized at every use so that any pointer expression can be
 * passed, as with the other helper macros in this file.
 */
#define find_index(p, b, n)	({					\
	struct sw842_hlist_node##b *_ent;				\
	(p)->index##b[n] = INDEX_NOT_FOUND;				\
	hash_for_each_possible((p)->htable##b, _ent, node,		\
			       (p)->data##b[n]) {			\
		if ((p)->data##b[n] == _ent->data) {			\
			(p)->index##b[n] = _ent->index;			\
			break;						\
		}							\
	}								\
	(p)->index##b[n] >= 0;						\
})
133
/* True when the @n'th @b-byte value of the current block has a hash match.
 * The lookup result is cached in (p)->index<b>[n]; find_index() is only run
 * while the slot still holds INDEX_NOT_CHECKED.
 */
#define check_index(p, b, n)				\
	((p)->index##b[n] != INDEX_NOT_CHECKED		\
	 ? (p)->index##b[n] >= 0			\
	 : find_index(p, b, n))
138
/* Recycle hash entry number @i + @d of the @b-byte table: unhash it, load it
 * with the @d'th @b-byte value of the current block and hash it again under
 * that value.
 */
#define replace_hash(p, b, i, d)	do {				\
	struct sw842_hlist_node##b *_ent = &(p)->node##b[(i)+(d)];	\
	hash_del(&_ent->node);						\
	_ent->data = (p)->data##b[d];					\
	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
		 (unsigned int)_ent->index,				\
		 (unsigned int)((p)->in - (p)->instart),		\
		 (unsigned long)_ent->data);				\
	hash_add((p)->htable##b, &_ent->node, _ent->data);		\
} while (0)
149
150static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
151
152static int add_bits(struct sw842_param *p, u64 d, u8 n);
153
154static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
155{
156	int ret;
157
158	if (n <= s)
159		return -EINVAL;
160
161	ret = add_bits(p, d >> s, n - s);
162	if (ret)
163		return ret;
164	return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
165}
166
167static int add_bits(struct sw842_param *p, u64 d, u8 n)
168{
169	int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
170	u64 o;
171	u8 *out = p->out;
172
173	pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
174
175	if (n > 64)
176		return -EINVAL;
177
178	/* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
179	 * or if we're at the end of the output buffer and would write past end
180	 */
181	if (bits > 64)
182		return __split_add_bits(p, d, n, 32);
183	else if (p->olen < 8 && bits > 32 && bits <= 56)
184		return __split_add_bits(p, d, n, 16);
185	else if (p->olen < 4 && bits > 16 && bits <= 24)
186		return __split_add_bits(p, d, n, 8);
187
188	if (DIV_ROUND_UP(bits, 8) > p->olen)
189		return -ENOSPC;
190
191	o = *out & bmask[b];
192	d <<= s;
193
194	if (bits <= 8)
195		*out = o | d;
196	else if (bits <= 16)
197		put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
198	else if (bits <= 24)
199		put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
200	else if (bits <= 32)
201		put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
202	else if (bits <= 40)
203		put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
204	else if (bits <= 48)
205		put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
206	else if (bits <= 56)
207		put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
208	else
209		put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
210
211	p->bit += n;
212
213	if (p->bit > 7) {
214		p->out += p->bit / 8;
215		p->olen -= p->bit / 8;
216		p->bit %= 8;
217	}
218
219	return 0;
220}
221
222static int add_template(struct sw842_param *p, u8 c)
223{
224	int ret, i, b = 0;
225	u8 *t = comp_ops[c];
226	bool inv = false;
227
228	if (c >= OPS_MAX)
229		return -EINVAL;
230
231	pr_debug("template %x\n", t[4]);
232
233	ret = add_bits(p, t[4], OP_BITS);
234	if (ret)
235		return ret;
236
237	for (i = 0; i < 4; i++) {
238		pr_debug("op %x\n", t[i]);
239
240		switch (t[i] & OP_AMOUNT) {
241		case OP_AMOUNT_8:
242			if (b)
243				inv = true;
244			else if (t[i] & OP_ACTION_INDEX)
245				ret = add_bits(p, p->index8[0], I8_BITS);
246			else if (t[i] & OP_ACTION_DATA)
247				ret = add_bits(p, p->data8[0], 64);
248			else
249				inv = true;
250			break;
251		case OP_AMOUNT_4:
252			if (b == 2 && t[i] & OP_ACTION_DATA)
253				ret = add_bits(p, get_input_data(p, 2, 32), 32);
254			else if (b != 0 && b != 4)
255				inv = true;
256			else if (t[i] & OP_ACTION_INDEX)
257				ret = add_bits(p, p->index4[b >> 2], I4_BITS);
258			else if (t[i] & OP_ACTION_DATA)
259				ret = add_bits(p, p->data4[b >> 2], 32);
260			else
261				inv = true;
262			break;
263		case OP_AMOUNT_2:
264			if (b != 0 && b != 2 && b != 4 && b != 6)
265				inv = true;
266			if (t[i] & OP_ACTION_INDEX)
267				ret = add_bits(p, p->index2[b >> 1], I2_BITS);
268			else if (t[i] & OP_ACTION_DATA)
269				ret = add_bits(p, p->data2[b >> 1], 16);
270			else
271				inv = true;
272			break;
273		case OP_AMOUNT_0:
274			inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
275			break;
276		default:
277			inv = true;
278			break;
279		}
280
281		if (ret)
282			return ret;
283
284		if (inv) {
285			pr_err("Invalid templ %x op %d : %x %x %x %x\n",
286			       c, i, t[0], t[1], t[2], t[3]);
287			return -EINVAL;
288		}
289
290		b += t[i] & OP_AMOUNT;
291	}
292
293	if (b != 8) {
294		pr_err("Invalid template %x len %x : %x %x %x %x\n",
295		       c, b, t[0], t[1], t[2], t[3]);
296		return -EINVAL;
297	}
298
299	if (sw842_template_counts)
300		atomic_inc(&template_count[t[4]]);
301
302	return 0;
303}
304
305static int add_repeat_template(struct sw842_param *p, u8 r)
306{
307	int ret;
308
309	/* repeat param is 0-based */
310	if (!r || --r > REPEAT_BITS_MAX)
311		return -EINVAL;
312
313	ret = add_bits(p, OP_REPEAT, OP_BITS);
314	if (ret)
315		return ret;
316
317	ret = add_bits(p, r, REPEAT_BITS);
318	if (ret)
319		return ret;
320
321	if (sw842_template_counts)
322		atomic_inc(&template_repeat_count);
323
324	return 0;
325}
326
327static int add_short_data_template(struct sw842_param *p, u8 b)
328{
329	int ret, i;
330
331	if (!b || b > SHORT_DATA_BITS_MAX)
332		return -EINVAL;
333
334	ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
335	if (ret)
336		return ret;
337
338	ret = add_bits(p, b, SHORT_DATA_BITS);
339	if (ret)
340		return ret;
341
342	for (i = 0; i < b; i++) {
343		ret = add_bits(p, p->in[i], 8);
344		if (ret)
345			return ret;
346	}
347
348	if (sw842_template_counts)
349		atomic_inc(&template_short_data_count);
350
351	return 0;
352}
353
354static int add_zeros_template(struct sw842_param *p)
355{
356	int ret = add_bits(p, OP_ZEROS, OP_BITS);
357
358	if (ret)
359		return ret;
360
361	if (sw842_template_counts)
362		atomic_inc(&template_zeros_count);
363
364	return 0;
365}
366
367static int add_end_template(struct sw842_param *p)
368{
369	int ret = add_bits(p, OP_END, OP_BITS);
370
371	if (ret)
372		return ret;
373
374	if (sw842_template_counts)
375		atomic_inc(&template_end_count);
376
377	return 0;
378}
379
380static bool check_template(struct sw842_param *p, u8 c)
381{
382	u8 *t = comp_ops[c];
383	int i, match, b = 0;
384
385	if (c >= OPS_MAX)
386		return false;
387
388	for (i = 0; i < 4; i++) {
389		if (t[i] & OP_ACTION_INDEX) {
390			if (t[i] & OP_AMOUNT_2)
391				match = check_index(p, 2, b >> 1);
392			else if (t[i] & OP_AMOUNT_4)
393				match = check_index(p, 4, b >> 2);
394			else if (t[i] & OP_AMOUNT_8)
395				match = check_index(p, 8, 0);
396			else
397				return false;
398			if (!match)
399				return false;
400		}
401
402		b += t[i] & OP_AMOUNT;
403	}
404
405	return true;
406}
407
408static void get_next_data(struct sw842_param *p)
409{
410	p->data8[0] = get_input_data(p, 0, 64);
411	p->data4[0] = get_input_data(p, 0, 32);
412	p->data4[1] = get_input_data(p, 4, 32);
413	p->data2[0] = get_input_data(p, 0, 16);
414	p->data2[1] = get_input_data(p, 2, 16);
415	p->data2[2] = get_input_data(p, 4, 16);
416	p->data2[3] = get_input_data(p, 6, 16);
417}
418
419/* update the hashtable entries.
420 * only call this after finding/adding the current template
421 * the dataN fields for the current 8 byte block must be already updated
422 */
423static void update_hashtables(struct sw842_param *p)
424{
425	u64 pos = p->in - p->instart;
426	u64 n8 = (pos >> 3) % (1 << I8_BITS);
427	u64 n4 = (pos >> 2) % (1 << I4_BITS);
428	u64 n2 = (pos >> 1) % (1 << I2_BITS);
429
430	replace_hash(p, 8, n8, 0);
431	replace_hash(p, 4, n4, 0);
432	replace_hash(p, 4, n4, 1);
433	replace_hash(p, 2, n2, 0);
434	replace_hash(p, 2, n2, 1);
435	replace_hash(p, 2, n2, 2);
436	replace_hash(p, 2, n2, 3);
437}
438
439/* find the next template to use, and add it
440 * the p->dataN fields must already be set for the current 8 byte block
441 */
442static int process_next(struct sw842_param *p)
443{
444	int ret, i;
445
446	p->index8[0] = INDEX_NOT_CHECKED;
447	p->index4[0] = INDEX_NOT_CHECKED;
448	p->index4[1] = INDEX_NOT_CHECKED;
449	p->index2[0] = INDEX_NOT_CHECKED;
450	p->index2[1] = INDEX_NOT_CHECKED;
451	p->index2[2] = INDEX_NOT_CHECKED;
452	p->index2[3] = INDEX_NOT_CHECKED;
453
454	/* check up to OPS_MAX - 1; last op is our fallback */
455	for (i = 0; i < OPS_MAX - 1; i++) {
456		if (check_template(p, i))
457			break;
458	}
459
460	ret = add_template(p, i);
461	if (ret)
462		return ret;
463
464	return 0;
465}
466
467/**
468 * sw842_compress
469 *
470 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
471 * @out, using no more than @olen bytes, using the 842 compression format.
472 *
473 * Returns: 0 on success, error on failure.  The @olen parameter
474 * will contain the number of output bytes written on success, or
475 * 0 on error.
476 */
477int sw842_compress(const u8 *in, unsigned int ilen,
478		   u8 *out, unsigned int *olen, void *wmem)
479{
480	struct sw842_param *p = (struct sw842_param *)wmem;
481	int ret;
482	u64 last, next, pad, total;
483	u8 repeat_count = 0;
484	u32 crc;
485
486	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
487
488	init_hashtable_nodes(p, 8);
489	init_hashtable_nodes(p, 4);
490	init_hashtable_nodes(p, 2);
491
492	p->in = (u8 *)in;
493	p->instart = p->in;
494	p->ilen = ilen;
495	p->out = out;
496	p->olen = *olen;
497	p->bit = 0;
498
499	total = p->olen;
500
501	*olen = 0;
502
503	/* if using strict mode, we can only compress a multiple of 8 */
504	if (sw842_strict && (ilen % 8)) {
505		pr_err("Using strict mode, can't compress len %d\n", ilen);
506		return -EINVAL;
507	}
508
509	/* let's compress at least 8 bytes, mkay? */
510	if (unlikely(ilen < 8))
511		goto skip_comp;
512
513	/* make initial 'last' different so we don't match the first time */
514	last = ~get_unaligned((u64 *)p->in);
515
516	while (p->ilen > 7) {
517		next = get_unaligned((u64 *)p->in);
518
519		/* must get the next data, as we need to update the hashtable
520		 * entries with the new data every time
521		 */
522		get_next_data(p);
523
524		/* we don't care about endianness in last or next;
525		 * we're just comparing 8 bytes to another 8 bytes,
526		 * they're both the same endianness
527		 */
528		if (next == last) {
529			/* repeat count bits are 0-based, so we stop at +1 */
530			if (++repeat_count <= REPEAT_BITS_MAX)
531				goto repeat;
532		}
533		if (repeat_count) {
534			ret = add_repeat_template(p, repeat_count);
535			repeat_count = 0;
536			if (next == last) /* reached max repeat bits */
537				goto repeat;
538		}
539
540		if (next == 0)
541			ret = add_zeros_template(p);
542		else
543			ret = process_next(p);
544
545		if (ret)
546			return ret;
547
548repeat:
549		last = next;
550		update_hashtables(p);
551		p->in += 8;
552		p->ilen -= 8;
553	}
554
555	if (repeat_count) {
556		ret = add_repeat_template(p, repeat_count);
557		if (ret)
558			return ret;
559	}
560
561skip_comp:
562	if (p->ilen > 0) {
563		ret = add_short_data_template(p, p->ilen);
564		if (ret)
565			return ret;
566
567		p->in += p->ilen;
568		p->ilen = 0;
569	}
570
571	ret = add_end_template(p);
572	if (ret)
573		return ret;
574
575	/*
576	 * crc(0:31) is appended to target data starting with the next
577	 * bit after End of stream template.
578	 * nx842 calculates CRC for data in big-endian format. So doing
579	 * same here so that sw842 decompression can be used for both
580	 * compressed data.
581	 */
582	crc = crc32_be(0, in, ilen);
583	ret = add_bits(p, crc, CRC_BITS);
584	if (ret)
585		return ret;
586
587	if (p->bit) {
588		p->out++;
589		p->olen--;
590		p->bit = 0;
591	}
592
593	/* pad compressed length to multiple of 8 */
594	pad = (8 - ((total - p->olen) % 8)) % 8;
595	if (pad) {
596		if (pad > p->olen) /* we were so close! */
597			return -ENOSPC;
598		memset(p->out, 0, pad);
599		p->out += pad;
600		p->olen -= pad;
601	}
602
603	if (unlikely((total - p->olen) > UINT_MAX))
604		return -ENOSPC;
605
606	*olen = total - p->olen;
607
608	return 0;
609}
610EXPORT_SYMBOL_GPL(sw842_compress);
611
612static int __init sw842_init(void)
613{
614	if (sw842_template_counts)
615		sw842_debugfs_create();
616
617	return 0;
618}
619module_init(sw842_init);
620
621static void __exit sw842_exit(void)
622{
623	if (sw842_template_counts)
624		sw842_debugfs_remove();
625}
626module_exit(sw842_exit);
627
628MODULE_LICENSE("GPL");
629MODULE_DESCRIPTION("Software 842 Compressor");
630MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
v4.10.11
 
  1/*
  2 * 842 Software Compression
  3 *
  4 * Copyright (C) 2015 Dan Streetman, IBM Corp
  5 *
  6 * This program is free software; you can redistribute it and/or modify
  7 * it under the terms of the GNU General Public License as published by
  8 * the Free Software Foundation; either version 2 of the License, or
  9 * (at your option) any later version.
 10 *
 11 * This program is distributed in the hope that it will be useful,
 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14 * GNU General Public License for more details.
 15 *
 16 * See 842.h for details of the 842 compressed format.
 17 */
 18
 19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 20#define MODULE_NAME "842_compress"
 21
 22#include <linux/hashtable.h>
 23
 24#include "842.h"
 25#include "842_debugfs.h"
 26
 27#define SW842_HASHTABLE8_BITS	(10)
 28#define SW842_HASHTABLE4_BITS	(11)
 29#define SW842_HASHTABLE2_BITS	(10)
 30
 31/* By default, we allow compressing input buffers of any length, but we must
 32 * use the non-standard "short data" template so the decompressor can correctly
 33 * reproduce the uncompressed data buffer at the right length.  However the
 34 * hardware 842 compressor will not recognize the "short data" template, and
 35 * will fail to decompress any compressed buffer containing it (I have no idea
 36 * why anyone would want to use software to compress and hardware to decompress
 37 * but that's beside the point).  This parameter forces the compression
 38 * function to simply reject any input buffer that isn't a multiple of 8 bytes
 39 * long, instead of using the "short data" template, so that all compressed
 40 * buffers produced by this function will be decompressable by the 842 hardware
 41 * decompressor.  Unless you have a specific need for that, leave this disabled
 42 * so that any length buffer can be compressed.
 43 */
 44static bool sw842_strict;
 45module_param_named(strict, sw842_strict, bool, 0644);
 46
 47static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
 48	{ I8, N0, N0, N0, 0x19 }, /* 8 */
 49	{ I4, I4, N0, N0, 0x18 }, /* 18 */
 50	{ I4, I2, I2, N0, 0x17 }, /* 25 */
 51	{ I2, I2, I4, N0, 0x13 }, /* 25 */
 52	{ I2, I2, I2, I2, 0x12 }, /* 32 */
 53	{ I4, I2, D2, N0, 0x16 }, /* 33 */
 54	{ I4, D2, I2, N0, 0x15 }, /* 33 */
 55	{ I2, D2, I4, N0, 0x0e }, /* 33 */
 56	{ D2, I2, I4, N0, 0x09 }, /* 33 */
 57	{ I2, I2, I2, D2, 0x11 }, /* 40 */
 58	{ I2, I2, D2, I2, 0x10 }, /* 40 */
 59	{ I2, D2, I2, I2, 0x0d }, /* 40 */
 60	{ D2, I2, I2, I2, 0x08 }, /* 40 */
 61	{ I4, D4, N0, N0, 0x14 }, /* 41 */
 62	{ D4, I4, N0, N0, 0x04 }, /* 41 */
 63	{ I2, I2, D4, N0, 0x0f }, /* 48 */
 64	{ I2, D2, I2, D2, 0x0c }, /* 48 */
 65	{ I2, D4, I2, N0, 0x0b }, /* 48 */
 66	{ D2, I2, I2, D2, 0x07 }, /* 48 */
 67	{ D2, I2, D2, I2, 0x06 }, /* 48 */
 68	{ D4, I2, I2, N0, 0x03 }, /* 48 */
 69	{ I2, D2, D4, N0, 0x0a }, /* 56 */
 70	{ D2, I2, D4, N0, 0x05 }, /* 56 */
 71	{ D4, I2, D2, N0, 0x02 }, /* 56 */
 72	{ D4, D2, I2, N0, 0x01 }, /* 56 */
 73	{ D8, N0, N0, N0, 0x00 }, /* 64 */
 74};
 75
 76struct sw842_hlist_node8 {
 77	struct hlist_node node;
 78	u64 data;
 79	u8 index;
 80};
 81
 82struct sw842_hlist_node4 {
 83	struct hlist_node node;
 84	u32 data;
 85	u16 index;
 86};
 87
 88struct sw842_hlist_node2 {
 89	struct hlist_node node;
 90	u16 data;
 91	u8 index;
 92};
 93
 94#define INDEX_NOT_FOUND		(-1)
 95#define INDEX_NOT_CHECKED	(-2)
 96
 97struct sw842_param {
 98	u8 *in;
 99	u8 *instart;
100	u64 ilen;
101	u8 *out;
102	u64 olen;
103	u8 bit;
104	u64 data8[1];
105	u32 data4[2];
106	u16 data2[4];
107	int index8[1];
108	int index4[2];
109	int index2[4];
110	DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
111	DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
112	DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
113	struct sw842_hlist_node8 node8[1 << I8_BITS];
114	struct sw842_hlist_node4 node4[1 << I4_BITS];
115	struct sw842_hlist_node2 node2[1 << I2_BITS];
116};
117
/* Read a big-endian @b-bit value located @o bytes past the current input
 * position of @p (no alignment required) and return it in CPU byte order.
 */
#define get_input_data(p, o, b)						\
	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
120
/* Reset the @b-byte hash table of @p: empty every bucket, then give each
 * backing entry its own slot number as index, zero data and an unhashed
 * list node.
 */
#define init_hashtable_nodes(p, b)	do {				\
	int _slot;							\
	hash_init((p)->htable##b);					\
	for (_slot = 0; _slot < ARRAY_SIZE((p)->node##b); _slot++) {	\
		(p)->node##b[_slot].index = _slot;			\
		(p)->node##b[_slot].data = 0;				\
		INIT_HLIST_NODE(&(p)->node##b[_slot].node);		\
	}								\
} while (0)
130
/* Look up the @n'th @b-byte value of the current block in the matching hash
 * table of @p.
 *
 * Stores the index of the first entry holding the same data in
 * (p)->index<b>[n], or INDEX_NOT_FOUND when there is none, and evaluates to
 * true when an entry was found.
 *
 * @p is parenthesized at every use so that any pointer expression can be
 * passed, as with the other helper macros in this file.
 */
#define find_index(p, b, n)	({					\
	struct sw842_hlist_node##b *_ent;				\
	(p)->index##b[n] = INDEX_NOT_FOUND;				\
	hash_for_each_possible((p)->htable##b, _ent, node,		\
			       (p)->data##b[n]) {			\
		if ((p)->data##b[n] == _ent->data) {			\
			(p)->index##b[n] = _ent->index;			\
			break;						\
		}							\
	}								\
	(p)->index##b[n] >= 0;						\
})
142
/* True when the @n'th @b-byte value of the current block has a hash match.
 * The lookup result is cached in (p)->index<b>[n]; find_index() is only run
 * while the slot still holds INDEX_NOT_CHECKED.
 */
#define check_index(p, b, n)				\
	((p)->index##b[n] != INDEX_NOT_CHECKED		\
	 ? (p)->index##b[n] >= 0			\
	 : find_index(p, b, n))
147
/* Recycle hash entry number @i + @d of the @b-byte table: unhash it, load it
 * with the @d'th @b-byte value of the current block and hash it again under
 * that value.
 */
#define replace_hash(p, b, i, d)	do {				\
	struct sw842_hlist_node##b *_ent = &(p)->node##b[(i)+(d)];	\
	hash_del(&_ent->node);						\
	_ent->data = (p)->data##b[d];					\
	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
		 (unsigned int)_ent->index,				\
		 (unsigned int)((p)->in - (p)->instart),		\
		 (unsigned long)_ent->data);				\
	hash_add((p)->htable##b, &_ent->node, _ent->data);		\
} while (0)
158
159static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
160
161static int add_bits(struct sw842_param *p, u64 d, u8 n);
162
163static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
164{
165	int ret;
166
167	if (n <= s)
168		return -EINVAL;
169
170	ret = add_bits(p, d >> s, n - s);
171	if (ret)
172		return ret;
173	return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
174}
175
176static int add_bits(struct sw842_param *p, u64 d, u8 n)
177{
178	int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
179	u64 o;
180	u8 *out = p->out;
181
182	pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
183
184	if (n > 64)
185		return -EINVAL;
186
187	/* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
188	 * or if we're at the end of the output buffer and would write past end
189	 */
190	if (bits > 64)
191		return __split_add_bits(p, d, n, 32);
192	else if (p->olen < 8 && bits > 32 && bits <= 56)
193		return __split_add_bits(p, d, n, 16);
194	else if (p->olen < 4 && bits > 16 && bits <= 24)
195		return __split_add_bits(p, d, n, 8);
196
197	if (DIV_ROUND_UP(bits, 8) > p->olen)
198		return -ENOSPC;
199
200	o = *out & bmask[b];
201	d <<= s;
202
203	if (bits <= 8)
204		*out = o | d;
205	else if (bits <= 16)
206		put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
207	else if (bits <= 24)
208		put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
209	else if (bits <= 32)
210		put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
211	else if (bits <= 40)
212		put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
213	else if (bits <= 48)
214		put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
215	else if (bits <= 56)
216		put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
217	else
218		put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
219
220	p->bit += n;
221
222	if (p->bit > 7) {
223		p->out += p->bit / 8;
224		p->olen -= p->bit / 8;
225		p->bit %= 8;
226	}
227
228	return 0;
229}
230
231static int add_template(struct sw842_param *p, u8 c)
232{
233	int ret, i, b = 0;
234	u8 *t = comp_ops[c];
235	bool inv = false;
236
237	if (c >= OPS_MAX)
238		return -EINVAL;
239
240	pr_debug("template %x\n", t[4]);
241
242	ret = add_bits(p, t[4], OP_BITS);
243	if (ret)
244		return ret;
245
246	for (i = 0; i < 4; i++) {
247		pr_debug("op %x\n", t[i]);
248
249		switch (t[i] & OP_AMOUNT) {
250		case OP_AMOUNT_8:
251			if (b)
252				inv = true;
253			else if (t[i] & OP_ACTION_INDEX)
254				ret = add_bits(p, p->index8[0], I8_BITS);
255			else if (t[i] & OP_ACTION_DATA)
256				ret = add_bits(p, p->data8[0], 64);
257			else
258				inv = true;
259			break;
260		case OP_AMOUNT_4:
261			if (b == 2 && t[i] & OP_ACTION_DATA)
262				ret = add_bits(p, get_input_data(p, 2, 32), 32);
263			else if (b != 0 && b != 4)
264				inv = true;
265			else if (t[i] & OP_ACTION_INDEX)
266				ret = add_bits(p, p->index4[b >> 2], I4_BITS);
267			else if (t[i] & OP_ACTION_DATA)
268				ret = add_bits(p, p->data4[b >> 2], 32);
269			else
270				inv = true;
271			break;
272		case OP_AMOUNT_2:
273			if (b != 0 && b != 2 && b != 4 && b != 6)
274				inv = true;
275			if (t[i] & OP_ACTION_INDEX)
276				ret = add_bits(p, p->index2[b >> 1], I2_BITS);
277			else if (t[i] & OP_ACTION_DATA)
278				ret = add_bits(p, p->data2[b >> 1], 16);
279			else
280				inv = true;
281			break;
282		case OP_AMOUNT_0:
283			inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
284			break;
285		default:
286			inv = true;
287			break;
288		}
289
290		if (ret)
291			return ret;
292
293		if (inv) {
294			pr_err("Invalid templ %x op %d : %x %x %x %x\n",
295			       c, i, t[0], t[1], t[2], t[3]);
296			return -EINVAL;
297		}
298
299		b += t[i] & OP_AMOUNT;
300	}
301
302	if (b != 8) {
303		pr_err("Invalid template %x len %x : %x %x %x %x\n",
304		       c, b, t[0], t[1], t[2], t[3]);
305		return -EINVAL;
306	}
307
308	if (sw842_template_counts)
309		atomic_inc(&template_count[t[4]]);
310
311	return 0;
312}
313
314static int add_repeat_template(struct sw842_param *p, u8 r)
315{
316	int ret;
317
318	/* repeat param is 0-based */
319	if (!r || --r > REPEAT_BITS_MAX)
320		return -EINVAL;
321
322	ret = add_bits(p, OP_REPEAT, OP_BITS);
323	if (ret)
324		return ret;
325
326	ret = add_bits(p, r, REPEAT_BITS);
327	if (ret)
328		return ret;
329
330	if (sw842_template_counts)
331		atomic_inc(&template_repeat_count);
332
333	return 0;
334}
335
336static int add_short_data_template(struct sw842_param *p, u8 b)
337{
338	int ret, i;
339
340	if (!b || b > SHORT_DATA_BITS_MAX)
341		return -EINVAL;
342
343	ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
344	if (ret)
345		return ret;
346
347	ret = add_bits(p, b, SHORT_DATA_BITS);
348	if (ret)
349		return ret;
350
351	for (i = 0; i < b; i++) {
352		ret = add_bits(p, p->in[i], 8);
353		if (ret)
354			return ret;
355	}
356
357	if (sw842_template_counts)
358		atomic_inc(&template_short_data_count);
359
360	return 0;
361}
362
363static int add_zeros_template(struct sw842_param *p)
364{
365	int ret = add_bits(p, OP_ZEROS, OP_BITS);
366
367	if (ret)
368		return ret;
369
370	if (sw842_template_counts)
371		atomic_inc(&template_zeros_count);
372
373	return 0;
374}
375
376static int add_end_template(struct sw842_param *p)
377{
378	int ret = add_bits(p, OP_END, OP_BITS);
379
380	if (ret)
381		return ret;
382
383	if (sw842_template_counts)
384		atomic_inc(&template_end_count);
385
386	return 0;
387}
388
389static bool check_template(struct sw842_param *p, u8 c)
390{
391	u8 *t = comp_ops[c];
392	int i, match, b = 0;
393
394	if (c >= OPS_MAX)
395		return false;
396
397	for (i = 0; i < 4; i++) {
398		if (t[i] & OP_ACTION_INDEX) {
399			if (t[i] & OP_AMOUNT_2)
400				match = check_index(p, 2, b >> 1);
401			else if (t[i] & OP_AMOUNT_4)
402				match = check_index(p, 4, b >> 2);
403			else if (t[i] & OP_AMOUNT_8)
404				match = check_index(p, 8, 0);
405			else
406				return false;
407			if (!match)
408				return false;
409		}
410
411		b += t[i] & OP_AMOUNT;
412	}
413
414	return true;
415}
416
417static void get_next_data(struct sw842_param *p)
418{
419	p->data8[0] = get_input_data(p, 0, 64);
420	p->data4[0] = get_input_data(p, 0, 32);
421	p->data4[1] = get_input_data(p, 4, 32);
422	p->data2[0] = get_input_data(p, 0, 16);
423	p->data2[1] = get_input_data(p, 2, 16);
424	p->data2[2] = get_input_data(p, 4, 16);
425	p->data2[3] = get_input_data(p, 6, 16);
426}
427
428/* update the hashtable entries.
429 * only call this after finding/adding the current template
430 * the dataN fields for the current 8 byte block must be already updated
431 */
432static void update_hashtables(struct sw842_param *p)
433{
434	u64 pos = p->in - p->instart;
435	u64 n8 = (pos >> 3) % (1 << I8_BITS);
436	u64 n4 = (pos >> 2) % (1 << I4_BITS);
437	u64 n2 = (pos >> 1) % (1 << I2_BITS);
438
439	replace_hash(p, 8, n8, 0);
440	replace_hash(p, 4, n4, 0);
441	replace_hash(p, 4, n4, 1);
442	replace_hash(p, 2, n2, 0);
443	replace_hash(p, 2, n2, 1);
444	replace_hash(p, 2, n2, 2);
445	replace_hash(p, 2, n2, 3);
446}
447
448/* find the next template to use, and add it
449 * the p->dataN fields must already be set for the current 8 byte block
450 */
451static int process_next(struct sw842_param *p)
452{
453	int ret, i;
454
455	p->index8[0] = INDEX_NOT_CHECKED;
456	p->index4[0] = INDEX_NOT_CHECKED;
457	p->index4[1] = INDEX_NOT_CHECKED;
458	p->index2[0] = INDEX_NOT_CHECKED;
459	p->index2[1] = INDEX_NOT_CHECKED;
460	p->index2[2] = INDEX_NOT_CHECKED;
461	p->index2[3] = INDEX_NOT_CHECKED;
462
463	/* check up to OPS_MAX - 1; last op is our fallback */
464	for (i = 0; i < OPS_MAX - 1; i++) {
465		if (check_template(p, i))
466			break;
467	}
468
469	ret = add_template(p, i);
470	if (ret)
471		return ret;
472
473	return 0;
474}
475
476/**
477 * sw842_compress
478 *
479 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
480 * @out, using no more than @olen bytes, using the 842 compression format.
481 *
482 * Returns: 0 on success, error on failure.  The @olen parameter
483 * will contain the number of output bytes written on success, or
484 * 0 on error.
485 */
486int sw842_compress(const u8 *in, unsigned int ilen,
487		   u8 *out, unsigned int *olen, void *wmem)
488{
489	struct sw842_param *p = (struct sw842_param *)wmem;
490	int ret;
491	u64 last, next, pad, total;
492	u8 repeat_count = 0;
493	u32 crc;
494
495	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
496
497	init_hashtable_nodes(p, 8);
498	init_hashtable_nodes(p, 4);
499	init_hashtable_nodes(p, 2);
500
501	p->in = (u8 *)in;
502	p->instart = p->in;
503	p->ilen = ilen;
504	p->out = out;
505	p->olen = *olen;
506	p->bit = 0;
507
508	total = p->olen;
509
510	*olen = 0;
511
512	/* if using strict mode, we can only compress a multiple of 8 */
513	if (sw842_strict && (ilen % 8)) {
514		pr_err("Using strict mode, can't compress len %d\n", ilen);
515		return -EINVAL;
516	}
517
518	/* let's compress at least 8 bytes, mkay? */
519	if (unlikely(ilen < 8))
520		goto skip_comp;
521
522	/* make initial 'last' different so we don't match the first time */
523	last = ~get_unaligned((u64 *)p->in);
524
525	while (p->ilen > 7) {
526		next = get_unaligned((u64 *)p->in);
527
528		/* must get the next data, as we need to update the hashtable
529		 * entries with the new data every time
530		 */
531		get_next_data(p);
532
533		/* we don't care about endianness in last or next;
534		 * we're just comparing 8 bytes to another 8 bytes,
535		 * they're both the same endianness
536		 */
537		if (next == last) {
538			/* repeat count bits are 0-based, so we stop at +1 */
539			if (++repeat_count <= REPEAT_BITS_MAX)
540				goto repeat;
541		}
542		if (repeat_count) {
543			ret = add_repeat_template(p, repeat_count);
544			repeat_count = 0;
545			if (next == last) /* reached max repeat bits */
546				goto repeat;
547		}
548
549		if (next == 0)
550			ret = add_zeros_template(p);
551		else
552			ret = process_next(p);
553
554		if (ret)
555			return ret;
556
557repeat:
558		last = next;
559		update_hashtables(p);
560		p->in += 8;
561		p->ilen -= 8;
562	}
563
564	if (repeat_count) {
565		ret = add_repeat_template(p, repeat_count);
566		if (ret)
567			return ret;
568	}
569
570skip_comp:
571	if (p->ilen > 0) {
572		ret = add_short_data_template(p, p->ilen);
573		if (ret)
574			return ret;
575
576		p->in += p->ilen;
577		p->ilen = 0;
578	}
579
580	ret = add_end_template(p);
581	if (ret)
582		return ret;
583
584	/*
585	 * crc(0:31) is appended to target data starting with the next
586	 * bit after End of stream template.
587	 * nx842 calculates CRC for data in big-endian format. So doing
588	 * same here so that sw842 decompression can be used for both
589	 * compressed data.
590	 */
591	crc = crc32_be(0, in, ilen);
592	ret = add_bits(p, crc, CRC_BITS);
593	if (ret)
594		return ret;
595
596	if (p->bit) {
597		p->out++;
598		p->olen--;
599		p->bit = 0;
600	}
601
602	/* pad compressed length to multiple of 8 */
603	pad = (8 - ((total - p->olen) % 8)) % 8;
604	if (pad) {
605		if (pad > p->olen) /* we were so close! */
606			return -ENOSPC;
607		memset(p->out, 0, pad);
608		p->out += pad;
609		p->olen -= pad;
610	}
611
612	if (unlikely((total - p->olen) > UINT_MAX))
613		return -ENOSPC;
614
615	*olen = total - p->olen;
616
617	return 0;
618}
619EXPORT_SYMBOL_GPL(sw842_compress);
620
621static int __init sw842_init(void)
622{
623	if (sw842_template_counts)
624		sw842_debugfs_create();
625
626	return 0;
627}
628module_init(sw842_init);
629
630static void __exit sw842_exit(void)
631{
632	if (sw842_template_counts)
633		sw842_debugfs_remove();
634}
635module_exit(sw842_exit);
636
637MODULE_LICENSE("GPL");
638MODULE_DESCRIPTION("Software 842 Compressor");
639MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");