// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * 842 Software Compression
4 *
5 * Copyright (C) 2015 Dan Streetman, IBM Corp
6 *
7 * See 842.h for details of the 842 compressed format.
8 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11#define MODULE_NAME "842_compress"
12
13#include <linux/hashtable.h>
14
15#include "842.h"
16#include "842_debugfs.h"
17
/* "bits" argument handed to DECLARE_HASHTABLE() for each lookup table */
#define SW842_HASHTABLE8_BITS	10	/* htable8: 8-byte values */
#define SW842_HASHTABLE4_BITS	11	/* htable4: 4-byte values */
#define SW842_HASHTABLE2_BITS	10	/* htable2: 2-byte values */
21
/* By default, we allow compressing input buffers of any length, but we must
 * use the non-standard "short data" template so the decompressor can correctly
 * reproduce the uncompressed data buffer at the right length.  However the
 * hardware 842 decompressor will not recognize the "short data" template, and
 * will fail to decompress any compressed buffer containing it (I have no idea
 * why anyone would want to use software to compress and hardware to decompress
 * but that's beside the point).  This parameter forces the compression
 * function to simply reject any input buffer that isn't a multiple of 8 bytes
 * long, instead of using the "short data" template, so that all compressed
 * buffers produced by this function will be decompressible by the 842 hardware
 * decompressor.  Unless you have a specific need for that, leave this disabled
 * so that any length buffer can be compressed.
 */
35static bool sw842_strict;
36module_param_named(strict, sw842_strict, bool, 0644);
37
38static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
39 { I8, N0, N0, N0, 0x19 }, /* 8 */
40 { I4, I4, N0, N0, 0x18 }, /* 18 */
41 { I4, I2, I2, N0, 0x17 }, /* 25 */
42 { I2, I2, I4, N0, 0x13 }, /* 25 */
43 { I2, I2, I2, I2, 0x12 }, /* 32 */
44 { I4, I2, D2, N0, 0x16 }, /* 33 */
45 { I4, D2, I2, N0, 0x15 }, /* 33 */
46 { I2, D2, I4, N0, 0x0e }, /* 33 */
47 { D2, I2, I4, N0, 0x09 }, /* 33 */
48 { I2, I2, I2, D2, 0x11 }, /* 40 */
49 { I2, I2, D2, I2, 0x10 }, /* 40 */
50 { I2, D2, I2, I2, 0x0d }, /* 40 */
51 { D2, I2, I2, I2, 0x08 }, /* 40 */
52 { I4, D4, N0, N0, 0x14 }, /* 41 */
53 { D4, I4, N0, N0, 0x04 }, /* 41 */
54 { I2, I2, D4, N0, 0x0f }, /* 48 */
55 { I2, D2, I2, D2, 0x0c }, /* 48 */
56 { I2, D4, I2, N0, 0x0b }, /* 48 */
57 { D2, I2, I2, D2, 0x07 }, /* 48 */
58 { D2, I2, D2, I2, 0x06 }, /* 48 */
59 { D4, I2, I2, N0, 0x03 }, /* 48 */
60 { I2, D2, D4, N0, 0x0a }, /* 56 */
61 { D2, I2, D4, N0, 0x05 }, /* 56 */
62 { D4, I2, D2, N0, 0x02 }, /* 56 */
63 { D4, D2, I2, N0, 0x01 }, /* 56 */
64 { D8, N0, N0, N0, 0x00 }, /* 64 */
65};
66
67struct sw842_hlist_node8 {
68 struct hlist_node node;
69 u64 data;
70 u8 index;
71};
72
73struct sw842_hlist_node4 {
74 struct hlist_node node;
75 u32 data;
76 u16 index;
77};
78
79struct sw842_hlist_node2 {
80 struct hlist_node node;
81 u16 data;
82 u8 index;
83};
84
/* Sentinels for the indexN[] fields; a usable index is always >= 0. */
#define INDEX_NOT_FOUND		(-1)	/* lookup done, no matching entry */
#define INDEX_NOT_CHECKED	(-2)	/* lookup not yet done for this block */
87
88struct sw842_param {
89 u8 *in;
90 u8 *instart;
91 u64 ilen;
92 u8 *out;
93 u64 olen;
94 u8 bit;
95 u64 data8[1];
96 u32 data4[2];
97 u16 data2[4];
98 int index8[1];
99 int index4[2];
100 int index2[4];
101 DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
102 DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
103 DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
104 struct sw842_hlist_node8 node8[1 << I8_BITS];
105 struct sw842_hlist_node4 node4[1 << I4_BITS];
106 struct sw842_hlist_node2 node2[1 << I2_BITS];
107};
108
/* Read a big-endian b-bit value located o bytes past p->in (any alignment). */
#define get_input_data(p, o, b)						\
	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
111
/* Empty htable<b> and reset every node<b>[] slot: data 0, index = own slot. */
#define init_hashtable_nodes(p, b)	do {				\
	int _slot;							\
	hash_init((p)->htable##b);					\
	for (_slot = 0; _slot < ARRAY_SIZE((p)->node##b); _slot++) {	\
		INIT_HLIST_NODE(&(p)->node##b[_slot].node);		\
		(p)->node##b[_slot].data = 0;				\
		(p)->node##b[_slot].index = _slot;			\
	}								\
} while (0)
121
/*
 * Look up p->data<b>[n] in htable<b>.  Stores the slot number of the first
 * entry with equal data in p->index<b>[n], or INDEX_NOT_FOUND if there is
 * none.  Evaluates to true when an entry was found.
 *
 * The p argument is parenthesized everywhere so that any pointer expression
 * can be passed, matching check_index() and replace_hash().
 */
#define find_index(p, b, n)	({					\
	struct sw842_hlist_node##b *_n;					\
	(p)->index##b[n] = INDEX_NOT_FOUND;				\
	hash_for_each_possible((p)->htable##b, _n, node,		\
			       (p)->data##b[n]) {			\
		if ((p)->data##b[n] == _n->data) {			\
			(p)->index##b[n] = _n->index;			\
			break;						\
		}							\
	}								\
	(p)->index##b[n] >= 0;						\
})
133
/* True if p->data<b>[n] has a hashtable match; looks it up at most once. */
#define check_index(p, b, n)						\
	((p)->index##b[n] != INDEX_NOT_CHECKED				\
	 ? (p)->index##b[n] >= 0					\
	 : find_index(p, b, n))
138
/* Re-key slot node<b>[i + d]: unhash it, store p->data<b>[d], hash it again. */
#define replace_hash(p, b, i, d)	do {				\
	struct sw842_hlist_node##b *_slot = &(p)->node##b[(i)+(d)];	\
	hash_del(&_slot->node);						\
	_slot->data = (p)->data##b[d];					\
	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
		 (unsigned int)_slot->index,				\
		 (unsigned int)((p)->in - (p)->instart),		\
		 (unsigned long)_slot->data);				\
	hash_add((p)->htable##b, &_slot->node, _slot->data);		\
} while (0)
149
150static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
151
152static int add_bits(struct sw842_param *p, u64 d, u8 n);
153
154static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
155{
156 int ret;
157
158 if (n <= s)
159 return -EINVAL;
160
161 ret = add_bits(p, d >> s, n - s);
162 if (ret)
163 return ret;
164 return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
165}
166
167static int add_bits(struct sw842_param *p, u64 d, u8 n)
168{
169 int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
170 u64 o;
171 u8 *out = p->out;
172
173 pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
174
175 if (n > 64)
176 return -EINVAL;
177
178 /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
179 * or if we're at the end of the output buffer and would write past end
180 */
181 if (bits > 64)
182 return __split_add_bits(p, d, n, 32);
183 else if (p->olen < 8 && bits > 32 && bits <= 56)
184 return __split_add_bits(p, d, n, 16);
185 else if (p->olen < 4 && bits > 16 && bits <= 24)
186 return __split_add_bits(p, d, n, 8);
187
188 if (DIV_ROUND_UP(bits, 8) > p->olen)
189 return -ENOSPC;
190
191 o = *out & bmask[b];
192 d <<= s;
193
194 if (bits <= 8)
195 *out = o | d;
196 else if (bits <= 16)
197 put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
198 else if (bits <= 24)
199 put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
200 else if (bits <= 32)
201 put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
202 else if (bits <= 40)
203 put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
204 else if (bits <= 48)
205 put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
206 else if (bits <= 56)
207 put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
208 else
209 put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
210
211 p->bit += n;
212
213 if (p->bit > 7) {
214 p->out += p->bit / 8;
215 p->olen -= p->bit / 8;
216 p->bit %= 8;
217 }
218
219 return 0;
220}
221
222static int add_template(struct sw842_param *p, u8 c)
223{
224 int ret, i, b = 0;
225 u8 *t = comp_ops[c];
226 bool inv = false;
227
228 if (c >= OPS_MAX)
229 return -EINVAL;
230
231 pr_debug("template %x\n", t[4]);
232
233 ret = add_bits(p, t[4], OP_BITS);
234 if (ret)
235 return ret;
236
237 for (i = 0; i < 4; i++) {
238 pr_debug("op %x\n", t[i]);
239
240 switch (t[i] & OP_AMOUNT) {
241 case OP_AMOUNT_8:
242 if (b)
243 inv = true;
244 else if (t[i] & OP_ACTION_INDEX)
245 ret = add_bits(p, p->index8[0], I8_BITS);
246 else if (t[i] & OP_ACTION_DATA)
247 ret = add_bits(p, p->data8[0], 64);
248 else
249 inv = true;
250 break;
251 case OP_AMOUNT_4:
252 if (b == 2 && t[i] & OP_ACTION_DATA)
253 ret = add_bits(p, get_input_data(p, 2, 32), 32);
254 else if (b != 0 && b != 4)
255 inv = true;
256 else if (t[i] & OP_ACTION_INDEX)
257 ret = add_bits(p, p->index4[b >> 2], I4_BITS);
258 else if (t[i] & OP_ACTION_DATA)
259 ret = add_bits(p, p->data4[b >> 2], 32);
260 else
261 inv = true;
262 break;
263 case OP_AMOUNT_2:
264 if (b != 0 && b != 2 && b != 4 && b != 6)
265 inv = true;
266 if (t[i] & OP_ACTION_INDEX)
267 ret = add_bits(p, p->index2[b >> 1], I2_BITS);
268 else if (t[i] & OP_ACTION_DATA)
269 ret = add_bits(p, p->data2[b >> 1], 16);
270 else
271 inv = true;
272 break;
273 case OP_AMOUNT_0:
274 inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
275 break;
276 default:
277 inv = true;
278 break;
279 }
280
281 if (ret)
282 return ret;
283
284 if (inv) {
285 pr_err("Invalid templ %x op %d : %x %x %x %x\n",
286 c, i, t[0], t[1], t[2], t[3]);
287 return -EINVAL;
288 }
289
290 b += t[i] & OP_AMOUNT;
291 }
292
293 if (b != 8) {
294 pr_err("Invalid template %x len %x : %x %x %x %x\n",
295 c, b, t[0], t[1], t[2], t[3]);
296 return -EINVAL;
297 }
298
299 if (sw842_template_counts)
300 atomic_inc(&template_count[t[4]]);
301
302 return 0;
303}
304
305static int add_repeat_template(struct sw842_param *p, u8 r)
306{
307 int ret;
308
309 /* repeat param is 0-based */
310 if (!r || --r > REPEAT_BITS_MAX)
311 return -EINVAL;
312
313 ret = add_bits(p, OP_REPEAT, OP_BITS);
314 if (ret)
315 return ret;
316
317 ret = add_bits(p, r, REPEAT_BITS);
318 if (ret)
319 return ret;
320
321 if (sw842_template_counts)
322 atomic_inc(&template_repeat_count);
323
324 return 0;
325}
326
327static int add_short_data_template(struct sw842_param *p, u8 b)
328{
329 int ret, i;
330
331 if (!b || b > SHORT_DATA_BITS_MAX)
332 return -EINVAL;
333
334 ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
335 if (ret)
336 return ret;
337
338 ret = add_bits(p, b, SHORT_DATA_BITS);
339 if (ret)
340 return ret;
341
342 for (i = 0; i < b; i++) {
343 ret = add_bits(p, p->in[i], 8);
344 if (ret)
345 return ret;
346 }
347
348 if (sw842_template_counts)
349 atomic_inc(&template_short_data_count);
350
351 return 0;
352}
353
354static int add_zeros_template(struct sw842_param *p)
355{
356 int ret = add_bits(p, OP_ZEROS, OP_BITS);
357
358 if (ret)
359 return ret;
360
361 if (sw842_template_counts)
362 atomic_inc(&template_zeros_count);
363
364 return 0;
365}
366
367static int add_end_template(struct sw842_param *p)
368{
369 int ret = add_bits(p, OP_END, OP_BITS);
370
371 if (ret)
372 return ret;
373
374 if (sw842_template_counts)
375 atomic_inc(&template_end_count);
376
377 return 0;
378}
379
380static bool check_template(struct sw842_param *p, u8 c)
381{
382 u8 *t = comp_ops[c];
383 int i, match, b = 0;
384
385 if (c >= OPS_MAX)
386 return false;
387
388 for (i = 0; i < 4; i++) {
389 if (t[i] & OP_ACTION_INDEX) {
390 if (t[i] & OP_AMOUNT_2)
391 match = check_index(p, 2, b >> 1);
392 else if (t[i] & OP_AMOUNT_4)
393 match = check_index(p, 4, b >> 2);
394 else if (t[i] & OP_AMOUNT_8)
395 match = check_index(p, 8, 0);
396 else
397 return false;
398 if (!match)
399 return false;
400 }
401
402 b += t[i] & OP_AMOUNT;
403 }
404
405 return true;
406}
407
408static void get_next_data(struct sw842_param *p)
409{
410 p->data8[0] = get_input_data(p, 0, 64);
411 p->data4[0] = get_input_data(p, 0, 32);
412 p->data4[1] = get_input_data(p, 4, 32);
413 p->data2[0] = get_input_data(p, 0, 16);
414 p->data2[1] = get_input_data(p, 2, 16);
415 p->data2[2] = get_input_data(p, 4, 16);
416 p->data2[3] = get_input_data(p, 6, 16);
417}
418
419/* update the hashtable entries.
420 * only call this after finding/adding the current template
421 * the dataN fields for the current 8 byte block must be already updated
422 */
423static void update_hashtables(struct sw842_param *p)
424{
425 u64 pos = p->in - p->instart;
426 u64 n8 = (pos >> 3) % (1 << I8_BITS);
427 u64 n4 = (pos >> 2) % (1 << I4_BITS);
428 u64 n2 = (pos >> 1) % (1 << I2_BITS);
429
430 replace_hash(p, 8, n8, 0);
431 replace_hash(p, 4, n4, 0);
432 replace_hash(p, 4, n4, 1);
433 replace_hash(p, 2, n2, 0);
434 replace_hash(p, 2, n2, 1);
435 replace_hash(p, 2, n2, 2);
436 replace_hash(p, 2, n2, 3);
437}
438
439/* find the next template to use, and add it
440 * the p->dataN fields must already be set for the current 8 byte block
441 */
442static int process_next(struct sw842_param *p)
443{
444 int ret, i;
445
446 p->index8[0] = INDEX_NOT_CHECKED;
447 p->index4[0] = INDEX_NOT_CHECKED;
448 p->index4[1] = INDEX_NOT_CHECKED;
449 p->index2[0] = INDEX_NOT_CHECKED;
450 p->index2[1] = INDEX_NOT_CHECKED;
451 p->index2[2] = INDEX_NOT_CHECKED;
452 p->index2[3] = INDEX_NOT_CHECKED;
453
454 /* check up to OPS_MAX - 1; last op is our fallback */
455 for (i = 0; i < OPS_MAX - 1; i++) {
456 if (check_template(p, i))
457 break;
458 }
459
460 ret = add_template(p, i);
461 if (ret)
462 return ret;
463
464 return 0;
465}
466
467/**
468 * sw842_compress
469 *
470 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
471 * @out, using no more than @olen bytes, using the 842 compression format.
472 *
473 * Returns: 0 on success, error on failure. The @olen parameter
474 * will contain the number of output bytes written on success, or
475 * 0 on error.
476 */
477int sw842_compress(const u8 *in, unsigned int ilen,
478 u8 *out, unsigned int *olen, void *wmem)
479{
480 struct sw842_param *p = (struct sw842_param *)wmem;
481 int ret;
482 u64 last, next, pad, total;
483 u8 repeat_count = 0;
484 u32 crc;
485
486 BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
487
488 init_hashtable_nodes(p, 8);
489 init_hashtable_nodes(p, 4);
490 init_hashtable_nodes(p, 2);
491
492 p->in = (u8 *)in;
493 p->instart = p->in;
494 p->ilen = ilen;
495 p->out = out;
496 p->olen = *olen;
497 p->bit = 0;
498
499 total = p->olen;
500
501 *olen = 0;
502
503 /* if using strict mode, we can only compress a multiple of 8 */
504 if (sw842_strict && (ilen % 8)) {
505 pr_err("Using strict mode, can't compress len %d\n", ilen);
506 return -EINVAL;
507 }
508
509 /* let's compress at least 8 bytes, mkay? */
510 if (unlikely(ilen < 8))
511 goto skip_comp;
512
513 /* make initial 'last' different so we don't match the first time */
514 last = ~get_unaligned((u64 *)p->in);
515
516 while (p->ilen > 7) {
517 next = get_unaligned((u64 *)p->in);
518
519 /* must get the next data, as we need to update the hashtable
520 * entries with the new data every time
521 */
522 get_next_data(p);
523
524 /* we don't care about endianness in last or next;
525 * we're just comparing 8 bytes to another 8 bytes,
526 * they're both the same endianness
527 */
528 if (next == last) {
529 /* repeat count bits are 0-based, so we stop at +1 */
530 if (++repeat_count <= REPEAT_BITS_MAX)
531 goto repeat;
532 }
533 if (repeat_count) {
534 ret = add_repeat_template(p, repeat_count);
535 repeat_count = 0;
536 if (next == last) /* reached max repeat bits */
537 goto repeat;
538 }
539
540 if (next == 0)
541 ret = add_zeros_template(p);
542 else
543 ret = process_next(p);
544
545 if (ret)
546 return ret;
547
548repeat:
549 last = next;
550 update_hashtables(p);
551 p->in += 8;
552 p->ilen -= 8;
553 }
554
555 if (repeat_count) {
556 ret = add_repeat_template(p, repeat_count);
557 if (ret)
558 return ret;
559 }
560
561skip_comp:
562 if (p->ilen > 0) {
563 ret = add_short_data_template(p, p->ilen);
564 if (ret)
565 return ret;
566
567 p->in += p->ilen;
568 p->ilen = 0;
569 }
570
571 ret = add_end_template(p);
572 if (ret)
573 return ret;
574
575 /*
576 * crc(0:31) is appended to target data starting with the next
577 * bit after End of stream template.
578 * nx842 calculates CRC for data in big-endian format. So doing
579 * same here so that sw842 decompression can be used for both
580 * compressed data.
581 */
582 crc = crc32_be(0, in, ilen);
583 ret = add_bits(p, crc, CRC_BITS);
584 if (ret)
585 return ret;
586
587 if (p->bit) {
588 p->out++;
589 p->olen--;
590 p->bit = 0;
591 }
592
593 /* pad compressed length to multiple of 8 */
594 pad = (8 - ((total - p->olen) % 8)) % 8;
595 if (pad) {
596 if (pad > p->olen) /* we were so close! */
597 return -ENOSPC;
598 memset(p->out, 0, pad);
599 p->out += pad;
600 p->olen -= pad;
601 }
602
603 if (unlikely((total - p->olen) > UINT_MAX))
604 return -ENOSPC;
605
606 *olen = total - p->olen;
607
608 return 0;
609}
610EXPORT_SYMBOL_GPL(sw842_compress);
611
612static int __init sw842_init(void)
613{
614 if (sw842_template_counts)
615 sw842_debugfs_create();
616
617 return 0;
618}
619module_init(sw842_init);
620
621static void __exit sw842_exit(void)
622{
623 if (sw842_template_counts)
624 sw842_debugfs_remove();
625}
626module_exit(sw842_exit);
627
628MODULE_LICENSE("GPL");
629MODULE_DESCRIPTION("Software 842 Compressor");
630MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
1/*
2 * 842 Software Compression
3 *
4 * Copyright (C) 2015 Dan Streetman, IBM Corp
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * See 842.h for details of the 842 compressed format.
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20#define MODULE_NAME "842_compress"
21
22#include <linux/hashtable.h>
23
24#include "842.h"
25#include "842_debugfs.h"
26
/* "bits" argument handed to DECLARE_HASHTABLE() for each lookup table */
#define SW842_HASHTABLE8_BITS	10	/* htable8: 8-byte values */
#define SW842_HASHTABLE4_BITS	11	/* htable4: 4-byte values */
#define SW842_HASHTABLE2_BITS	10	/* htable2: 2-byte values */
30
/* By default, we allow compressing input buffers of any length, but we must
 * use the non-standard "short data" template so the decompressor can correctly
 * reproduce the uncompressed data buffer at the right length.  However the
 * hardware 842 decompressor will not recognize the "short data" template, and
 * will fail to decompress any compressed buffer containing it (I have no idea
 * why anyone would want to use software to compress and hardware to decompress
 * but that's beside the point).  This parameter forces the compression
 * function to simply reject any input buffer that isn't a multiple of 8 bytes
 * long, instead of using the "short data" template, so that all compressed
 * buffers produced by this function will be decompressible by the 842 hardware
 * decompressor.  Unless you have a specific need for that, leave this disabled
 * so that any length buffer can be compressed.
 */
44static bool sw842_strict;
45module_param_named(strict, sw842_strict, bool, 0644);
46
47static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
48 { I8, N0, N0, N0, 0x19 }, /* 8 */
49 { I4, I4, N0, N0, 0x18 }, /* 18 */
50 { I4, I2, I2, N0, 0x17 }, /* 25 */
51 { I2, I2, I4, N0, 0x13 }, /* 25 */
52 { I2, I2, I2, I2, 0x12 }, /* 32 */
53 { I4, I2, D2, N0, 0x16 }, /* 33 */
54 { I4, D2, I2, N0, 0x15 }, /* 33 */
55 { I2, D2, I4, N0, 0x0e }, /* 33 */
56 { D2, I2, I4, N0, 0x09 }, /* 33 */
57 { I2, I2, I2, D2, 0x11 }, /* 40 */
58 { I2, I2, D2, I2, 0x10 }, /* 40 */
59 { I2, D2, I2, I2, 0x0d }, /* 40 */
60 { D2, I2, I2, I2, 0x08 }, /* 40 */
61 { I4, D4, N0, N0, 0x14 }, /* 41 */
62 { D4, I4, N0, N0, 0x04 }, /* 41 */
63 { I2, I2, D4, N0, 0x0f }, /* 48 */
64 { I2, D2, I2, D2, 0x0c }, /* 48 */
65 { I2, D4, I2, N0, 0x0b }, /* 48 */
66 { D2, I2, I2, D2, 0x07 }, /* 48 */
67 { D2, I2, D2, I2, 0x06 }, /* 48 */
68 { D4, I2, I2, N0, 0x03 }, /* 48 */
69 { I2, D2, D4, N0, 0x0a }, /* 56 */
70 { D2, I2, D4, N0, 0x05 }, /* 56 */
71 { D4, I2, D2, N0, 0x02 }, /* 56 */
72 { D4, D2, I2, N0, 0x01 }, /* 56 */
73 { D8, N0, N0, N0, 0x00 }, /* 64 */
74};
75
76struct sw842_hlist_node8 {
77 struct hlist_node node;
78 u64 data;
79 u8 index;
80};
81
82struct sw842_hlist_node4 {
83 struct hlist_node node;
84 u32 data;
85 u16 index;
86};
87
88struct sw842_hlist_node2 {
89 struct hlist_node node;
90 u16 data;
91 u8 index;
92};
93
/* Sentinels for the indexN[] fields; a usable index is always >= 0. */
#define INDEX_NOT_FOUND		(-1)	/* lookup done, no matching entry */
#define INDEX_NOT_CHECKED	(-2)	/* lookup not yet done for this block */
96
97struct sw842_param {
98 u8 *in;
99 u8 *instart;
100 u64 ilen;
101 u8 *out;
102 u64 olen;
103 u8 bit;
104 u64 data8[1];
105 u32 data4[2];
106 u16 data2[4];
107 int index8[1];
108 int index4[2];
109 int index2[4];
110 DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
111 DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
112 DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
113 struct sw842_hlist_node8 node8[1 << I8_BITS];
114 struct sw842_hlist_node4 node4[1 << I4_BITS];
115 struct sw842_hlist_node2 node2[1 << I2_BITS];
116};
117
/* Read a big-endian b-bit value located o bytes past p->in (any alignment). */
#define get_input_data(p, o, b)						\
	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
120
/* Empty htable<b> and reset every node<b>[] slot: data 0, index = own slot. */
#define init_hashtable_nodes(p, b)	do {				\
	int _slot;							\
	hash_init((p)->htable##b);					\
	for (_slot = 0; _slot < ARRAY_SIZE((p)->node##b); _slot++) {	\
		INIT_HLIST_NODE(&(p)->node##b[_slot].node);		\
		(p)->node##b[_slot].data = 0;				\
		(p)->node##b[_slot].index = _slot;			\
	}								\
} while (0)
130
/*
 * Look up p->data<b>[n] in htable<b>.  Stores the slot number of the first
 * entry with equal data in p->index<b>[n], or INDEX_NOT_FOUND if there is
 * none.  Evaluates to true when an entry was found.
 *
 * The p argument is parenthesized everywhere so that any pointer expression
 * can be passed, matching check_index() and replace_hash().
 */
#define find_index(p, b, n)	({					\
	struct sw842_hlist_node##b *_n;					\
	(p)->index##b[n] = INDEX_NOT_FOUND;				\
	hash_for_each_possible((p)->htable##b, _n, node,		\
			       (p)->data##b[n]) {			\
		if ((p)->data##b[n] == _n->data) {			\
			(p)->index##b[n] = _n->index;			\
			break;						\
		}							\
	}								\
	(p)->index##b[n] >= 0;						\
})
142
/* True if p->data<b>[n] has a hashtable match; looks it up at most once. */
#define check_index(p, b, n)						\
	((p)->index##b[n] != INDEX_NOT_CHECKED				\
	 ? (p)->index##b[n] >= 0					\
	 : find_index(p, b, n))
147
/* Re-key slot node<b>[i + d]: unhash it, store p->data<b>[d], hash it again. */
#define replace_hash(p, b, i, d)	do {				\
	struct sw842_hlist_node##b *_slot = &(p)->node##b[(i)+(d)];	\
	hash_del(&_slot->node);						\
	_slot->data = (p)->data##b[d];					\
	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
		 (unsigned int)_slot->index,				\
		 (unsigned int)((p)->in - (p)->instart),		\
		 (unsigned long)_slot->data);				\
	hash_add((p)->htable##b, &_slot->node, _slot->data);		\
} while (0)
158
159static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
160
161static int add_bits(struct sw842_param *p, u64 d, u8 n);
162
163static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
164{
165 int ret;
166
167 if (n <= s)
168 return -EINVAL;
169
170 ret = add_bits(p, d >> s, n - s);
171 if (ret)
172 return ret;
173 return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
174}
175
176static int add_bits(struct sw842_param *p, u64 d, u8 n)
177{
178 int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
179 u64 o;
180 u8 *out = p->out;
181
182 pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
183
184 if (n > 64)
185 return -EINVAL;
186
187 /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
188 * or if we're at the end of the output buffer and would write past end
189 */
190 if (bits > 64)
191 return __split_add_bits(p, d, n, 32);
192 else if (p->olen < 8 && bits > 32 && bits <= 56)
193 return __split_add_bits(p, d, n, 16);
194 else if (p->olen < 4 && bits > 16 && bits <= 24)
195 return __split_add_bits(p, d, n, 8);
196
197 if (DIV_ROUND_UP(bits, 8) > p->olen)
198 return -ENOSPC;
199
200 o = *out & bmask[b];
201 d <<= s;
202
203 if (bits <= 8)
204 *out = o | d;
205 else if (bits <= 16)
206 put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
207 else if (bits <= 24)
208 put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
209 else if (bits <= 32)
210 put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
211 else if (bits <= 40)
212 put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
213 else if (bits <= 48)
214 put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
215 else if (bits <= 56)
216 put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
217 else
218 put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
219
220 p->bit += n;
221
222 if (p->bit > 7) {
223 p->out += p->bit / 8;
224 p->olen -= p->bit / 8;
225 p->bit %= 8;
226 }
227
228 return 0;
229}
230
231static int add_template(struct sw842_param *p, u8 c)
232{
233 int ret, i, b = 0;
234 u8 *t = comp_ops[c];
235 bool inv = false;
236
237 if (c >= OPS_MAX)
238 return -EINVAL;
239
240 pr_debug("template %x\n", t[4]);
241
242 ret = add_bits(p, t[4], OP_BITS);
243 if (ret)
244 return ret;
245
246 for (i = 0; i < 4; i++) {
247 pr_debug("op %x\n", t[i]);
248
249 switch (t[i] & OP_AMOUNT) {
250 case OP_AMOUNT_8:
251 if (b)
252 inv = true;
253 else if (t[i] & OP_ACTION_INDEX)
254 ret = add_bits(p, p->index8[0], I8_BITS);
255 else if (t[i] & OP_ACTION_DATA)
256 ret = add_bits(p, p->data8[0], 64);
257 else
258 inv = true;
259 break;
260 case OP_AMOUNT_4:
261 if (b == 2 && t[i] & OP_ACTION_DATA)
262 ret = add_bits(p, get_input_data(p, 2, 32), 32);
263 else if (b != 0 && b != 4)
264 inv = true;
265 else if (t[i] & OP_ACTION_INDEX)
266 ret = add_bits(p, p->index4[b >> 2], I4_BITS);
267 else if (t[i] & OP_ACTION_DATA)
268 ret = add_bits(p, p->data4[b >> 2], 32);
269 else
270 inv = true;
271 break;
272 case OP_AMOUNT_2:
273 if (b != 0 && b != 2 && b != 4 && b != 6)
274 inv = true;
275 if (t[i] & OP_ACTION_INDEX)
276 ret = add_bits(p, p->index2[b >> 1], I2_BITS);
277 else if (t[i] & OP_ACTION_DATA)
278 ret = add_bits(p, p->data2[b >> 1], 16);
279 else
280 inv = true;
281 break;
282 case OP_AMOUNT_0:
283 inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
284 break;
285 default:
286 inv = true;
287 break;
288 }
289
290 if (ret)
291 return ret;
292
293 if (inv) {
294 pr_err("Invalid templ %x op %d : %x %x %x %x\n",
295 c, i, t[0], t[1], t[2], t[3]);
296 return -EINVAL;
297 }
298
299 b += t[i] & OP_AMOUNT;
300 }
301
302 if (b != 8) {
303 pr_err("Invalid template %x len %x : %x %x %x %x\n",
304 c, b, t[0], t[1], t[2], t[3]);
305 return -EINVAL;
306 }
307
308 if (sw842_template_counts)
309 atomic_inc(&template_count[t[4]]);
310
311 return 0;
312}
313
314static int add_repeat_template(struct sw842_param *p, u8 r)
315{
316 int ret;
317
318 /* repeat param is 0-based */
319 if (!r || --r > REPEAT_BITS_MAX)
320 return -EINVAL;
321
322 ret = add_bits(p, OP_REPEAT, OP_BITS);
323 if (ret)
324 return ret;
325
326 ret = add_bits(p, r, REPEAT_BITS);
327 if (ret)
328 return ret;
329
330 if (sw842_template_counts)
331 atomic_inc(&template_repeat_count);
332
333 return 0;
334}
335
336static int add_short_data_template(struct sw842_param *p, u8 b)
337{
338 int ret, i;
339
340 if (!b || b > SHORT_DATA_BITS_MAX)
341 return -EINVAL;
342
343 ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
344 if (ret)
345 return ret;
346
347 ret = add_bits(p, b, SHORT_DATA_BITS);
348 if (ret)
349 return ret;
350
351 for (i = 0; i < b; i++) {
352 ret = add_bits(p, p->in[i], 8);
353 if (ret)
354 return ret;
355 }
356
357 if (sw842_template_counts)
358 atomic_inc(&template_short_data_count);
359
360 return 0;
361}
362
363static int add_zeros_template(struct sw842_param *p)
364{
365 int ret = add_bits(p, OP_ZEROS, OP_BITS);
366
367 if (ret)
368 return ret;
369
370 if (sw842_template_counts)
371 atomic_inc(&template_zeros_count);
372
373 return 0;
374}
375
376static int add_end_template(struct sw842_param *p)
377{
378 int ret = add_bits(p, OP_END, OP_BITS);
379
380 if (ret)
381 return ret;
382
383 if (sw842_template_counts)
384 atomic_inc(&template_end_count);
385
386 return 0;
387}
388
389static bool check_template(struct sw842_param *p, u8 c)
390{
391 u8 *t = comp_ops[c];
392 int i, match, b = 0;
393
394 if (c >= OPS_MAX)
395 return false;
396
397 for (i = 0; i < 4; i++) {
398 if (t[i] & OP_ACTION_INDEX) {
399 if (t[i] & OP_AMOUNT_2)
400 match = check_index(p, 2, b >> 1);
401 else if (t[i] & OP_AMOUNT_4)
402 match = check_index(p, 4, b >> 2);
403 else if (t[i] & OP_AMOUNT_8)
404 match = check_index(p, 8, 0);
405 else
406 return false;
407 if (!match)
408 return false;
409 }
410
411 b += t[i] & OP_AMOUNT;
412 }
413
414 return true;
415}
416
417static void get_next_data(struct sw842_param *p)
418{
419 p->data8[0] = get_input_data(p, 0, 64);
420 p->data4[0] = get_input_data(p, 0, 32);
421 p->data4[1] = get_input_data(p, 4, 32);
422 p->data2[0] = get_input_data(p, 0, 16);
423 p->data2[1] = get_input_data(p, 2, 16);
424 p->data2[2] = get_input_data(p, 4, 16);
425 p->data2[3] = get_input_data(p, 6, 16);
426}
427
428/* update the hashtable entries.
429 * only call this after finding/adding the current template
430 * the dataN fields for the current 8 byte block must be already updated
431 */
432static void update_hashtables(struct sw842_param *p)
433{
434 u64 pos = p->in - p->instart;
435 u64 n8 = (pos >> 3) % (1 << I8_BITS);
436 u64 n4 = (pos >> 2) % (1 << I4_BITS);
437 u64 n2 = (pos >> 1) % (1 << I2_BITS);
438
439 replace_hash(p, 8, n8, 0);
440 replace_hash(p, 4, n4, 0);
441 replace_hash(p, 4, n4, 1);
442 replace_hash(p, 2, n2, 0);
443 replace_hash(p, 2, n2, 1);
444 replace_hash(p, 2, n2, 2);
445 replace_hash(p, 2, n2, 3);
446}
447
448/* find the next template to use, and add it
449 * the p->dataN fields must already be set for the current 8 byte block
450 */
451static int process_next(struct sw842_param *p)
452{
453 int ret, i;
454
455 p->index8[0] = INDEX_NOT_CHECKED;
456 p->index4[0] = INDEX_NOT_CHECKED;
457 p->index4[1] = INDEX_NOT_CHECKED;
458 p->index2[0] = INDEX_NOT_CHECKED;
459 p->index2[1] = INDEX_NOT_CHECKED;
460 p->index2[2] = INDEX_NOT_CHECKED;
461 p->index2[3] = INDEX_NOT_CHECKED;
462
463 /* check up to OPS_MAX - 1; last op is our fallback */
464 for (i = 0; i < OPS_MAX - 1; i++) {
465 if (check_template(p, i))
466 break;
467 }
468
469 ret = add_template(p, i);
470 if (ret)
471 return ret;
472
473 return 0;
474}
475
476/**
477 * sw842_compress
478 *
479 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
480 * @out, using no more than @olen bytes, using the 842 compression format.
481 *
482 * Returns: 0 on success, error on failure. The @olen parameter
483 * will contain the number of output bytes written on success, or
484 * 0 on error.
485 */
486int sw842_compress(const u8 *in, unsigned int ilen,
487 u8 *out, unsigned int *olen, void *wmem)
488{
489 struct sw842_param *p = (struct sw842_param *)wmem;
490 int ret;
491 u64 last, next, pad, total;
492 u8 repeat_count = 0;
493 u32 crc;
494
495 BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
496
497 init_hashtable_nodes(p, 8);
498 init_hashtable_nodes(p, 4);
499 init_hashtable_nodes(p, 2);
500
501 p->in = (u8 *)in;
502 p->instart = p->in;
503 p->ilen = ilen;
504 p->out = out;
505 p->olen = *olen;
506 p->bit = 0;
507
508 total = p->olen;
509
510 *olen = 0;
511
512 /* if using strict mode, we can only compress a multiple of 8 */
513 if (sw842_strict && (ilen % 8)) {
514 pr_err("Using strict mode, can't compress len %d\n", ilen);
515 return -EINVAL;
516 }
517
518 /* let's compress at least 8 bytes, mkay? */
519 if (unlikely(ilen < 8))
520 goto skip_comp;
521
522 /* make initial 'last' different so we don't match the first time */
523 last = ~get_unaligned((u64 *)p->in);
524
525 while (p->ilen > 7) {
526 next = get_unaligned((u64 *)p->in);
527
528 /* must get the next data, as we need to update the hashtable
529 * entries with the new data every time
530 */
531 get_next_data(p);
532
533 /* we don't care about endianness in last or next;
534 * we're just comparing 8 bytes to another 8 bytes,
535 * they're both the same endianness
536 */
537 if (next == last) {
538 /* repeat count bits are 0-based, so we stop at +1 */
539 if (++repeat_count <= REPEAT_BITS_MAX)
540 goto repeat;
541 }
542 if (repeat_count) {
543 ret = add_repeat_template(p, repeat_count);
544 repeat_count = 0;
545 if (next == last) /* reached max repeat bits */
546 goto repeat;
547 }
548
549 if (next == 0)
550 ret = add_zeros_template(p);
551 else
552 ret = process_next(p);
553
554 if (ret)
555 return ret;
556
557repeat:
558 last = next;
559 update_hashtables(p);
560 p->in += 8;
561 p->ilen -= 8;
562 }
563
564 if (repeat_count) {
565 ret = add_repeat_template(p, repeat_count);
566 if (ret)
567 return ret;
568 }
569
570skip_comp:
571 if (p->ilen > 0) {
572 ret = add_short_data_template(p, p->ilen);
573 if (ret)
574 return ret;
575
576 p->in += p->ilen;
577 p->ilen = 0;
578 }
579
580 ret = add_end_template(p);
581 if (ret)
582 return ret;
583
584 /*
585 * crc(0:31) is appended to target data starting with the next
586 * bit after End of stream template.
587 * nx842 calculates CRC for data in big-endian format. So doing
588 * same here so that sw842 decompression can be used for both
589 * compressed data.
590 */
591 crc = crc32_be(0, in, ilen);
592 ret = add_bits(p, crc, CRC_BITS);
593 if (ret)
594 return ret;
595
596 if (p->bit) {
597 p->out++;
598 p->olen--;
599 p->bit = 0;
600 }
601
602 /* pad compressed length to multiple of 8 */
603 pad = (8 - ((total - p->olen) % 8)) % 8;
604 if (pad) {
605 if (pad > p->olen) /* we were so close! */
606 return -ENOSPC;
607 memset(p->out, 0, pad);
608 p->out += pad;
609 p->olen -= pad;
610 }
611
612 if (unlikely((total - p->olen) > UINT_MAX))
613 return -ENOSPC;
614
615 *olen = total - p->olen;
616
617 return 0;
618}
619EXPORT_SYMBOL_GPL(sw842_compress);
620
621static int __init sw842_init(void)
622{
623 if (sw842_template_counts)
624 sw842_debugfs_create();
625
626 return 0;
627}
628module_init(sw842_init);
629
630static void __exit sw842_exit(void)
631{
632 if (sw842_template_counts)
633 sw842_debugfs_remove();
634}
635module_exit(sw842_exit);
636
637MODULE_LICENSE("GPL");
638MODULE_DESCRIPTION("Software 842 Compressor");
639MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");