Linux Audio

Check our new training course

Loading...
v6.13.7
  1/*
  2 * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
  3 *
  4 * The white papers on CRC32C calculations with PCLMULQDQ instruction can be
  5 * downloaded from:
  6 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
  7 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
  8 *
  9 * Copyright (C) 2012 Intel Corporation.
 10 * Copyright 2024 Google LLC
 11 *
 12 * Authors:
 13 *	Wajdi Feghali <wajdi.k.feghali@intel.com>
 14 *	James Guilford <james.guilford@intel.com>
 15 *	David Cote <david.m.cote@intel.com>
 16 *	Tim Chen <tim.c.chen@linux.intel.com>
 17 *
 18 * This software is available to you under a choice of one of two
 19 * licenses.  You may choose to be licensed under the terms of the GNU
 20 * General Public License (GPL) Version 2, available from the file
 21 * COPYING in the main directory of this source tree, or the
 22 * OpenIB.org BSD license below:
 23 *
 24 *     Redistribution and use in source and binary forms, with or
 25 *     without modification, are permitted provided that the following
 26 *     conditions are met:
 27 *
 28 *      - Redistributions of source code must retain the above
 29 *        copyright notice, this list of conditions and the following
 30 *        disclaimer.
 31 *
 32 *      - Redistributions in binary form must reproduce the above
 33 *        copyright notice, this list of conditions and the following
 34 *        disclaimer in the documentation and/or other materials
 35 *        provided with the distribution.
 36 *
 37 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 38 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 39 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 40 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 41 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 42 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 43 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 44 * SOFTWARE.
 45 */
 46
 47#include <linux/linkage.h>
 
 48
 49## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 50
 51# Define threshold below which buffers are considered "small" and routed to
 52# regular CRC code that does not interleave the CRC instructions.
 53#define SMALL_SIZE 200
 54
 55# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init);
	#
	# Computes the CRC over len bytes at buffer, starting from crc_init, and
	# returns the updated CRC in %eax.
	#
	# Register roles (see the #defines right after SYM_FUNC_START):
	#   %rdi         bufp          current read position
	#   %esi         len           bytes still to be processed
	#   %edx/%rdx    crc_init      running CRC (lane 1 CRC inside a block)
	#   %ecx/%rcx    n_misaligned  during step 1 only, then reused as
	#                chunk_bytes   (bytes per lane) in steps 3 and 4
	#   %r8, %r9     crc1, crc2    CRCs of lanes 2 and 3
	#   %eax/%rax    scratch       qword counters, table base, folded value
	#   %xmm0-%xmm2  scratch       operands of the pclmulqdq combine (step 4)
 56
 57.text
 58SYM_FUNC_START(crc_pcl)
 59#define    bufp		  %rdi
 60#define    bufp_d	  %edi
 61#define    len		  %esi
 62#define    crc_init	  %edx
 63#define    crc_init_q	  %rdx
 64#define    n_misaligned	  %ecx /* overlaps chunk_bytes! */
 65#define    n_misaligned_q %rcx
 66#define    chunk_bytes	  %ecx /* overlaps n_misaligned! */
 67#define    chunk_bytes_q  %rcx
 68#define    crc1		  %r8
 69#define    crc2		  %r9
 70
	# Buffers shorter than SMALL_SIZE bypass alignment and the 3-lane code.
 71	cmp	$SMALL_SIZE, len
 72	jb	.Lsmall
 73
 74	################################################################
 75	## 1) ALIGN:
 76	################################################################
 77	mov	bufp_d, n_misaligned
 78	neg	n_misaligned
 79	and	$7, n_misaligned	# (-address) & 7 = number of bytes up to
 80					# the next 8-byte boundary
 81	je	.Laligned		# Skip if aligned
 82
 83	# Process 1 <= n_misaligned <= 7 bytes individually in order to align
 84	# the remaining data to an 8-byte boundary.
	# A single 8-byte load fetches them all; only the low n_misaligned bytes
	# are fed to crc32b. The load stays inside the buffer because
	# len >= SMALL_SIZE on this path.
 85.Ldo_align:
 86	movq	(bufp), %rax
 87	add	n_misaligned_q, bufp
 88	sub	n_misaligned, len
 89.Lalign_loop:
 90	crc32b	%al, crc_init		# fold the lowest pending byte into the CRC
 91	shr	$8, %rax		# bring the next byte down into %al
 92	dec	n_misaligned
 93	jne     .Lalign_loop
 94.Laligned:
 95
 96	################################################################
 97	## 2) PROCESS BLOCK:
 98	################################################################
 99
	# A full block is 3 lanes x 128 qwords = 128*24 bytes. Anything shorter
	# is handled as one partial block of 3 lanes x floor(len / 24) qwords.
100	cmp	$128*24, len
101	jae     .Lfull_block
102
103.Lpartial_block:
104	# Compute floor(len / 24) to get num qwords to process from each lane.
105	imul	$2731, len, %eax	# 2731 = ceil(2^16 / 24)
106	shr	$16, %eax
107	jmp	.Lcrc_3lanes
108
109.Lfull_block:
110	# Processing 128 qwords from each lane.
111	mov	$128, %eax
112
113	################################################################
114	## 3) CRC each of three lanes:
115	################################################################
116
	# On entry %eax = qwords per lane. Lane 1 starts at bufp, lane 2 at
	# bufp + chunk_bytes and lane 3 at bufp + 2*chunk_bytes. Lane 1 keeps
	# accumulating into crc_init; lanes 2 and 3 start from zero.
117.Lcrc_3lanes:
118	xor	crc1,crc1
119	xor     crc2,crc2
120	mov	%eax, chunk_bytes
121	shl	$3, chunk_bytes		# num bytes to process from each lane
122	sub	$5, %eax		# 4 for 4x_loop, 1 for special last iter
123	jl	.Lcrc_3lanes_4x_done
124
125	# Unroll the loop by a factor of 4 to reduce the overhead of the loop
126	# bookkeeping instructions, which can compete with crc32q for the ALUs.
127.Lcrc_3lanes_4x_loop:
128	crc32q	(bufp), crc_init_q
129	crc32q	(bufp,chunk_bytes_q), crc1
130	crc32q	(bufp,chunk_bytes_q,2), crc2
131	crc32q	8(bufp), crc_init_q
132	crc32q	8(bufp,chunk_bytes_q), crc1
133	crc32q	8(bufp,chunk_bytes_q,2), crc2
134	crc32q	16(bufp), crc_init_q
135	crc32q	16(bufp,chunk_bytes_q), crc1
136	crc32q	16(bufp,chunk_bytes_q,2), crc2
137	crc32q	24(bufp), crc_init_q
138	crc32q	24(bufp,chunk_bytes_q), crc1
139	crc32q	24(bufp,chunk_bytes_q,2), crc2
140	add	$32, bufp
141	sub	$4, %eax
142	jge	.Lcrc_3lanes_4x_loop
143
144.Lcrc_3lanes_4x_done:
145	add	$4, %eax		# qwords left before the final one
146	jz	.Lcrc_3lanes_last_qword
147
	# One qword per lane per iteration for the leftover qwords.
148.Lcrc_3lanes_1x_loop:
149	crc32q	(bufp), crc_init_q
150	crc32q	(bufp,chunk_bytes_q), crc1
151	crc32q	(bufp,chunk_bytes_q,2), crc2
152	add	$8, bufp
153	dec	%eax
154	jnz	.Lcrc_3lanes_1x_loop
155
	# Final qword of lanes 1 and 2. bufp is left pointing at the last qword
	# of lane 1; the last qword of lane 3 is consumed in step 4 instead.
156.Lcrc_3lanes_last_qword:
157	crc32q	(bufp), crc_init_q
158	crc32q	(bufp,chunk_bytes_q), crc1
159# SKIP  crc32q	(bufp,chunk_bytes_q,2), crc2	; Don't do this one yet
160
161	################################################################
162	## 4) Combine three results:
163	################################################################
164
	# chunk_bytes is 8 * (qwords per lane), so it doubles as the byte offset
	# of the matching 8-byte K_table entry once the base is biased by -8.
165	lea	(K_table-8)(%rip), %rax		# first entry is for idx 1
166	pmovzxdq (%rax,chunk_bytes_q), %xmm0	# 2 consts: K1:K2
167	lea	(chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
168	sub	%eax, len			# len -= chunk_bytes * 3
169
170	movq	crc_init_q, %xmm1		# CRC for block 1
171	pclmulqdq $0x00, %xmm0, %xmm1		# Multiply by K2
172
173	movq    crc1, %xmm2			# CRC for block 2
174	pclmulqdq $0x10, %xmm0, %xmm2		# Multiply by K1
175
	# XOR the two products with the last qword of lane 3 and run that value
	# through crc32 on top of crc2; the result is the CRC of the whole block.
176	pxor    %xmm2,%xmm1
177	movq    %xmm1, %rax
178	xor	(bufp,chunk_bytes_q,2), %rax
179	mov	crc2, crc_init_q
180	crc32	%rax, crc_init_q
181	lea	8(bufp,chunk_bytes_q,2), bufp	# first byte after lane 3
182
183	################################################################
184	## 5) If more blocks remain, goto (2):
185	################################################################
186
187	cmp	$128*24, len
188	jae	.Lfull_block
189	cmp	$SMALL_SIZE, len
190	jae	.Lpartial_block
191
192	#######################################################################
193	## 6) Process any remainder without interleaving:
194	#######################################################################
	# Reached with len < SMALL_SIZE, either directly from function entry or
	# by falling through from step 5. Whole qwords first, then the 4-, 2-
	# and 1-byte tail pieces selected by bits 2, 1 and 0 of len.
195.Lsmall:
196	test	len, len
197	jz	.Ldone
198	mov	len, %eax
199	shr	$3, %eax		# number of whole qwords
200	jz	.Ldo_dword
201.Ldo_qwords:
202	crc32q	(bufp), crc_init_q
203	add	$8, bufp
204	dec	%eax
205	jnz	.Ldo_qwords
206.Ldo_dword:
207	test	$4, len
208	jz	.Ldo_word
209	crc32l	(bufp), crc_init
210	add	$4, bufp
211.Ldo_word:
212	test	$2, len
213	jz	.Ldo_byte
214	crc32w	(bufp), crc_init
215	add	$2, bufp
216.Ldo_byte:
217	test	$1, len
218	jz	.Ldone
219	crc32b	(bufp), crc_init
220.Ldone:
221	mov	crc_init, %eax		# return value
222        RET
223SYM_FUNC_END(crc_pcl)
224
225.section	.rodata, "a", @progbits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226	################################################################
227	## PCLMULQDQ tables (entry n is used for a lane length of n qwords)
228	## Table is 128 entries x 2 32-bit constants (8 bytes) each
229	################################################################
230.align 8
231K_table:
232	.long 0x493c7d27, 0x00000001
233	.long 0xba4fc28e, 0x493c7d27
234	.long 0xddc0152b, 0xf20c0dfe
235	.long 0x9e4addf8, 0xba4fc28e
236	.long 0x39d3b296, 0x3da6d0cb
237	.long 0x0715ce53, 0xddc0152b
238	.long 0x47db8317, 0x1c291d04
239	.long 0x0d3b6092, 0x9e4addf8
240	.long 0xc96cfdc0, 0x740eef02
241	.long 0x878a92a7, 0x39d3b296
242	.long 0xdaece73e, 0x083a6eec
243	.long 0xab7aff2a, 0x0715ce53
244	.long 0x2162d385, 0xc49f4f67
245	.long 0x83348832, 0x47db8317
246	.long 0x299847d5, 0x2ad91c30
247	.long 0xb9e02b86, 0x0d3b6092
248	.long 0x18b33a4e, 0x6992cea2
249	.long 0xb6dd949b, 0xc96cfdc0
250	.long 0x78d9ccb7, 0x7e908048
251	.long 0xbac2fd7b, 0x878a92a7
252	.long 0xa60ce07b, 0x1b3d8f29
253	.long 0xce7f39f4, 0xdaece73e
254	.long 0x61d82e56, 0xf1d0f55e
255	.long 0xd270f1a2, 0xab7aff2a
256	.long 0xc619809d, 0xa87ab8a8
257	.long 0x2b3cac5d, 0x2162d385
258	.long 0x65863b64, 0x8462d800
259	.long 0x1b03397f, 0x83348832
260	.long 0xebb883bd, 0x71d111a8
261	.long 0xb3e32c28, 0x299847d5
262	.long 0x064f7f26, 0xffd852c6
263	.long 0xdd7e3b0c, 0xb9e02b86
264	.long 0xf285651c, 0xdcb17aa4
265	.long 0x10746f3c, 0x18b33a4e
266	.long 0xc7a68855, 0xf37c5aee
267	.long 0x271d9844, 0xb6dd949b
268	.long 0x8e766a0c, 0x6051d5a2
269	.long 0x93a5f730, 0x78d9ccb7
270	.long 0x6cb08e5c, 0x18b0d4ff
271	.long 0x6b749fb2, 0xbac2fd7b
272	.long 0x1393e203, 0x21f3d99c
273	.long 0xcec3662e, 0xa60ce07b
274	.long 0x96c515bb, 0x8f158014
275	.long 0xe6fc4e6a, 0xce7f39f4
276	.long 0x8227bb8a, 0xa00457f7
277	.long 0xb0cd4768, 0x61d82e56
278	.long 0x39c7ff35, 0x8d6d2c43
279	.long 0xd7a4825c, 0xd270f1a2
280	.long 0x0ab3844b, 0x00ac29cf
281	.long 0x0167d312, 0xc619809d
282	.long 0xf6076544, 0xe9adf796
283	.long 0x26f6a60a, 0x2b3cac5d
284	.long 0xa741c1bf, 0x96638b34
285	.long 0x98d8d9cb, 0x65863b64
286	.long 0x49c3cc9c, 0xe0e9f351
287	.long 0x68bce87a, 0x1b03397f
288	.long 0x57a3d037, 0x9af01f2d
289	.long 0x6956fc3b, 0xebb883bd
290	.long 0x42d98888, 0x2cff42cf
291	.long 0x3771e98f, 0xb3e32c28
292	.long 0xb42ae3d9, 0x88f25a3a
293	.long 0x2178513a, 0x064f7f26
294	.long 0xe0ac139e, 0x4e36f0b0
295	.long 0x170076fa, 0xdd7e3b0c
296	.long 0x444dd413, 0xbd6f81f8
297	.long 0x6f345e45, 0xf285651c
298	.long 0x41d17b64, 0x91c9bd4b
299	.long 0xff0dba97, 0x10746f3c
300	.long 0xa2b73df1, 0x885f087b
301	.long 0xf872e54c, 0xc7a68855
302	.long 0x1e41e9fc, 0x4c144932
303	.long 0x86d8e4d2, 0x271d9844
304	.long 0x651bd98b, 0x52148f02
305	.long 0x5bb8f1bc, 0x8e766a0c
306	.long 0xa90fd27a, 0xa3c6f37a
307	.long 0xb3af077a, 0x93a5f730
308	.long 0x4984d782, 0xd7c0557f
309	.long 0xca6ef3ac, 0x6cb08e5c
310	.long 0x234e0b26, 0x63ded06a
311	.long 0xdd66cbbb, 0x6b749fb2
312	.long 0x4597456a, 0x4d56973c
313	.long 0xe9e28eb4, 0x1393e203
314	.long 0x7b3ff57a, 0x9669c9df
315	.long 0xc9c8b782, 0xcec3662e
316	.long 0x3f70cc6f, 0xe417f38a
317	.long 0x93e106a4, 0x96c515bb
318	.long 0x62ec6c6d, 0x4b9e0f71
319	.long 0xd813b325, 0xe6fc4e6a
320	.long 0x0df04680, 0xd104b8fc
321	.long 0x2342001e, 0x8227bb8a
322	.long 0x0a2a8d7e, 0x5b397730
323	.long 0x6d9a4957, 0xb0cd4768
324	.long 0xe8b6368b, 0xe78eb416
325	.long 0xd2c3ed1a, 0x39c7ff35
326	.long 0x995a5724, 0x61ff0e01
327	.long 0x9ef68d35, 0xd7a4825c
328	.long 0x0c139b31, 0x8d96551c
329	.long 0xf2271e60, 0x0ab3844b
330	.long 0x0b0bf8ca, 0x0bf80dd2
331	.long 0x2664fd8b, 0x0167d312
332	.long 0xed64812d, 0x8821abed
333	.long 0x02ee03b2, 0xf6076544
334	.long 0x8604ae0f, 0x6a45d2b2
335	.long 0x363bd6b3, 0x26f6a60a
336	.long 0x135c83fd, 0xd8d26619
337	.long 0x5fabe670, 0xa741c1bf
338	.long 0x35ec3279, 0xde87806c
339	.long 0x00bcf5f6, 0x98d8d9cb
340	.long 0x8ae00689, 0x14338754
341	.long 0x17f27698, 0x49c3cc9c
342	.long 0x58ca5f00, 0x5bd2011f
343	.long 0xaa7c7ad5, 0x68bce87a
344	.long 0xb5cfca28, 0xdd07448e
345	.long 0xded288f8, 0x57a3d037
346	.long 0x59f229bc, 0xdde8f5b9
347	.long 0x6d390dec, 0x6956fc3b
348	.long 0x37170390, 0xa3e3e02c
349	.long 0x6353c1cc, 0x42d98888
350	.long 0xc4584f5c, 0xd73c7bea
351	.long 0xf48642e9, 0x3771e98f
352	.long 0x531377e2, 0x80ff0093
353	.long 0xdd35bc8d, 0xb42ae3d9
354	.long 0xb25b29f2, 0x8fe4c34d
355	.long 0x9a5ede41, 0x2178513a
356	.long 0xa563905d, 0xdf99fc11
357	.long 0x45cddf4e, 0xe0ac139e
358	.long 0xacfa3103, 0x6c23e841
359	.long 0xa51b6135, 0x170076fa
v6.8
  1/*
  2 * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
  3 *
  4 * The white papers on CRC32C calculations with PCLMULQDQ instruction can be
  5 * downloaded from:
  6 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
  7 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
  8 *
  9 * Copyright (C) 2012 Intel Corporation.
 
 10 *
 11 * Authors:
 12 *	Wajdi Feghali <wajdi.k.feghali@intel.com>
 13 *	James Guilford <james.guilford@intel.com>
 14 *	David Cote <david.m.cote@intel.com>
 15 *	Tim Chen <tim.c.chen@linux.intel.com>
 16 *
 17 * This software is available to you under a choice of one of two
 18 * licenses.  You may choose to be licensed under the terms of the GNU
 19 * General Public License (GPL) Version 2, available from the file
 20 * COPYING in the main directory of this source tree, or the
 21 * OpenIB.org BSD license below:
 22 *
 23 *     Redistribution and use in source and binary forms, with or
 24 *     without modification, are permitted provided that the following
 25 *     conditions are met:
 26 *
 27 *      - Redistributions of source code must retain the above
 28 *        copyright notice, this list of conditions and the following
 29 *        disclaimer.
 30 *
 31 *      - Redistributions in binary form must reproduce the above
 32 *        copyright notice, this list of conditions and the following
 33 *        disclaimer in the documentation and/or other materials
 34 *        provided with the distribution.
 35 *
 36 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 37 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 38 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 39 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 40 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 41 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 42 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 43 * SOFTWARE.
 44 */
 45
 46#include <linux/linkage.h>
 47#include <asm/nospec-branch.h>
 48
 49## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 50
	# LABEL prefix n: define the local label .L<prefix><n>.
 51.macro LABEL prefix n
 52.L\prefix\n\():
 53.endm
 54
	# JMPTBL_ENTRY i: emit one 8-byte table slot holding the address of .Lcrc_<i>.
 55.macro JMPTBL_ENTRY i
 56.quad .Lcrc_\i
 57.endm
 58
	# JNC_LESS_THAN j: branch to .Lless_than_<j> when the carry flag is clear.
 59.macro JNC_LESS_THAN j
 60	jnc .Lless_than_\j
 61.endm
 62
 63# Define threshold where buffers are considered "small" and routed to more
 64# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
 65# SMALL_SIZE can be no larger than 255.
 66
 67#define SMALL_SIZE 200
 68
	# Stop the build with a readable diagnostic when the limit above is broken.
 69.if (SMALL_SIZE > 255)
 70.error "SMALL_SIZE must be < 256"
 71.endif
 72
 73# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
	#
	# Computes the CRC over len bytes at buffer, starting from crc_init, and
	# returns the updated CRC in %rax.
	#
	# Register aliasing to keep in mind while reading (see the #defines):
	#   %rcx is both bufptmp and block_0, %rdx is both crc_init_arg and
	#   block_1, and bufp (rdi) is reused as a scratch register once the
	#   buffer pointer has been copied to bufptmp.
 74
 75.text
 76SYM_FUNC_START(crc_pcl)
 77#define    bufp		rdi
 78#define    bufp_dw	%edi
 79#define    bufp_w	%di
 80#define    bufp_b	%dil
 81#define    bufptmp	%rcx
 82#define    block_0	%rcx
 83#define    block_1	%rdx
 84#define    block_2	%r11
 85#define    len		%rsi
 86#define    len_dw	%esi
 87#define    len_w	%si
 88#define    len_b	%sil
 89#define    crc_init_arg %rdx
 90#define    tmp		%rbx
 91#define    crc_init	%r8
 92#define    crc_init_dw	%r8d
 93#define    crc1		%r9
 94#define    crc2		%r10
 95
	# Saved here and restored at .Ldo_return: %rbx serves as tmp, and
	# %rdi / %rsi are overwritten below.
 96	pushq   %rbx
 97	pushq   %rdi
 98	pushq   %rsi
 99
100	## Move crc_init to a different register: %rdx (crc_init_arg) is
	## reused as block_1 and is also written by the mul in step 2.
101	mov     crc_init_arg, crc_init
102
103	################################################################
104	## 1) ALIGN:
105	################################################################
106
107	mov     %bufp, bufptmp		# bufptmp = buffer pointer
108	neg     %bufp
109	and     $7, %bufp		# (-address) & 7 = number of bytes up to
110					# the next 8-byte boundary
111	je      .Lproc_block		# Skip if aligned
112
113	## If len is less than 8 and we're unaligned, we need to jump
114	## to special code to avoid reading beyond the end of the buffer
115	cmp     $8, len
116	jae     .Ldo_align
117	# less_than_8 expects length in upper 3 bits of len_dw
118	# less_than_8_post_shl1 expects length = carryflag * 4 + len_dw[31:30]
119	shl     $32-3+1, len_dw
120	jmp     .Lless_than_8_post_shl1
121
122.Ldo_align:
123	#### Calculate CRC of unaligned bytes of the buffer (if any)
124	movq    (bufptmp), tmp		# load a quadword from the buffer
125	add     %bufp, bufptmp		# align buffer pointer for quadword
126					# processing
127	sub     %bufp, len		# update buffer length
128.Lalign_loop:
129	crc32b  %bl, crc_init_dw 	# compute crc32 of 1-byte
130	shr     $8, tmp			# get next byte
131	dec     %bufp
132	jne     .Lalign_loop
133
134.Lproc_block:
135
136	################################################################
137	## 2) PROCESS  BLOCKS:
138	################################################################
139
140	## compute num of bytes to be processed
141	movq    len, tmp		# save num bytes in tmp
142
143	cmpq    $128*24, len
144	jae     .Lfull_block
145
146.Lcontinue_block:
147	cmpq    $SMALL_SIZE, len
148	jb      .Lsmall
149
150	## len < 128*24
151	movq    $2731, %rax		# 2731 = ceil(2^16 / 24)
152	mul     len_dw
153	shrq    $16, %rax
154
155	## eax contains floor(bytes / 24) = num 24-byte chunks to do
156
157	## process rax 24-byte chunks (128 >= rax >= 0)
158
159	## compute end address of each block
160	## block 0 (base addr + RAX * 8)
161	## block 1 (base addr + RAX * 16)
162	## block 2 (base addr + RAX * 24)
	## block_0 is the same register as bufptmp, so the first lea replaces
	## the buffer pointer with the end address of block 0.
163	lea     (bufptmp, %rax, 8), block_0
164	lea     (block_0, %rax, 8), block_1
165	lea     (block_1, %rax, 8), block_2
166
167	xor     crc1, crc1
168	xor     crc2, crc2
169
170	## branch into array
	## jump_table[rax] is the address of .Lcrc_<rax>, so exactly rax qwords
	## per block are processed by the unrolled code in step 3.
171	leaq	jump_table(%rip), %bufp
172	mov	(%bufp,%rax,8), %bufp
173	JMP_NOSPEC bufp
174
175	################################################################
176	## 2a) PROCESS FULL BLOCKS:
177	################################################################
	## Entered with block_0 (= bufptmp) still holding the current buffer
	## position; all three pointers are set to the END of their 128-qword
	## block because step 3 addresses data with negative offsets.
178.Lfull_block:
179	movl    $128,%eax
180	lea     128*8*2(block_0), block_1
181	lea     128*8*3(block_0), block_2
182	add     $128*8*1, block_0
183
184	xor     crc1,crc1
185	xor     crc2,crc2
186
187	# Fall through into top of crc array (crc_128)
188
189	################################################################
190	## 3) CRC Array:
191	################################################################
192
	# Emits labels .Lcrc_128 down to .Lcrc_2, each followed by one crc32q
	# per block at offset -i*8 from the block end pointers.
	# NOTE(review): ENDBR is defined outside this file; presumably it marks
	# these labels as valid targets of the indirect jump above - confirm.
193	i=128
194.rept 128-1
195.altmacro
196LABEL crc_ %i
197.noaltmacro
198	ENDBR
199	crc32q   -i*8(block_0), crc_init
200	crc32q   -i*8(block_1), crc1
201	crc32q   -i*8(block_2), crc2
202	i=(i-1)
203.endr
204
	# i = 1 here: last qword of blocks 0 and 1. The last qword of block 2 is
	# consumed in step 4 instead.
205.altmacro
206LABEL crc_ %i
207.noaltmacro
208	ENDBR
209	crc32q   -i*8(block_0), crc_init
210	crc32q   -i*8(block_1), crc1
211# SKIP  crc32  -i*8(block_2), crc2 ; Don't do this one yet
212
213	mov     block_2, block_0	# bufptmp (same register) = end of block 2
214
215	################################################################
216	## 4) Combine three results:
217	################################################################
218
219	lea	(K_table-8)(%rip), %bufp		# first entry is for idx 1
220	shlq    $3, %rax			# rax *= 8
221	pmovzxdq (%bufp,%rax), %xmm0		# 2 consts: K1:K2
222	leal	(%eax,%eax,2), %eax		# rax *= 3 (total *24)
223	subq    %rax, tmp			# tmp -= rax*24
224
225	movq    crc_init, %xmm1			# CRC for block 1
226	pclmulqdq $0x00, %xmm0, %xmm1		# Multiply by K2
227
228	movq    crc1, %xmm2			# CRC for block 2
229	pclmulqdq $0x10, %xmm0, %xmm2		# Multiply by K1
230
	# XOR the two products with the last qword of block 2 (i is still 1) and
	# run that value through crc32 on top of crc2.
231	pxor    %xmm2,%xmm1
232	movq    %xmm1, %rax
233	xor     -i*8(block_2), %rax
234	mov     crc2, crc_init
235	crc32   %rax, crc_init
236
237	################################################################
238	## 5) Check for end:
239	################################################################
240
	# .Lcrc_0 is also the jump_table[0] target.
241LABEL crc_ 0
242	ENDBR
243	mov     tmp, len
244	cmp     $128*24, tmp
245	jae     .Lfull_block
246	cmp     $24, tmp
247	jae     .Lcontinue_block
248
	# The jz instructions from here to .Ldo_return test the ZF left by the
	# preceding shl, i.e. whether any length bits remain.
249.Lless_than_24:
250	shl     $32-4, len_dw			# less_than_16 expects length
251						# in upper 4 bits of len_dw
252	jnc     .Lless_than_16
253	crc32q  (bufptmp), crc_init
254	crc32q  8(bufptmp), crc_init
255	jz      .Ldo_return
256	add     $16, bufptmp
257	# len is less than 8 if we got here
258	# less_than_8 expects length in upper 3 bits of len_dw
259	# less_than_8_post_shl1 expects length = carryflag * 4 + len_dw[31:30]
260	shl     $2, len_dw
261	jmp     .Lless_than_8_post_shl1
262
263	#######################################################################
264	## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full)
265	#######################################################################
	# Each pass of the outer .rept shifts the next length bit into the carry
	# flag; when it is set, j bytes are processed as j/8 qwords.
266.Lsmall:
267	shl $32-8, len_dw		# Prepare len_dw for less_than_256
268	j=256
269.rept 5					# j = {256, 128, 64, 32, 16}
270.altmacro
271LABEL less_than_ %j			# less_than_j: Length should be in
272					# upper lg(j) bits of len_dw
273	j=(j/2)
274	shl     $1, len_dw		# Get next MSB
275	JNC_LESS_THAN %j
276.noaltmacro
277	i=0
278.rept (j/8)
279	crc32q  i(bufptmp), crc_init	# Compute crc32 of 8-byte data
280	i=i+8
281.endr
282	jz      .Ldo_return		# Return if remaining length is zero
283	add     $j, bufptmp		# Advance buf
284.endr
285
286.Lless_than_8:				# Length should be stored in
287					# upper 3 bits of len_dw
288	shl     $1, len_dw
289.Lless_than_8_post_shl1:
290	jnc     .Lless_than_4
291	crc32l  (bufptmp), crc_init_dw	# CRC of 4 bytes
292	jz      .Ldo_return		# return if remaining data is zero
293	add     $4, bufptmp
294.Lless_than_4:				# Length should be stored in
295					# upper 2 bits of len_dw
296	shl     $1, len_dw
297	jnc     .Lless_than_2
298	crc32w  (bufptmp), crc_init_dw	# CRC of 2 bytes
299	jz      .Ldo_return		# return if remaining data is zero
300	add     $2, bufptmp
301.Lless_than_2:				# Length should be stored in the MSB
302					# of len_dw
303	shl     $1, len_dw
304	jnc     .Lless_than_1
305	crc32b  (bufptmp), crc_init_dw	# CRC of 1 byte
306.Lless_than_1:				# Length should be zero
307.Ldo_return:
308	movq    crc_init, %rax		# return value
309	popq    %rsi
310	popq    %rdi
311	popq    %rbx
312        RET
313SYM_FUNC_END(crc_pcl)
314
315.section	.rodata, "a", @progbits
316        ################################################################
317        ## jump table        Table is 129 entries x 8 bytes each
	##
	## Entry i holds the address of .Lcrc_<i> (i = 0..128). Every entry is
	## a .quad, so the table is aligned to 8 bytes.
318        ################################################################
319.align 8
320jump_table:
321	i=0
322.rept 129
323.altmacro
324JMPTBL_ENTRY %i
325.noaltmacro
326	i=i+1
327.endr
328
329
330	################################################################
331	## PCLMULQDQ tables (entry n is used for a block length of n qwords)
332	## Table is 128 entries x 2 32-bit constants (8 bytes) each
333	################################################################
334.align 8
335K_table:
336	.long 0x493c7d27, 0x00000001
337	.long 0xba4fc28e, 0x493c7d27
338	.long 0xddc0152b, 0xf20c0dfe
339	.long 0x9e4addf8, 0xba4fc28e
340	.long 0x39d3b296, 0x3da6d0cb
341	.long 0x0715ce53, 0xddc0152b
342	.long 0x47db8317, 0x1c291d04
343	.long 0x0d3b6092, 0x9e4addf8
344	.long 0xc96cfdc0, 0x740eef02
345	.long 0x878a92a7, 0x39d3b296
346	.long 0xdaece73e, 0x083a6eec
347	.long 0xab7aff2a, 0x0715ce53
348	.long 0x2162d385, 0xc49f4f67
349	.long 0x83348832, 0x47db8317
350	.long 0x299847d5, 0x2ad91c30
351	.long 0xb9e02b86, 0x0d3b6092
352	.long 0x18b33a4e, 0x6992cea2
353	.long 0xb6dd949b, 0xc96cfdc0
354	.long 0x78d9ccb7, 0x7e908048
355	.long 0xbac2fd7b, 0x878a92a7
356	.long 0xa60ce07b, 0x1b3d8f29
357	.long 0xce7f39f4, 0xdaece73e
358	.long 0x61d82e56, 0xf1d0f55e
359	.long 0xd270f1a2, 0xab7aff2a
360	.long 0xc619809d, 0xa87ab8a8
361	.long 0x2b3cac5d, 0x2162d385
362	.long 0x65863b64, 0x8462d800
363	.long 0x1b03397f, 0x83348832
364	.long 0xebb883bd, 0x71d111a8
365	.long 0xb3e32c28, 0x299847d5
366	.long 0x064f7f26, 0xffd852c6
367	.long 0xdd7e3b0c, 0xb9e02b86
368	.long 0xf285651c, 0xdcb17aa4
369	.long 0x10746f3c, 0x18b33a4e
370	.long 0xc7a68855, 0xf37c5aee
371	.long 0x271d9844, 0xb6dd949b
372	.long 0x8e766a0c, 0x6051d5a2
373	.long 0x93a5f730, 0x78d9ccb7
374	.long 0x6cb08e5c, 0x18b0d4ff
375	.long 0x6b749fb2, 0xbac2fd7b
376	.long 0x1393e203, 0x21f3d99c
377	.long 0xcec3662e, 0xa60ce07b
378	.long 0x96c515bb, 0x8f158014
379	.long 0xe6fc4e6a, 0xce7f39f4
380	.long 0x8227bb8a, 0xa00457f7
381	.long 0xb0cd4768, 0x61d82e56
382	.long 0x39c7ff35, 0x8d6d2c43
383	.long 0xd7a4825c, 0xd270f1a2
384	.long 0x0ab3844b, 0x00ac29cf
385	.long 0x0167d312, 0xc619809d
386	.long 0xf6076544, 0xe9adf796
387	.long 0x26f6a60a, 0x2b3cac5d
388	.long 0xa741c1bf, 0x96638b34
389	.long 0x98d8d9cb, 0x65863b64
390	.long 0x49c3cc9c, 0xe0e9f351
391	.long 0x68bce87a, 0x1b03397f
392	.long 0x57a3d037, 0x9af01f2d
393	.long 0x6956fc3b, 0xebb883bd
394	.long 0x42d98888, 0x2cff42cf
395	.long 0x3771e98f, 0xb3e32c28
396	.long 0xb42ae3d9, 0x88f25a3a
397	.long 0x2178513a, 0x064f7f26
398	.long 0xe0ac139e, 0x4e36f0b0
399	.long 0x170076fa, 0xdd7e3b0c
400	.long 0x444dd413, 0xbd6f81f8
401	.long 0x6f345e45, 0xf285651c
402	.long 0x41d17b64, 0x91c9bd4b
403	.long 0xff0dba97, 0x10746f3c
404	.long 0xa2b73df1, 0x885f087b
405	.long 0xf872e54c, 0xc7a68855
406	.long 0x1e41e9fc, 0x4c144932
407	.long 0x86d8e4d2, 0x271d9844
408	.long 0x651bd98b, 0x52148f02
409	.long 0x5bb8f1bc, 0x8e766a0c
410	.long 0xa90fd27a, 0xa3c6f37a
411	.long 0xb3af077a, 0x93a5f730
412	.long 0x4984d782, 0xd7c0557f
413	.long 0xca6ef3ac, 0x6cb08e5c
414	.long 0x234e0b26, 0x63ded06a
415	.long 0xdd66cbbb, 0x6b749fb2
416	.long 0x4597456a, 0x4d56973c
417	.long 0xe9e28eb4, 0x1393e203
418	.long 0x7b3ff57a, 0x9669c9df
419	.long 0xc9c8b782, 0xcec3662e
420	.long 0x3f70cc6f, 0xe417f38a
421	.long 0x93e106a4, 0x96c515bb
422	.long 0x62ec6c6d, 0x4b9e0f71
423	.long 0xd813b325, 0xe6fc4e6a
424	.long 0x0df04680, 0xd104b8fc
425	.long 0x2342001e, 0x8227bb8a
426	.long 0x0a2a8d7e, 0x5b397730
427	.long 0x6d9a4957, 0xb0cd4768
428	.long 0xe8b6368b, 0xe78eb416
429	.long 0xd2c3ed1a, 0x39c7ff35
430	.long 0x995a5724, 0x61ff0e01
431	.long 0x9ef68d35, 0xd7a4825c
432	.long 0x0c139b31, 0x8d96551c
433	.long 0xf2271e60, 0x0ab3844b
434	.long 0x0b0bf8ca, 0x0bf80dd2
435	.long 0x2664fd8b, 0x0167d312
436	.long 0xed64812d, 0x8821abed
437	.long 0x02ee03b2, 0xf6076544
438	.long 0x8604ae0f, 0x6a45d2b2
439	.long 0x363bd6b3, 0x26f6a60a
440	.long 0x135c83fd, 0xd8d26619
441	.long 0x5fabe670, 0xa741c1bf
442	.long 0x35ec3279, 0xde87806c
443	.long 0x00bcf5f6, 0x98d8d9cb
444	.long 0x8ae00689, 0x14338754
445	.long 0x17f27698, 0x49c3cc9c
446	.long 0x58ca5f00, 0x5bd2011f
447	.long 0xaa7c7ad5, 0x68bce87a
448	.long 0xb5cfca28, 0xdd07448e
449	.long 0xded288f8, 0x57a3d037
450	.long 0x59f229bc, 0xdde8f5b9
451	.long 0x6d390dec, 0x6956fc3b
452	.long 0x37170390, 0xa3e3e02c
453	.long 0x6353c1cc, 0x42d98888
454	.long 0xc4584f5c, 0xd73c7bea
455	.long 0xf48642e9, 0x3771e98f
456	.long 0x531377e2, 0x80ff0093
457	.long 0xdd35bc8d, 0xb42ae3d9
458	.long 0xb25b29f2, 0x8fe4c34d
459	.long 0x9a5ede41, 0x2178513a
460	.long 0xa563905d, 0xdf99fc11
461	.long 0x45cddf4e, 0xe0ac139e
462	.long 0xacfa3103, 0x6c23e841
463	.long 0xa51b6135, 0x170076fa