v3.15 (arch/x86/lib/csum-copy_64.S)
/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  64bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
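
The register assignments above are simply the first six integer argument
registers of the System V AMD64 calling convention, so the routine is
directly callable from C. Its declaration, approximately as it appears in
arch/x86/include/asm/checksum_64.h in kernels of this era:

	__wsum csum_partial_copy_generic(const void *src, const void *dst,
					 int len, __wsum sum,
					 int *src_err_ptr, int *dst_err_ptr);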

	.macro source
10:
	_ASM_EXTABLE(10b, .Lbad_source)
	.endm

	.macro dest
20:
	_ASM_EXTABLE(20b, .Lbad_dest)
	.endm

	.macro ignore L=.Lignore
30:
	_ASM_EXTABLE(30b, \L)
	.endm


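Each use of `source`, `dest`, or `ignore` plants a numbered local label on
the following instruction and records a fixup for it in the kernel's
__ex_table, so a fault on that one load or store resumes at the recovery
label instead of oopsing. In v3.15, _ASM_EXTABLE (from asm/asm.h) emits
32-bit self-relative entries; its expansion is roughly this sketch:

	.pushsection "__ex_table", "a"
	.balign 8
	.long 10b - .		/* address of the faulting instruction */
	.long .Lbad_source - .	/* fixup code to resume at */
	.popsection

The v3.1 listing at the bottom of the page still open-codes the older form
of the same table with absolute 8-byte addresses.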
ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	cmpl	$3*64, %edx
	jle	.Lignore

.Lignore:
	subq  $7*8, %rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq  %rbx, 2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq  %r12, 3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq  %r14, 4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq  %r13, 5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq  %rbp, 6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	movq  %r8, (%rsp)
	movq  %r9, 1*8(%rsp)

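A note on the frame built above: subq $7*8 reserves seven slots, five for
the callee-saved registers and two, at (%rsp) and 1*8(%rsp), for the
src/dst error pointers; the .Lbad_source/.Lbad_dest handlers at the end
reload them from exactly those slots. Also note that the cmpl $3*64/jle
pair at entry branches to .Lignore, which is the very next instruction,
so it does nothing; it appears to be a leftover from an earlier version
that special-cased large buffers.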
	movl  %ecx, %eax
	movl  %edx, %ecx

	xorl  %r9d, %r9d
	movq  %rcx, %r12

	shrq  $6, %r12
	jz	.Lhandle_tail       /* < 64 */

	clc

	/* main loop. clear in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %rbp
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %rbp, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %rbp, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

3:

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz	.Lloop

	adcq  %r9, %rax

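Correctness of the loop hinges on the carry chain staying live: decl %r12d
updates ZF for the jnz but does not touch CF, and leaq computes addresses
without setting any flags, so the adcq chain carries across all eight adds
and across iterations until the final adcq %r9, %rax (r9 is zero) folds in
the last carry. In C the same block sum can be modelled with a wide
accumulator instead of an explicit carry flag; the result agrees with the
assembly modulo 2^64-1, which is all the later fold cares about. A minimal
sketch with hypothetical names:

	#include <stdint.h>

	/* sketch: one 64-byte block of the sum above, modelled with a
	 * 128-bit accumulator instead of an explicit carry flag */
	static uint64_t csum_block64(const uint64_t *p, uint64_t sum)
	{
		unsigned __int128 acc = sum;

		for (int i = 0; i < 8; i++)
			acc += p[i];

		/* end-around carry: fold the high half back into the low */
		uint64_t lo = (uint64_t)acc;
		uint64_t hi = (uint64_t)(acc >> 64);
		lo += hi;
		return lo + (lo < hi);	/* carry of the fold itself */
	}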
	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count */
	movl %ecx, %r10d
	andl $63, %ecx
	shrl $3, %ecx
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi /* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax

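The .Lfold step narrows the 64-bit accumulator to 32 bits by adding the two
halves and feeding the resulting carry back in with adcl against the zeroed
%r9d. The same step in C, as a sketch:

	#include <stdint.h>

	/* sketch: fold a 64-bit one's-complement accumulator to 32 bits */
	static uint32_t csum_fold_to_32(uint64_t sum)
	{
		uint32_t lo = (uint32_t)sum;
		uint32_t hi = (uint32_t)(sum >> 32);
		uint32_t s  = lo + hi;

		return s + (s < hi);	/* end-around carry, like the adcl */
	}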
	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
	shrl $1, %ecx
	jz   .Lhandle_1
	movl $2, %edx
	xorl %ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl $1, %r10d
	jz    .Lende
	xorl  %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

	CFI_REMEMBER_STATE
.Lende:
	movq 2*8(%rsp), %rbx
	CFI_RESTORE rbx
	movq 3*8(%rsp), %r12
	CFI_RESTORE r12
	movq 4*8(%rsp), %r14
	CFI_RESTORE r14
	movq 5*8(%rsp), %r13
	CFI_RESTORE r13
	movq 6*8(%rsp), %rbp
	CFI_RESTORE rbp
	addq $7*8, %rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq (%rsp), %rax
	testq %rax, %rax
	jz   .Lende
	movl $-EFAULT, (%rax)
	jmp  .Lende

.Lbad_dest:
	movq 8(%rsp), %rax
	testq %rax, %rax
	jz   .Lende
	movl $-EFAULT, (%rax)
	jmp .Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
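
On a fault, the handlers only store -EFAULT through the corresponding saved
error pointer (if the caller passed one) and jump to the common epilogue;
the destination zeroing promised in the header comment is left to the C
wrappers. csum_partial_copy_from_user() in arch/x86/lib/csum-wrappers_64.c
does roughly the following (a paraphrase, not the exact source):

	isum = csum_partial_copy_generic((__force const void *)src, dst,
					 len, isum, errp, NULL);
	if (unlikely(*errp)) {
		*errp = -EFAULT;
		memset(dst, 0, len);	/* the 'destination is zeroed' rule */
	}
	return isum;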
v3.1 (arch/x86/lib/csum-copy_64.S)
/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  64bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */

	.macro source
10:
	.section __ex_table, "a"
	.align 8
	.quad 10b, .Lbad_source
	.previous
	.endm

	.macro dest
20:
	.section __ex_table, "a"
	.align 8
	.quad 20b, .Lbad_dest
	.previous
	.endm

	.macro ignore L=.Lignore
30:
	.section __ex_table, "a"
	.align 8
	.quad 30b, \L
	.previous
	.endm
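
Note the difference from v3.15: each exception-table entry here is a pair
of absolute 8-byte addresses (.quad fault, fixup), open-coded at every
use. Later kernels switched to the _ASM_EXTABLE macro with 32-bit
self-relative offsets, halving the table size and making the entries
position-independent.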

	/* The remainder of the file, from ENTRY(csum_partial_copy_generic)
	   through ENDPROC(), is identical to the v3.15 listing above. */