/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On exceptions, src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 *	rdi  source
 *	rsi  destination
 *	edx  len (32bit)
 *	ecx  sum (32bit)
 *	r8   src_err_ptr (int)
 *	r9   dst_err_ptr (int)
 *
 * Output
 *	eax  32-bit partial checksum; undefined in case of an exception.
 *
 * Wrappers need to take care of returning a valid sum on exceptions
 * and of zeroing the destination. They should also align the source
 * or the destination to 8 bytes.
 */
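
/*
 * Illustrative only: a rough sketch of the kind of C wrapper this routine
 * expects, matching the register interface above (rdi, rsi, edx, ecx, r8, r9
 * correspond to the C arguments in order under the x86-64 calling
 * convention). The wrapper name and details below are made up for
 * illustration; the real wrappers live in C code elsewhere.
 *
 *	__wsum example_csum_and_copy(const void *src, void *dst, int len,
 *				     __wsum isum)
 *	{
 *		int src_err = 0, dst_err = 0;
 *		__wsum sum;
 *
 *		sum = csum_partial_copy_generic(src, dst, len, isum,
 *						&src_err, &dst_err);
 *		if (src_err)			// fixup stored -EFAULT here
 *			memset(dst, 0, len);	// "the destination is zeroed"
 *		return sum;
 *	}
 */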

/*
 * The source/dest/ignore macros place an exception-table entry on the
 * instruction that follows them: _ASM_EXTABLE(addr, fixup) records that a
 * fault at 'addr' resumes at 'fixup'. Faulting source reads resume at
 * .Lbad_source, faulting destination writes at .Lbad_dest, and 'ignore'
 * resumes at the label passed as its argument (used for the prefetch below).
 */
	.macro source
10:
	_ASM_EXTABLE(10b, .Lbad_source)
	.endm

	.macro dest
20:
	_ASM_EXTABLE(20b, .Lbad_dest)
	.endm

	.macro ignore L=.Lignore
30:
	_ASM_EXTABLE(30b, \L)
	.endm


ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	cmpl $3*64, %edx
	jle .Lignore	/* taken or not, execution continues at .Lignore */

.Lignore:
	subq $7*8, %rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq %rbx, 2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq %r12, 3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq %r14, 4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq %r13, 5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq %rbp, 6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	movq %r8, (%rsp)	/* save src_err_ptr for .Lbad_source */
	movq %r9, 1*8(%rsp)	/* save dst_err_ptr for .Lbad_dest */

	movl %ecx, %eax		/* sum */
	movl %edx, %ecx		/* len */

	xorl %r9d, %r9d
	movq %rcx, %r12

	shrq $6, %r12
	jz .Lhandle_tail	/* < 64 */

	clc

	/* main loop: checksum and copy in 64-byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq (%rdi), %rbx
	source
	movq 8(%rdi), %r8
	source
	movq 16(%rdi), %r11
	source
	movq 24(%rdi), %rdx

	source
	movq 32(%rdi), %r10
	source
	movq 40(%rdi), %rbp
	source
	movq 48(%rdi), %r14
	source
	movq 56(%rdi), %r13

	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq %rbx, %rax
	adcq %r8, %rax
	adcq %r11, %rax
	adcq %rdx, %rax
	adcq %r10, %rax
	adcq %rbp, %rax
	adcq %r14, %rax
	adcq %r13, %rax

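	/*
	 * decl does not change CF, so the carry chain from the adcq block
	 * above is preserved across the stores and lea updates below
	 * (mov and lea leave the flags alone) and is folded in by the
	 * adcq %r9, %rax after the loop. The jnz at the bottom of the loop
	 * tests the ZF set by this decrement.
	 */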
	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %rbp, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

3:

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz .Lloop

	adcq %r9, %rax		/* add in the last carry; %r9 is zero */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count */
	movl %ecx, %r10d
	andl $63, %ecx
	shrl $3, %ecx
	jz .Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi	/* preserve carry */
	leaq 8(%rdi), %rdi
	jnz .Lloop_8
	adcq %r9, %rax	/* add in carry */

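	/*
	 * Fold the 64-bit running sum into 32 bits with an end-around carry
	 * (low 32 bits + high 32 bits, then add the carry back in), which
	 * preserves the ones'-complement checksum. Worked example:
	 * rax = 0x80000002_80000005 -> 0x80000005 + 0x80000002 = 0x00000007
	 * with CF set, and adding the carry back gives eax = 0x00000008.
	 */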
.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
	shrl $1, %ecx
	jz .Lhandle_1
	movl $2, %edx
	xorl %ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl $1, %r10d
	jz .Lende
	xorl %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax	/* carry */

	CFI_REMEMBER_STATE
.Lende:
	movq 2*8(%rsp), %rbx
	CFI_RESTORE rbx
	movq 3*8(%rsp), %r12
	CFI_RESTORE r12
	movq 4*8(%rsp), %r14
	CFI_RESTORE r14
	movq 5*8(%rsp), %r13
	CFI_RESTORE r13
	movq 6*8(%rsp), %rbp
	CFI_RESTORE rbp
	addq $7*8, %rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq (%rsp), %rax
	testq %rax, %rax
	jz .Lende
	movl $-EFAULT, (%rax)
	jmp .Lende

.Lbad_dest:
	movq 8(%rsp), %rax
	testq %rax, %rax
	jz .Lende
	movl $-EFAULT, (%rax)
	jmp .Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
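
/*
 * Note (illustrative, not part of this file's interface): callers that want
 * the final 16-bit Internet checksum fold the 32-bit partial sum returned
 * in eax down once more and complement it, along the lines of:
 *
 *	sum = (sum & 0xffff) + (sum >> 16);	// fold 32 -> 17 bits
 *	sum = (sum & 0xffff) + (sum >> 16);	// fold the carry back in
 *	csum16 = ~sum & 0xffff;
 */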