/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On an exception, 0 is returned (see .Lfault below); reporting the error
 * and zeroing the destination is left to the wrappers.
 *
 * Input
 * rdi	source
 * rsi	destination
 * edx	len (32bit)
 *
 * Output
 * eax	32-bit checksum, folded from the 64-bit running sum; 0 if a fault
 *	was hit.
 *
 * Wrappers need to take care of a valid exception sum and of zeroing.
 * They should also align the source or destination to 8 bytes.
 */
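
/*
 * Rough illustration of how a C wrapper is expected to use this routine
 * (a sketch only, not the exact wrapper code; see the csum_and_copy_*_user()
 * wrappers for the real thing):
 *
 *	__wsum csum_and_copy_from_user(const void __user *src,
 *				       void *dst, int len)
 *	{
 *		__wsum sum;
 *
 *		if (!user_access_begin(src, len))
 *			return 0;
 *		sum = csum_partial_copy_generic((__force const void *)src,
 *						dst, len);
 *		user_access_end();
 *		return sum;
 *	}
 *
 * A return value of 0 is the fault indication (see .Lfault below).
 */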

	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
	.endm

	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
	.endm
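
/*
 * Each "source"/"dest" invocation places a local label directly in front of
 * the next memory access and registers an exception table entry for it, so
 * a fault on that one load or store is redirected to .Lfault instead of
 * oopsing.
 */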

SYM_FUNC_START(csum_partial_copy_generic)
	subq	$5*8, %rsp
	movq	%rbx, 0*8(%rsp)
	movq	%r12, 1*8(%rsp)
	movq	%r14, 2*8(%rsp)
	movq	%r13, 3*8(%rsp)
	movq	%r15, 4*8(%rsp)
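
	/*
	 * The running sum is seeded with all-ones and %r9 is kept at zero so
	 * that "adcq %r9, %rax" can fold a pending carry flag back into the
	 * sum.  Lengths below 8 bytes go straight to the .Lshort tail; a
	 * destination that is not 8-byte aligned is fixed up at .Lunaligned
	 * first.
	 */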
	movl	$-1, %eax
	xorl	%r9d, %r9d
	movl	%edx, %ecx
	cmpl	$8, %ecx
	jb	.Lshort

	testb	$7, %sil
	jne	.Lunaligned
.Laligned:
	movl	%ecx, %r12d

	shrq	$6, %r12
	jz	.Lhandle_tail	/* < 64 */

	clc
	/* main loop; data is processed in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12: loopcnt */
	/* r10: temp5, r15: temp6, r14: temp7, r13: temp8 */
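	/*
	 * Each iteration loads eight quadwords (all covered by exception
	 * table entries), accumulates them with an adc chain and stores them
	 * out.  The carry stays live across iterations: decl leaves CF alone
	 * and the lea pointer updates do not touch the flags, so the chain
	 * is only folded in (adcq %r9, %rax) once the loop exits.
	 */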
	.p2align 4
.Lloop:
	source
	movq	(%rdi), %rbx
	source
	movq	8(%rdi), %r8
	source
	movq	16(%rdi), %r11
	source
	movq	24(%rdi), %rdx

	source
	movq	32(%rdi), %r10
	source
	movq	40(%rdi), %r15
	source
	movq	48(%rdi), %r14
	source
	movq	56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0	5*64(%rdi)
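	/*
	 * Prefetch five cache lines ahead of the reads; the distance is
	 * presumably a throughput tuning choice.
	 */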
2:
	adcq	%rbx, %rax
	adcq	%r8, %rax
	adcq	%r11, %rax
	adcq	%rdx, %rax
	adcq	%r10, %rax
	adcq	%r15, %rax
	adcq	%r14, %rax
	adcq	%r13, %rax

	decl	%r12d

	dest
	movq	%rbx, (%rsi)
	dest
	movq	%r8, 8(%rsi)
	dest
	movq	%r11, 16(%rsi)
	dest
	movq	%rdx, 24(%rsi)

	dest
	movq	%r10, 32(%rsi)
	dest
	movq	%r15, 40(%rsi)
	dest
	movq	%r14, 48(%rsi)
	dest
	movq	%r13, 56(%rsi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi

	jnz	.Lloop
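
	/* fold in the carry left by the last adcq of the loop (%r9 is 0) */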
	adcq	%r9, %rax

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count, rcx.63: the end result needs to be rol8 */
	movq	%rcx, %r10
	andl	$63, %ecx
	shrl	$3, %ecx
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi), %rbx
	adcq	%rbx, %rax
	decl	%ecx
	dest
	movq	%rbx, (%rsi)
	leaq	8(%rsi), %rsi	/* preserve carry */
	leaq	8(%rdi), %rdi
	jnz	.Lloop_8
	adcq	%r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax, %ebx
	shrq	$32, %rax
	addl	%ebx, %eax
	adcl	%r9d, %eax
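	/*
	 * The fold above is the usual 64 -> 32 bit ones' complement
	 * reduction; in C terms, roughly:
	 *
	 *	u32 lo = sum, hi = sum >> 32;
	 *	u32 folded = lo + hi;
	 *	folded += (folded < lo);	wrap the carry back around
	 */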

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d, %ecx
	andl	$7, %ecx
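	/*
	 * %r10 still holds the saved length (possibly with bit 63 set by the
	 * odd-destination path); %ecx now has the 0-7 leftover bytes, copied
	 * as 16-bit words below and then as a final odd byte at .Lhandle_1.
	 */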
.L1:				/* .Lshort rejoins the common path here */
	shrl	$1, %ecx
	jz	.Lhandle_1
	movl	$2, %edx
	xorl	%ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi), %bx
	adcl	%ebx, %eax
	decl	%ecx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	leaq	2(%rsi), %rsi
	jnz	.Lloop_1
	adcl	%r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb	$1, %r10b
	jz	.Lende
	xorl	%ebx, %ebx
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	addl	%ebx, %eax
	adcl	%r9d, %eax	/* carry */

.Lende:
	testq	%r10, %r10
	js	.Lwas_odd
.Lout:
	movq	0*8(%rsp), %rbx
	movq	1*8(%rsp), %r12
	movq	2*8(%rsp), %r14
	movq	3*8(%rsp), %r13
	movq	4*8(%rsp), %r15
	addq	$5*8, %rsp
	RET
.Lshort:
	movl	%ecx, %r10d
	jmp	.L1
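
	/*
	 * Destination not 8-byte aligned: copy (and add in) 1, 2 and/or 4
	 * bytes until it is, then rejoin the aligned path.  The single-byte
	 * case is handled at .Lodd, which also tags %rcx (bit 63) so the
	 * final result gets byte-rotated at .Lwas_odd.
	 */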
.Lunaligned:
	xorl	%ebx, %ebx
	testb	$1, %sil
	jne	.Lodd
1:	testb	$2, %sil
	je	2f
	source
	movw	(%rdi), %bx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	subq	$2, %rcx
	leaq	2(%rsi), %rsi
	addq	%rbx, %rax
2:	testb	$4, %sil
	je	.Laligned
	source
	movl	(%rdi), %ebx
	dest
	movl	%ebx, (%rsi)
	leaq	4(%rdi), %rdi
	subq	$4, %rcx
	leaq	4(%rsi), %rsi
	addq	%rbx, %rax
	jmp	.Laligned

.Lodd:
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	leaq	1(%rdi), %rdi
	leaq	1(%rsi), %rsi
	/* decrement the count and set bit 63 of %rcx to flag the odd start */
	leaq	-1(%rcx, %rcx), %rcx
	rorq	$1, %rcx
	shll	$8, %ebx
	addq	%rbx, %rax
	jmp	1b
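
	/*
	 * The destination started on an odd address: the first byte was
	 * copied separately and added into the sum shifted left by 8 (see
	 * .Lodd), which left every byte in the opposite 8-bit lane, and bit
	 * 63 of %rcx was set to remember that.  Because an end-around-carry
	 * sum commutes with byte rotation, rotating the folded result by 8
	 * puts the checksum back in the right byte order.
	 */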
.Lwas_odd:
	roll	$8, %eax
	jmp	.Lout
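
	/*
	 * All the source/dest exception table entries funnel into .Lfault.
	 * The wrappers are expected to treat a return value of 0 as "a fault
	 * happened"; the all-ones seed in %eax keeps a successful sum from
	 * ever being 0, so the two cases cannot collide.
	 */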
	/* Exception: just return 0 */
.Lfault:
	xorl	%eax, %eax
	jmp	.Lout
SYM_FUNC_END(csum_partial_copy_generic)