Loading...
1/* SPDX-License-Identifier: GPL-2.0+
2 *
3 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
4 *
5 * INET An implementation of the TCP/IP protocol suite for the LINUX
6 * operating system. INET is implemented using the BSD Socket
7 * interface as the means of communication with the user level.
8 *
9 * IP/TCP/UDP checksumming routines
10 *
11 * Authors: Jorge Cwik, <jorge@laser.satlink.net>
12 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13 * Tom May, <ftom@netcom.com>
14 * Pentium Pro/II routines:
15 * Alexander Kjeldaas <astor@guardian.no>
16 * Finn Arne Gangstad <finnag@guardian.no>
17 * Lots of code moved from tcp.c and ip.c; see those files
18 * for more names.
19 *
20 * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
21 * handling.
22 * Andi Kleen, add zeroing on error
23 * converted to pure assembler
24 *
25 * SuperH version: Copyright (C) 1999 Niibe Yutaka
26 */
27
28#include <asm/errno.h>
29#include <linux/linkage.h>
30
31/*
32 * computes a partial checksum, e.g. for TCP/UDP fragments
33 */
34
35/*
36 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
37 */
38
39.text
/*
 * csum_partial - add the bytes of a buffer into a 32-bit partial checksum
 *
 * In:   r4 = buf, r5 = len in bytes, r6 = incoming partial sum
 * Out:  r0 = updated partial sum
 * Uses: r0-r7 and the T bit.  r7 keeps the original buf address so the
 *       exit path at 9: can tell whether the buffer started on an odd
 *       address and the sum has to be rotated once more.
 *
 * Carries out of bit 31 are folded back into the sum.
 */
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	 mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5
	add	#-1, r5		! add leaves T untouched
	bt	10f		! len == 0: no byte was summed, so return the
				! sum as is and skip the rotation at 9
	mov.b	@r4+, r0
	extu.b	r0, r0
	addc	r0, r6		! t=0 from previous tst
	mov	#0, r0
	addc	r0, r6		! fold the carry of the odd byte back in
	mov	r6, r0		! rotate the sum left by 8 bits; the matching
	shll8	r6		! rotation is done at 9 on the way out
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0
	bt	2f
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len >> 5 = number of 32-byte chunks
	tst	r1, r1
	bt/s	4f		! if it is 0, go to 4f
	 clrt
	.align	2
3:
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save the carry, dt overwrites T
	dt	r1
	bf/s	3b
	 cmp/eq	#1, r0		! put the saved carry back into T
	! here, we know r1==0
	addc	r1, r6		! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1
	mov	#0, r2
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r1
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it is 0, go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	 clrt
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	 mov	r6, r0
174
175/*
176unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
177 */
178
179/*
180 * Copy from src to dst while checksumming, otherwise like csum_partial with
181 * an initial sum of ~0U
182 */
183
/*
 * EXC(insn): emit insn at a local label 9999 and add the pair
 * (9999b, 6001f) to the __ex_table section, i.e. the address of insn and
 * the next label 6001 (the code under "Exception handler" further down).
 * NOTE(review): presumably the kernel fault handler uses this table to
 * resume at 6001 when insn faults - confirm against the SH fault code.
 */
184#define EXC(...) \
185 9999: __VA_ARGS__ ; \
186 .section __ex_table, "a"; \
187 .long 9999b, 6001f ; \
188 .previous
189
190!
191! r4: const char *SRC
192! r5: char *DST
193! r6: int LEN
194!
/*
 * Copy LEN (r6) bytes from SRC (r4) to DST (r5) and checksum them on the
 * way.  The running sum lives in r7, starts as all ones and is returned
 * in r0.  Every load and store is wrapped in EXC(); the fixup code at
 * label 6001 returns 0 in r0 instead.
 * Registers written: r0, r1, r2, r4, r5, r6, r7 and the T bit.
 */
195ENTRY(csum_partial_copy_generic)
/* r7 = running checksum, seeded with -1 */
196 mov #-1,r7
/* r1 = src & 3, r0 = dst & 3 */
197 mov #3,r0 ! Check src and dest are equally aligned
198 mov r4,r1
199 and r0,r1
200 and r5,r0
201 cmp/eq r1,r0
202 bf 3f ! Different alignments, use slow version
203 tst #1,r0 ! Check dest word aligned
204 bf 3f ! If not, do it the slow way
205
/*
 * Same, even alignment.  If bit 1 of dst is set, one halfword is copied
 * first (label 1 below) before the long-word loops start.
 */
206 mov #2,r0
207 tst r0,r5 ! Check dest alignment.
208 bt 2f ! Jump if alignment is ok.
209 add #-2,r6 ! Alignment uses up two bytes.
210 cmp/pz r6 ! Jump if we had at least two bytes.
211 bt/s 1f
/* delay slot of bt/s: T = 0 for the addc at label 1 */
212 clrt
/* fewer than two bytes: restore len, keep it in r2 (delay slot), go to tail */
213 add #2,r6 ! r6 was < 2. Deal with it.
214 bra 4f
215 mov r6,r2
216
2173: ! Handle different src and dest alignments.
218 ! This is not common, so simple byte by byte copy will do.
/* r2 = original len, r6 = len >> 1 = number of byte pairs */
219 mov r6,r2
220 shlr r6
221 tst r6,r6
/* no complete pair: the tail code at 4 works from r2 */
222 bt 4f
223 clrt
224 .align 2
/* Each pass copies two bytes and adds them to r7 as one 16-bit value. */
2255:
226EXC( mov.b @r4+,r1 )
227EXC( mov.b @r4+,r0 )
228 extu.b r1,r1
229EXC( mov.b r1,@r5 )
230EXC( mov.b r0,@(1,r5) )
231 extu.b r0,r0
232 add #2,r5
233
/* build the 16-bit value in memory byte order for the configured endianness */
234#ifdef __LITTLE_ENDIAN__
235 shll8 r0
236#else
237 shll8 r1
238#endif
239 or r1,r0
240
241 addc r0,r7
/*
 * movt saves the carry in r0 because dt overwrites T; cmp/eq #1,r0 in the
 * delay slot of bf/s puts the saved carry back into T for the next addc.
 */
242 movt r0
243 dt r6
244 bf/s 5b
245 cmp/eq #1,r0
/* add the carry left by the last pass */
246 mov #0,r0
247 addc r0, r7
248
/* even len: done (7).  Odd len: one byte left, copied at the forward label 5. */
249 mov r2, r0
250 tst #1, r0
251 bt 7f
252 bra 5f
253 clrt
254
255 ! src and dest equally aligned, but to a two byte boundary.
256 ! Handle first two bytes as a special case
257 .align 2
2581:
259EXC( mov.w @r4+,r0 )
260EXC( mov.w r0,@r5 )
261 add #2,r5
262 extu.w r0,r0
/* add the halfword, then fold the carry */
263 addc r0,r7
264 mov #0,r0
265 addc r0,r7
2662:
/* r2 = bytes still to copy; r6 = r2 >> 5 = number of 32-byte chunks */
267 mov r6,r2
268 mov #-5,r0
269 shld r0,r6
270 tst r6,r6
271 bt/s 2f
272 clrt
273 .align 2
/* Unrolled loop: eight long words per pass, carry chained through addc. */
2741:
275EXC( mov.l @r4+,r0 )
276EXC( mov.l @r4+,r1 )
277 addc r0,r7
278EXC( mov.l r0,@r5 )
279EXC( mov.l r1,@(4,r5) )
280 addc r1,r7
281
282EXC( mov.l @r4+,r0 )
283EXC( mov.l @r4+,r1 )
284 addc r0,r7
285EXC( mov.l r0,@(8,r5) )
286EXC( mov.l r1,@(12,r5) )
287 addc r1,r7
288
289EXC( mov.l @r4+,r0 )
290EXC( mov.l @r4+,r1 )
291 addc r0,r7
292EXC( mov.l r0,@(16,r5) )
293EXC( mov.l r1,@(20,r5) )
294 addc r1,r7
295
296EXC( mov.l @r4+,r0 )
297EXC( mov.l @r4+,r1 )
298 addc r0,r7
299EXC( mov.l r0,@(24,r5) )
300EXC( mov.l r1,@(28,r5) )
301 addc r1,r7
302 add #32,r5
/* same save/restore of the carry around dt as in the byte-pair loop */
303 movt r0
304 dt r6
305 bf/s 1b
306 cmp/eq #1,r0
307 mov #0,r0
308 addc r0,r7
309
/* r6 = r2 & 0x1c = bytes left in whole long words; shlr2 turns it into a count */
3102: mov r2,r6
311 mov #0x1c,r0
312 and r0,r6
313 cmp/pl r6
314 bf/s 4f
315 clrt
316 shlr2 r6
/* one long word per pass */
3173:
318EXC( mov.l @r4+,r0 )
319 addc r0,r7
320EXC( mov.l r0,@r5 )
321 add #4,r5
322 movt r0
323 dt r6
324 bf/s 3b
325 cmp/eq #1,r0
326 mov #0,r0
327 addc r0,r7
/* r6 = r2 & 3 = trailing bytes */
3284: mov r2,r6
329 mov #3,r0
330 and r0,r6
331 cmp/pl r6
332 bf 7f
333 mov #2,r1
334 cmp/hs r1,r6
/* exactly one byte left: go straight to the byte copy */
335 bf 5f
/*
 * Two or three bytes: copy a halfword.  With exactly two, it is added at 6.
 * With three, it is added shifted into the upper half and the code falls
 * through to copy the last byte.
 */
336EXC( mov.w @r4+,r0 )
337EXC( mov.w r0,@r5 )
338 extu.w r0,r0
339 add #2,r5
340 cmp/eq r1,r6
341 bt/s 6f
342 clrt
343 shll16 r0
344 addc r0,r7
3455:
346EXC( mov.b @r4+,r0 )
347EXC( mov.b r0,@r5 )
348 extu.b r0,r0
349#ifndef __LITTLE_ENDIAN__
350 shll8 r0
351#endif
/* add the last piece and fold the final carry */
3526: addc r0,r7
353 mov #0,r0
354 addc r0,r7
3557:
356
357# Exception handler:
358.section .fixup, "ax"
359
/* target of every EXC() table entry: return 0 (mov is in the rts delay slot) */
3606001:
361 rts
362 mov #0,r0
363.previous
/* normal exit, back in the previous section: return the sum held in r7 */
364 rts
365 mov r7,r0
1/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
2 *
3 * INET An implementation of the TCP/IP protocol suite for the LINUX
4 * operating system. INET is implemented using the BSD Socket
5 * interface as the means of communication with the user level.
6 *
7 * IP/TCP/UDP checksumming routines
8 *
9 * Authors: Jorge Cwik, <jorge@laser.satlink.net>
10 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
11 * Tom May, <ftom@netcom.com>
12 * Pentium Pro/II routines:
13 * Alexander Kjeldaas <astor@guardian.no>
14 * Finn Arne Gangstad <finnag@guardian.no>
15 * Lots of code moved from tcp.c and ip.c; see those files
16 * for more names.
17 *
18 * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
19 * handling.
20 * Andi Kleen, add zeroing on error
21 * converted to pure assembler
22 *
23 * SuperH version: Copyright (C) 1999 Niibe Yutaka
24 *
25 * This program is free software; you can redistribute it and/or
26 * modify it under the terms of the GNU General Public License
27 * as published by the Free Software Foundation; either version
28 * 2 of the License, or (at your option) any later version.
29 */
30
31#include <asm/errno.h>
32#include <linux/linkage.h>
33
34/*
35 * computes a partial checksum, e.g. for TCP/UDP fragments
36 */
37
38/*
39 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
40 */
41
42.text
/*
 * csum_partial - add the bytes of a buffer into a 32-bit partial checksum
 *
 * In:   r4 = buf, r5 = len in bytes, r6 = incoming partial sum
 * Out:  r0 = updated partial sum
 * Uses: r0-r7 and the T bit.  r7 keeps the original buf address so the
 *       exit path at 9: can tell whether the buffer started on an odd
 *       address and the sum has to be rotated once more.
 *
 * Carries out of bit 31 are folded back into the sum.
 */
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	 mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5
	add	#-1, r5		! add leaves T untouched
	bt	10f		! len == 0: no byte was summed, so return the
				! sum as is and skip the rotation at 9
	mov.b	@r4+, r0
	extu.b	r0, r0
	addc	r0, r6		! t=0 from previous tst
	mov	#0, r0
	addc	r0, r6		! fold the carry of the odd byte back in
	mov	r6, r0		! rotate the sum left by 8 bits; the matching
	shll8	r6		! rotation is done at 9 on the way out
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0
	bt	2f
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len >> 5 = number of 32-byte chunks
	tst	r1, r1
	bt/s	4f		! if it is 0, go to 4f
	 clrt
	.align	2
3:
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save the carry, dt overwrites T
	dt	r1
	bf/s	3b
	 cmp/eq	#1, r0		! put the saved carry back into T
	! here, we know r1==0
	addc	r1, r6		! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1
	mov	#0, r2
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r1
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it is 0, go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	 clrt
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	 mov	r6, r0
177
178/*
179unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
180 int sum, int *src_err_ptr, int *dst_err_ptr)
181 */
182
183/*
184 * Copy from src to dst while checksumming, otherwise like csum_partial
185 *
186 * The macros SRC and DST specify the type of access for the instruction.
187 * thus we can call a custom exception handler for all access types.
188 *
189 * FIXME: could someone double-check whether I haven't mixed up some SRC and
190 * DST definitions? It's damn hard to trigger all cases. I hope I got
191 * them all but there's no guarantee.
192 */
193
/*
 * SRC(insn): emit insn (an access to the source buffer) at a local label
 * 9999 and add the pair (9999b, 6001f) to the __ex_table section, tying
 * the instruction to the source-error fixup code at label 6001.
 */
194#define SRC(...) \
195 9999: __VA_ARGS__ ; \
196 .section __ex_table, "a"; \
197 .long 9999b, 6001f ; \
198 .previous
199
/*
 * DST(insn): emit insn (an access to the destination buffer) at a local
 * label 9999 and add the pair (9999b, 6002f) to the __ex_table section,
 * tying the instruction to the destination-error fixup code at label 6002.
 */
200#define DST(...) \
201 9999: __VA_ARGS__ ; \
202 .section __ex_table, "a"; \
203 .long 9999b, 6002f ; \
204 .previous
205
206!
207! r4: const char *SRC
208! r5: char *DST
209! r6: int LEN
210! r7: int SUM
211!
212! on stack:
213! int *SRC_ERR_PTR
214! int *DST_ERR_PTR
215!
/*
 * Copy LEN (r6) bytes from SRC (r4) to DST (r5), adding them to the
 * checksum passed in r7; the result is returned in r0.
 * dst and len are pushed on entry so the fixup code can find them again:
 * after the two pushes @r15 = len, @(4,r15) = dst, and the two stack
 * arguments sit at @(8,r15) (src_err_ptr) and @(12,r15) (dst_err_ptr).
 * Loads are wrapped in SRC() (fixup 6001), stores in DST() (fixup 6002).
 * Registers written: r0, r1, r2, r4, r5, r6, r7, r15 and the T bit.
 */
216ENTRY(csum_partial_copy_generic)
/* save dst and len for the fixup code */
217 mov.l r5,@-r15
218 mov.l r6,@-r15
219
/* r1 = src & 3, r0 = dst & 3 */
220 mov #3,r0 ! Check src and dest are equally aligned
221 mov r4,r1
222 and r0,r1
223 and r5,r0
224 cmp/eq r1,r0
225 bf 3f ! Different alignments, use slow version
226 tst #1,r0 ! Check dest word aligned
227 bf 3f ! If not, do it the slow way
228
/*
 * Same, even alignment.  If bit 1 of dst is set, one halfword is copied
 * first (label 1 below) before the long-word loops start.
 */
229 mov #2,r0
230 tst r0,r5 ! Check dest alignment.
231 bt 2f ! Jump if alignment is ok.
232 add #-2,r6 ! Alignment uses up two bytes.
233 cmp/pz r6 ! Jump if we had at least two bytes.
234 bt/s 1f
/* delay slot of bt/s: T = 0 for the addc at label 1 */
235 clrt
/* fewer than two bytes: restore len, keep it in r2 (delay slot), go to tail */
236 add #2,r6 ! r6 was < 2. Deal with it.
237 bra 4f
238 mov r6,r2
239
2403: ! Handle different src and dest alignments.
241 ! This is not common, so simple byte by byte copy will do.
/* r2 = original len, r6 = len >> 1 = number of byte pairs */
242 mov r6,r2
243 shlr r6
244 tst r6,r6
/* no complete pair: the tail code at 4 works from r2 */
245 bt 4f
246 clrt
247 .align 2
/* Each pass copies two bytes and adds them to r7 as one 16-bit value. */
2485:
249SRC( mov.b @r4+,r1 )
250SRC( mov.b @r4+,r0 )
251 extu.b r1,r1
252DST( mov.b r1,@r5 )
253DST( mov.b r0,@(1,r5) )
254 extu.b r0,r0
255 add #2,r5
256
/* build the 16-bit value in memory byte order for the configured endianness */
257#ifdef __LITTLE_ENDIAN__
258 shll8 r0
259#else
260 shll8 r1
261#endif
262 or r1,r0
263
264 addc r0,r7
/*
 * movt saves the carry in r0 because dt overwrites T; cmp/eq #1,r0 in the
 * delay slot of bf/s puts the saved carry back into T for the next addc.
 */
265 movt r0
266 dt r6
267 bf/s 5b
268 cmp/eq #1,r0
/* add the carry left by the last pass */
269 mov #0,r0
270 addc r0, r7
271
/* even len: done (7).  Odd len: one byte left, copied at the forward label 5. */
272 mov r2, r0
273 tst #1, r0
274 bt 7f
275 bra 5f
276 clrt
277
278 ! src and dest equally aligned, but to a two byte boundary.
279 ! Handle first two bytes as a special case
280 .align 2
2811:
282SRC( mov.w @r4+,r0 )
283DST( mov.w r0,@r5 )
284 add #2,r5
285 extu.w r0,r0
/* add the halfword, then fold the carry */
286 addc r0,r7
287 mov #0,r0
288 addc r0,r7
2892:
/* r2 = bytes still to copy; r6 = r2 >> 5 = number of 32-byte chunks */
290 mov r6,r2
291 mov #-5,r0
292 shld r0,r6
293 tst r6,r6
294 bt/s 2f
295 clrt
296 .align 2
/* Unrolled loop: eight long words per pass, carry chained through addc. */
2971:
298SRC( mov.l @r4+,r0 )
299SRC( mov.l @r4+,r1 )
300 addc r0,r7
301DST( mov.l r0,@r5 )
302DST( mov.l r1,@(4,r5) )
303 addc r1,r7
304
305SRC( mov.l @r4+,r0 )
306SRC( mov.l @r4+,r1 )
307 addc r0,r7
308DST( mov.l r0,@(8,r5) )
309DST( mov.l r1,@(12,r5) )
310 addc r1,r7
311
312SRC( mov.l @r4+,r0 )
313SRC( mov.l @r4+,r1 )
314 addc r0,r7
315DST( mov.l r0,@(16,r5) )
316DST( mov.l r1,@(20,r5) )
317 addc r1,r7
318
319SRC( mov.l @r4+,r0 )
320SRC( mov.l @r4+,r1 )
321 addc r0,r7
322DST( mov.l r0,@(24,r5) )
323DST( mov.l r1,@(28,r5) )
324 addc r1,r7
325 add #32,r5
/* same save/restore of the carry around dt as in the byte-pair loop */
326 movt r0
327 dt r6
328 bf/s 1b
329 cmp/eq #1,r0
330 mov #0,r0
331 addc r0,r7
332
/* r6 = r2 & 0x1c = bytes left in whole long words; shlr2 turns it into a count */
3332: mov r2,r6
334 mov #0x1c,r0
335 and r0,r6
336 cmp/pl r6
337 bf/s 4f
338 clrt
339 shlr2 r6
/* one long word per pass */
3403:
341SRC( mov.l @r4+,r0 )
342 addc r0,r7
343DST( mov.l r0,@r5 )
344 add #4,r5
345 movt r0
346 dt r6
347 bf/s 3b
348 cmp/eq #1,r0
349 mov #0,r0
350 addc r0,r7
/* r6 = r2 & 3 = trailing bytes */
3514: mov r2,r6
352 mov #3,r0
353 and r0,r6
354 cmp/pl r6
355 bf 7f
356 mov #2,r1
357 cmp/hs r1,r6
/* exactly one byte left: go straight to the byte copy */
358 bf 5f
/*
 * Two or three bytes: copy a halfword.  With exactly two, it is added at 6.
 * With three, it is added shifted into the upper half and the code falls
 * through to copy the last byte.
 */
359SRC( mov.w @r4+,r0 )
360DST( mov.w r0,@r5 )
361 extu.w r0,r0
362 add #2,r5
363 cmp/eq r1,r6
364 bt/s 6f
365 clrt
366 shll16 r0
367 addc r0,r7
3685:
369SRC( mov.b @r4+,r0 )
370DST( mov.b r0,@r5 )
371 extu.b r0,r0
372#ifndef __LITTLE_ENDIAN__
373 shll8 r0
374#endif
/* add the last piece and fold the final carry */
3756: addc r0,r7
376 mov #0,r0
377 addc r0,r7
3787:
/* 5000: common exit; both fixup paths jump back here */
3795000:
380
381# Exception handler:
382.section .fixup, "ax"
383
/* Source fault: store -EFAULT through src_err_ptr. */
3846001:
385 mov.l @(8,r15),r0 ! src_err_ptr
386 mov #-EFAULT,r1
387 mov.l r1,@r0
388
389 ! zero the complete destination - computing the rest
390 ! is too much work
/* reload the saved dst and len; r7 = 0 is both the fill byte and the returned sum */
391 mov.l @(4,r15),r5 ! dst
392 mov.l @r15,r6 ! len
393 mov #0,r7
3941: mov.b r7,@r5
395 dt r6
396 bf/s 1b
397 add #1,r5
/* jump back to 5000 through a literal holding its address */
398 mov.l 8000f,r0
399 jmp @r0
400 nop
401 .align 2
4028000: .long 5000b
403
/* Destination fault: store -EFAULT through dst_err_ptr; r7 is left as it is. */
4046002:
405 mov.l @(12,r15),r0 ! dst_err_ptr
406 mov #-EFAULT,r1
407 mov.l r1,@r0
408 mov.l 8001f,r0
409 jmp @r0
410 nop
411 .align 2
4128001: .long 5000b
413
414.previous
/* drop the saved dst/len and return the sum (mov is in the rts delay slot) */
415 add #8,r15
416 rts
417 mov r7,r0