Loading...
1/*
2 * linux/arch/arm/lib/csumpartialcopygeneric.S
3 *
4 * Copyright (C) 1995-2001 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/*
12 * unsigned int
13 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
14 * r0 = src, r1 = dst, r2 = len, r3 = sum
15 * Returns : r0 = checksum
16 *
17 * Note that 'tst' and 'teq' preserve the carry flag.
18 */
19
/* Symbolic names for the four registers that carry the routine's state (see the header comment: r0 = src, r1 = dst, r2 = len, r3 = sum). */
20src .req r0
21dst .req r1
22len .req r2
23sum .req r3
24
/*
 * Zero-length exit, reached from .Lless8 when len == 0: nothing is copied
 * and the incoming sum is handed back unchanged in r0.
 * load_regs is a macro defined outside this view -- presumably it undoes
 * save_regs and returns to the caller; confirm against its definition.
 */
25.Lzero: mov r0, sum
26 load_regs
27
28 /*
29 * Align an unaligned destination pointer. We know that
30 * we have >= 8 bytes here, so we don't need to check
31 * the length. Note that the source pointer hasn't been
32 * aligned yet.
33 */
/*
 * Subroutine: called with 'blne' from the main entry when (dst & 3) != 0 and
 * returns through lr. Copies and checksums 1, 2 or 3 leading bytes so that
 * dst ends up on a 4-byte boundary, decrementing len accordingly.
 * The carry flag is threaded through the adcs instructions and is still
 * live when this returns; 'sub' (no S suffix) and 'tst' leave it alone.
 * load1b / load2b and put_byte_0 / put_byte_1 are macros defined outside
 * this view -- presumably byte fetches from src and operand shifts that put
 * a byte into the even/odd lane of the sum; confirm against their definitions.
 */
34.Ldst_unaligned:
35 tst dst, #1
36 beq .Ldst_16bit
37
    /* dst is odd: move one byte to make it even. */
38 load1b ip
39 sub len, len, #1
40 adcs sum, sum, ip, put_byte_1 @ update checksum
41 strb ip, [dst], #1
42 tst dst, #2
43 moveq pc, lr @ dst is now 32bit aligned
44
    /* dst is even but not a multiple of 4: move two more bytes. */
45.Ldst_16bit: load2b r8, ip
46 sub len, len, #2
47 adcs sum, sum, r8, put_byte_0
48 strb r8, [dst], #1
49 adcs sum, sum, ip, put_byte_1
50 strb ip, [dst], #1
51 mov pc, lr @ dst is now 32bit aligned
52
53 /*
54 * Handle 0 to 7 bytes, with any alignment of source and
55 * destination pointers. Note that when we get here, C = 0
56 */
/*
 * Short-copy path, entered from the main entry via 'blo' after
 * 'cmp len, #8' (so len is 0..7 and the carry flag is clear).
 * Shape: optional single byte if dst is odd, then byte pairs while
 * (len & 6) != 0, then one optional trailing byte; finishes at .Ldone.
 * load1b / load2b / put_byte_* are macros defined outside this view.
 */
57.Lless8: teq len, #0 @ check for zero count
58 beq .Lzero
59
60 /* we must have at least one byte. */
61 tst dst, #1 @ dst 16-bit aligned
62 beq .Lless8_aligned
63
64 /* Align dst */
65 load1b ip
66 sub len, len, #1
67 adcs sum, sum, ip, put_byte_1 @ update checksum
68 strb ip, [dst], #1
69 tst len, #6
70 beq .Lless8_byteonly
71
    /* Pair loop: two bytes per pass; the loop test sits at .Lless8_aligned. */
721: load2b r8, ip
73 sub len, len, #2
74 adcs sum, sum, r8, put_byte_0
75 strb r8, [dst], #1
76 adcs sum, sum, ip, put_byte_1
77 strb ip, [dst], #1
78.Lless8_aligned:
79 tst len, #6
80 bne 1b
    /* At most one byte is left here (bits 1 and 2 of len are clear). */
81.Lless8_byteonly:
82 tst len, #1
83 beq .Ldone
84 load1b r8
85 adcs sum, sum, r8, put_byte_0 @ update checksum
86 strb r8, [dst], #1
87 b .Ldone
88
/*
 * Main entry point of the checksum-and-copy routine, plus the fast path for
 * word-aligned src and dst and the shared tail (.Lexit / .Ldone).
 * In:  src, dst, len, sum as named by the .req aliases.  Out: r0 = checksum.
 * The checksum is accumulated with a chain of adcs instructions, so the carry
 * flag is live between them; the code relies on the intervening
 * bics/ands/tst/teq/sub instructions not disturbing it.
 * FN_ENTRY, save_regs, load_regs, load1l/load2l/load4l, push, get_byte_* and
 * put_byte_* are macros defined outside this view; the notes on them below
 * are assumptions to confirm against their definitions.
 */
89FN_ENTRY
90 save_regs
91
92 cmp len, #8 @ Ensure that we have at least
93 blo .Lless8 @ 8 bytes to copy.
94
95 adds sum, sum, #0 @ C = 0
96 tst dst, #3 @ Test destination alignment
97 blne .Ldst_unaligned @ align destination, return here
98
99 /*
100 * Ok, the dst pointer is now 32bit aligned, and we know
101 * that we must have more than 4 bytes to copy. Note
102 * that C contains the carry from the dst alignment above.
103 */
104
105 tst src, #3 @ Test source alignment
106 bne .Lsrc_not_aligned
107
108 /* Routine for src & dst aligned */
109
    /* ip = len with the low four bits cleared: the byte count handled 16 at a time. */
110 bics ip, len, #15
111 beq 2f
112
1131: load4l r4, r5, r6, r7
114 stmia dst!, {r4, r5, r6, r7}
115 adcs sum, sum, r4
116 adcs sum, sum, r5
117 adcs sum, sum, r6
118 adcs sum, sum, r7
119 sub ip, ip, #16
120 teq ip, #0
121 bne 1b
122
    /* ip = len & 12: an optional 8-byte step, then an optional 4-byte step. */
1232: ands ip, len, #12
124 beq 4f
125 tst ip, #8
126 beq 3f
127 load2l r4, r5
128 stmia dst!, {r4, r5}
129 adcs sum, sum, r4
130 adcs sum, sum, r5
131 tst ip, #4
132 beq 4f
133
1343: load1l r4
135 str r4, [dst], #4
136 adcs sum, sum, r4
137
    /*
     * Trailing 1..3 bytes. NOTE(review): load1l presumably fetches a whole
     * word of which only the first 'len' bytes are stored/summed -- confirm.
     * r5 always holds the next byte to store when control reaches .Lexit.
     */
1384: ands len, len, #3
139 beq .Ldone
140 load1l r4
141 tst len, #2
142 mov r5, r4, get_byte_0
143 beq .Lexit
144 adcs sum, sum, r4, push #16
145 strb r5, [dst], #1
146 mov r5, r4, get_byte_1
147 strb r5, [dst], #1
148 mov r5, r4, get_byte_2
    /* Shared tail: if bit 0 of len is set, store the byte in r5 and add it (masked to 8 bits). */
149.Lexit: tst len, #1
150 strneb r5, [dst], #1
151 andne r5, r5, #255
152 adcnes sum, sum, r5, put_byte_0
153
154 /*
155 * If the dst pointer was not 16-bit aligned, we
156 * need to rotate the checksum here to get around
157 * the inefficient byte manipulations in the
158 * architecture independent code.
159 */
    /*
     * The final carry is folded into the result first. dst itself has been
     * advanced by now, so the value tested is reloaded from the top of the
     * stack -- presumably the original dst as stored by save_regs (confirm).
     */
160.Ldone: adc r0, sum, #0
161 ldr sum, [sp, #0] @ dst
162 tst sum, #1
163 movne r0, r0, ror #8
164 load_regs
165
/*
 * dst is word aligned but src is not. The carry left over from the dst
 * alignment step is folded into sum first, because the 'cmp ip, #2' used
 * for dispatch below overwrites the carry flag.
 * src is rounded down to a word boundary and the first word is fetched;
 * ip = src & 3 selects the variant: 2 -> .Lsrc2_aligned, 3 -> .Lsrc3_aligned,
 * 1 falls through. On the fall-through path cmp compared 1 with 2, which
 * leaves the carry clear (hence the existing "C = 0" note).
 * Throughout, r4 holds the not-yet-written bytes of the previous source word
 * and is merged with the leading bytes of the next one to form each output word.
 * load*l, pull, push and get_byte_* are macros defined outside this view --
 * presumably word loads from src and endian-aware shifts; confirm there.
 */
166.Lsrc_not_aligned:
167 adc sum, sum, #0 @ include C from dst alignment
168 and ip, src, #3
169 bic src, src, #3
170 load1l r5
171 cmp ip, #2
172 beq .Lsrc2_aligned
173 bhi .Lsrc3_aligned
174 mov r4, r5, pull #8 @ C = 0
175 bics ip, len, #15
176 beq 2f
    /* 16 bytes per pass: four output words rebuilt from five source words (r4 carry-in plus r5-r8). */
1771: load4l r5, r6, r7, r8
178 orr r4, r4, r5, push #24
179 mov r5, r5, pull #8
180 orr r5, r5, r6, push #24
181 mov r6, r6, pull #8
182 orr r6, r6, r7, push #24
183 mov r7, r7, pull #8
184 orr r7, r7, r8, push #24
185 stmia dst!, {r4, r5, r6, r7}
186 adcs sum, sum, r4
187 adcs sum, sum, r5
188 adcs sum, sum, r6
189 adcs sum, sum, r7
190 mov r4, r8, pull #8
191 sub ip, ip, #16
192 teq ip, #0
193 bne 1b
    /* ip = len & 12: optional 8-byte step, then optional 4-byte step. */
1942: ands ip, len, #12
195 beq 4f
196 tst ip, #8
197 beq 3f
198 load2l r5, r6
199 orr r4, r4, r5, push #24
200 mov r5, r5, pull #8
201 orr r5, r5, r6, push #24
202 stmia dst!, {r4, r5}
203 adcs sum, sum, r4
204 adcs sum, sum, r5
205 mov r4, r6, pull #8
206 tst ip, #4
207 beq 4f
2083: load1l r5
209 orr r4, r4, r5, push #24
210 str r4, [dst], #4
211 adcs sum, sum, r4
212 mov r4, r5, pull #8
    /* Trailing 1..3 bytes are taken from what is left in r4; no further load is issued on this path. */
2134: ands len, len, #3
214 beq .Ldone
215 mov r5, r4, get_byte_0
216 tst len, #2
217 beq .Lexit
218 adcs sum, sum, r4, push #16
219 strb r5, [dst], #1
220 mov r5, r4, get_byte_1
221 strb r5, [dst], #1
222 mov r5, r4, get_byte_2
223 b .Lexit
224
/*
 * Variant for (src & 3) == 2, reached by 'beq' from .Lsrc_not_aligned with
 * r5 holding the first source word. The dispatching cmp compared equal
 * values and so left the carry set; 'adds sum, sum, #0' clears it again
 * before the adcs chain starts. r4 carries the leftover part of each source
 * word into the next output word (pull #16 / push #16).
 * load*l, load1b, pull, push and get_byte_* are macros defined outside this
 * view; confirm their exact behaviour there.
 */
225.Lsrc2_aligned: mov r4, r5, pull #16
226 adds sum, sum, #0
227 bics ip, len, #15
228 beq 2f
    /* 16 bytes per pass. */
2291: load4l r5, r6, r7, r8
230 orr r4, r4, r5, push #16
231 mov r5, r5, pull #16
232 orr r5, r5, r6, push #16
233 mov r6, r6, pull #16
234 orr r6, r6, r7, push #16
235 mov r7, r7, pull #16
236 orr r7, r7, r8, push #16
237 stmia dst!, {r4, r5, r6, r7}
238 adcs sum, sum, r4
239 adcs sum, sum, r5
240 adcs sum, sum, r6
241 adcs sum, sum, r7
242 mov r4, r8, pull #16
243 sub ip, ip, #16
244 teq ip, #0
245 bne 1b
    /* ip = len & 12: optional 8-byte step, then optional 4-byte step. */
2462: ands ip, len, #12
247 beq 4f
248 tst ip, #8
249 beq 3f
250 load2l r5, r6
251 orr r4, r4, r5, push #16
252 mov r5, r5, pull #16
253 orr r5, r5, r6, push #16
254 stmia dst!, {r4, r5}
255 adcs sum, sum, r4
256 adcs sum, sum, r5
257 mov r4, r6, pull #16
258 tst ip, #4
259 beq 4f
2603: load1l r5
261 orr r4, r4, r5, push #16
262 str r4, [dst], #4
263 adcs sum, sum, r4
264 mov r4, r5, pull #16
    /*
     * Trailing 1..3 bytes. r4 is summed whole and its first two bytes are
     * stored; if a third byte is wanted it is fetched separately with
     * load1b before joining the shared tail at .Lexit.
     */
2654: ands len, len, #3
266 beq .Ldone
267 mov r5, r4, get_byte_0
268 tst len, #2
269 beq .Lexit
270 adcs sum, sum, r4
271 strb r5, [dst], #1
272 mov r5, r4, get_byte_1
273 strb r5, [dst], #1
274 tst len, #1
275 beq .Ldone
276 load1b r5
277 b .Lexit
278
/*
 * Variant for (src & 3) == 3, reached by 'bhi' from .Lsrc_not_aligned with
 * r5 holding the first source word. The dispatching cmp (3 against 2) left
 * the carry set; 'adds sum, sum, #0' clears it again before the adcs chain.
 * r4 carries the leftover part of each source word into the next output
 * word (pull #24 / push #8).
 * load*l, pull, push, get_byte_* and FN_EXIT are macros defined outside this
 * view; confirm their exact behaviour there.
 */
279.Lsrc3_aligned: mov r4, r5, pull #24
280 adds sum, sum, #0
281 bics ip, len, #15
282 beq 2f
    /* 16 bytes per pass. */
2831: load4l r5, r6, r7, r8
284 orr r4, r4, r5, push #8
285 mov r5, r5, pull #24
286 orr r5, r5, r6, push #8
287 mov r6, r6, pull #24
288 orr r6, r6, r7, push #8
289 mov r7, r7, pull #24
290 orr r7, r7, r8, push #8
291 stmia dst!, {r4, r5, r6, r7}
292 adcs sum, sum, r4
293 adcs sum, sum, r5
294 adcs sum, sum, r6
295 adcs sum, sum, r7
296 mov r4, r8, pull #24
297 sub ip, ip, #16
298 teq ip, #0
299 bne 1b
    /* ip = len & 12: optional 8-byte step, then optional 4-byte step. */
3002: ands ip, len, #12
301 beq 4f
302 tst ip, #8
303 beq 3f
304 load2l r5, r6
305 orr r4, r4, r5, push #8
306 mov r5, r5, pull #24
307 orr r5, r5, r6, push #8
308 stmia dst!, {r4, r5}
309 adcs sum, sum, r4
310 adcs sum, sum, r5
311 mov r4, r6, pull #24
312 tst ip, #4
313 beq 4f
3143: load1l r5
315 orr r4, r4, r5, push #8
316 str r4, [dst], #4
317 adcs sum, sum, r4
318 mov r4, r5, pull #24
    /*
     * Trailing 1..3 bytes. The first comes from r4; when two or more are
     * wanted, r4 is summed, a fresh word is loaded for the second byte, and
     * r5 is left holding that word's following byte for the shared tail.
     */
3194: ands len, len, #3
320 beq .Ldone
321 mov r5, r4, get_byte_0
322 tst len, #2
323 beq .Lexit
324 strb r5, [dst], #1
325 adcs sum, sum, r4
326 load1l r4
327 mov r5, r4, get_byte_0
328 strb r5, [dst], #1
329 adcs sum, sum, r4, push #24
330 mov r5, r4, get_byte_1
331 b .Lexit
332FN_EXIT
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * linux/arch/arm/lib/csumpartialcopygeneric.S
4 *
5 * Copyright (C) 1995-2001 Russell King
6 */
7#include <asm/assembler.h>
8
9/*
10 * unsigned int
11 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
12 * r0 = src, r1 = dst, r2 = len, r3 = sum (working accumulator; FN_ENTRY below sets it to -1, so no caller value is read from r3)
13 * Returns : r0 = checksum
14 *
15 * Note that 'tst' and 'teq' preserve the carry flag.
16 */
17
/* Symbolic names for the four registers that carry the routine's state: source pointer, destination pointer, remaining length and running checksum. */
18src .req r0
19dst .req r1
20len .req r2
21sum .req r3
22
/*
 * Zero-length exit, reached from .Lless8 when len == 0: nothing is copied
 * and the current value of sum is returned in r0.
 * load_regs is a macro defined outside this view -- presumably it undoes
 * save_regs and returns to the caller; confirm against its definition.
 */
23.Lzero: mov r0, sum
24 load_regs
25
26 /*
27 * Align an unaligned destination pointer. We know that
28 * we have >= 8 bytes here, so we don't need to check
29 * the length. Note that the source pointer hasn't been
30 * aligned yet.
31 */
/*
 * Subroutine: called with 'blne' from the main entry when (dst & 3) != 0 and
 * returns through lr. Copies and checksums 1, 2 or 3 leading bytes so that
 * dst ends up on a 4-byte boundary, decrementing len accordingly.
 * The carry flag is threaded through the adcs instructions and is still
 * live when this returns; 'sub' (no S suffix) and 'tst' leave it alone.
 * load1b / load2b, put_byte_0 / put_byte_1 and ret / reteq are macros
 * defined outside this view (ret presumably from <asm/assembler.h>);
 * confirm their exact behaviour against their definitions.
 */
32.Ldst_unaligned:
33 tst dst, #1
34 beq .Ldst_16bit
35
    /* dst is odd: move one byte to make it even. */
36 load1b ip
37 sub len, len, #1
38 adcs sum, sum, ip, put_byte_1 @ update checksum
39 strb ip, [dst], #1
40 tst dst, #2
41 reteq lr @ dst is now 32bit aligned
42
    /* dst is even but not a multiple of 4: move two more bytes. */
43.Ldst_16bit: load2b r8, ip
44 sub len, len, #2
45 adcs sum, sum, r8, put_byte_0
46 strb r8, [dst], #1
47 adcs sum, sum, ip, put_byte_1
48 strb ip, [dst], #1
49 ret lr @ dst is now 32bit aligned
50
51 /*
52 * Handle 0 to 7 bytes, with any alignment of source and
53 * destination pointers. Note that when we get here, C = 0
54 */
/*
 * Short-copy path, entered from the main entry via 'blo' after
 * 'cmp len, #8' (so len is 0..7 and the carry flag is clear).
 * Shape: optional single byte if dst is odd, then byte pairs while
 * (len & 6) != 0, then one optional trailing byte; finishes at .Ldone.
 * load1b / load2b / put_byte_* are macros defined outside this view.
 */
55.Lless8: teq len, #0 @ check for zero count
56 beq .Lzero
57
58 /* we must have at least one byte. */
59 tst dst, #1 @ dst 16-bit aligned
60 beq .Lless8_aligned
61
62 /* Align dst */
63 load1b ip
64 sub len, len, #1
65 adcs sum, sum, ip, put_byte_1 @ update checksum
66 strb ip, [dst], #1
67 tst len, #6
68 beq .Lless8_byteonly
69
    /* Pair loop: two bytes per pass; the loop test sits at .Lless8_aligned. */
701: load2b r8, ip
71 sub len, len, #2
72 adcs sum, sum, r8, put_byte_0
73 strb r8, [dst], #1
74 adcs sum, sum, ip, put_byte_1
75 strb ip, [dst], #1
76.Lless8_aligned:
77 tst len, #6
78 bne 1b
    /* At most one byte is left here (bits 1 and 2 of len are clear). */
79.Lless8_byteonly:
80 tst len, #1
81 beq .Ldone
82 load1b r8
83 adcs sum, sum, r8, put_byte_0 @ update checksum
84 strb r8, [dst], #1
85 b .Ldone
86
/*
 * Main entry point of the checksum-and-copy routine, plus the fast path for
 * word-aligned src and dst and the shared tail (.Lexit / .Ldone).
 * In:  src, dst, len.  Out: r0 = checksum.
 * sum is initialised to -1 (all ones) right after save_regs, so whatever
 * the caller left in r3 is not used as a starting value.
 * The checksum is accumulated with a chain of adcs instructions, so the carry
 * flag is live between them; the code relies on the intervening
 * bics/ands/tst/teq/sub instructions not disturbing it.
 * FN_ENTRY, save_regs, load_regs, load1l/load2l/load4l, lspush, get_byte_*
 * and put_byte_* are macros defined outside this view; the notes on them
 * below are assumptions to confirm against their definitions.
 */
87FN_ENTRY
88 save_regs
89 mov sum, #-1
90
91 cmp len, #8 @ Ensure that we have at least
92 blo .Lless8 @ 8 bytes to copy.
93
94 adds sum, sum, #0 @ C = 0
95 tst dst, #3 @ Test destination alignment
96 blne .Ldst_unaligned @ align destination, return here
97
98 /*
99 * Ok, the dst pointer is now 32bit aligned, and we know
100 * that we must have more than 4 bytes to copy. Note
101 * that C contains the carry from the dst alignment above.
102 */
103
104 tst src, #3 @ Test source alignment
105 bne .Lsrc_not_aligned
106
107 /* Routine for src & dst aligned */
108
    /* ip = len with the low four bits cleared: the byte count handled 16 at a time. */
109 bics ip, len, #15
110 beq 2f
111
1121: load4l r4, r5, r6, r7
113 stmia dst!, {r4, r5, r6, r7}
114 adcs sum, sum, r4
115 adcs sum, sum, r5
116 adcs sum, sum, r6
117 adcs sum, sum, r7
118 sub ip, ip, #16
119 teq ip, #0
120 bne 1b
121
    /* ip = len & 12: an optional 8-byte step, then an optional 4-byte step. */
1222: ands ip, len, #12
123 beq 4f
124 tst ip, #8
125 beq 3f
126 load2l r4, r5
127 stmia dst!, {r4, r5}
128 adcs sum, sum, r4
129 adcs sum, sum, r5
130 tst ip, #4
131 beq 4f
132
1333: load1l r4
134 str r4, [dst], #4
135 adcs sum, sum, r4
136
    /*
     * Trailing 1..3 bytes. NOTE(review): load1l presumably fetches a whole
     * word of which only the first 'len' bytes are stored/summed -- confirm.
     * r5 always holds the next byte to store when control reaches .Lexit.
     */
1374: ands len, len, #3
138 beq .Ldone
139 load1l r4
140 tst len, #2
141 mov r5, r4, get_byte_0
142 beq .Lexit
143 adcs sum, sum, r4, lspush #16
144 strb r5, [dst], #1
145 mov r5, r4, get_byte_1
146 strb r5, [dst], #1
147 mov r5, r4, get_byte_2
    /* Shared tail: if bit 0 of len is set, store the byte in r5 and add it (masked to 8 bits). */
148.Lexit: tst len, #1
149 strbne r5, [dst], #1
150 andne r5, r5, #255
151 adcsne sum, sum, r5, put_byte_0
152
153 /*
154 * If the dst pointer was not 16-bit aligned, we
155 * need to rotate the checksum here to get around
156 * the inefficient byte manipulations in the
157 * architecture independent code.
158 */
    /*
     * The final carry is folded into the result first. dst itself has been
     * advanced by now, so the value tested is reloaded from the top of the
     * stack -- presumably the original dst as stored by save_regs (confirm).
     */
159.Ldone: adc r0, sum, #0
160 ldr sum, [sp, #0] @ dst
161 tst sum, #1
162 movne r0, r0, ror #8
163 load_regs
164
/*
 * dst is word aligned but src is not. The carry left over from the dst
 * alignment step is folded into sum first, because the 'cmp ip, #2' used
 * for dispatch below overwrites the carry flag.
 * src is rounded down to a word boundary and the first word is fetched;
 * ip = src & 3 selects the variant: 2 -> .Lsrc2_aligned, 3 -> .Lsrc3_aligned,
 * 1 falls through. On the fall-through path cmp compared 1 with 2, which
 * leaves the carry clear (hence the existing "C = 0" note).
 * Throughout, r4 holds the not-yet-written bytes of the previous source word
 * and is merged with the leading bytes of the next one to form each output word.
 * load*l, lspull, lspush and get_byte_* are macros defined outside this view
 * -- presumably word loads from src and endian-aware shifts; confirm there.
 */
165.Lsrc_not_aligned:
166 adc sum, sum, #0 @ include C from dst alignment
167 and ip, src, #3
168 bic src, src, #3
169 load1l r5
170 cmp ip, #2
171 beq .Lsrc2_aligned
172 bhi .Lsrc3_aligned
173 mov r4, r5, lspull #8 @ C = 0
174 bics ip, len, #15
175 beq 2f
    /* 16 bytes per pass: four output words rebuilt from five source words (r4 carry-in plus r5-r8). */
1761: load4l r5, r6, r7, r8
177 orr r4, r4, r5, lspush #24
178 mov r5, r5, lspull #8
179 orr r5, r5, r6, lspush #24
180 mov r6, r6, lspull #8
181 orr r6, r6, r7, lspush #24
182 mov r7, r7, lspull #8
183 orr r7, r7, r8, lspush #24
184 stmia dst!, {r4, r5, r6, r7}
185 adcs sum, sum, r4
186 adcs sum, sum, r5
187 adcs sum, sum, r6
188 adcs sum, sum, r7
189 mov r4, r8, lspull #8
190 sub ip, ip, #16
191 teq ip, #0
192 bne 1b
    /* ip = len & 12: optional 8-byte step, then optional 4-byte step. */
1932: ands ip, len, #12
194 beq 4f
195 tst ip, #8
196 beq 3f
197 load2l r5, r6
198 orr r4, r4, r5, lspush #24
199 mov r5, r5, lspull #8
200 orr r5, r5, r6, lspush #24
201 stmia dst!, {r4, r5}
202 adcs sum, sum, r4
203 adcs sum, sum, r5
204 mov r4, r6, lspull #8
205 tst ip, #4
206 beq 4f
2073: load1l r5
208 orr r4, r4, r5, lspush #24
209 str r4, [dst], #4
210 adcs sum, sum, r4
211 mov r4, r5, lspull #8
    /* Trailing 1..3 bytes are taken from what is left in r4; no further load is issued on this path. */
2124: ands len, len, #3
213 beq .Ldone
214 mov r5, r4, get_byte_0
215 tst len, #2
216 beq .Lexit
217 adcs sum, sum, r4, lspush #16
218 strb r5, [dst], #1
219 mov r5, r4, get_byte_1
220 strb r5, [dst], #1
221 mov r5, r4, get_byte_2
222 b .Lexit
223
/*
 * Variant for (src & 3) == 2, reached by 'beq' from .Lsrc_not_aligned with
 * r5 holding the first source word. The dispatching cmp compared equal
 * values and so left the carry set; 'adds sum, sum, #0' clears it again
 * before the adcs chain starts. r4 carries the leftover part of each source
 * word into the next output word (lspull #16 / lspush #16).
 * load*l, load1b, lspull, lspush and get_byte_* are macros defined outside
 * this view; confirm their exact behaviour there.
 */
224.Lsrc2_aligned: mov r4, r5, lspull #16
225 adds sum, sum, #0
226 bics ip, len, #15
227 beq 2f
    /* 16 bytes per pass. */
2281: load4l r5, r6, r7, r8
229 orr r4, r4, r5, lspush #16
230 mov r5, r5, lspull #16
231 orr r5, r5, r6, lspush #16
232 mov r6, r6, lspull #16
233 orr r6, r6, r7, lspush #16
234 mov r7, r7, lspull #16
235 orr r7, r7, r8, lspush #16
236 stmia dst!, {r4, r5, r6, r7}
237 adcs sum, sum, r4
238 adcs sum, sum, r5
239 adcs sum, sum, r6
240 adcs sum, sum, r7
241 mov r4, r8, lspull #16
242 sub ip, ip, #16
243 teq ip, #0
244 bne 1b
    /* ip = len & 12: optional 8-byte step, then optional 4-byte step. */
2452: ands ip, len, #12
246 beq 4f
247 tst ip, #8
248 beq 3f
249 load2l r5, r6
250 orr r4, r4, r5, lspush #16
251 mov r5, r5, lspull #16
252 orr r5, r5, r6, lspush #16
253 stmia dst!, {r4, r5}
254 adcs sum, sum, r4
255 adcs sum, sum, r5
256 mov r4, r6, lspull #16
257 tst ip, #4
258 beq 4f
2593: load1l r5
260 orr r4, r4, r5, lspush #16
261 str r4, [dst], #4
262 adcs sum, sum, r4
263 mov r4, r5, lspull #16
    /*
     * Trailing 1..3 bytes. r4 is summed whole and its first two bytes are
     * stored; if a third byte is wanted it is fetched separately with
     * load1b before joining the shared tail at .Lexit.
     */
2644: ands len, len, #3
265 beq .Ldone
266 mov r5, r4, get_byte_0
267 tst len, #2
268 beq .Lexit
269 adcs sum, sum, r4
270 strb r5, [dst], #1
271 mov r5, r4, get_byte_1
272 strb r5, [dst], #1
273 tst len, #1
274 beq .Ldone
275 load1b r5
276 b .Lexit
277
/*
 * Variant for (src & 3) == 3, reached by 'bhi' from .Lsrc_not_aligned with
 * r5 holding the first source word. The dispatching cmp (3 against 2) left
 * the carry set; 'adds sum, sum, #0' clears it again before the adcs chain.
 * r4 carries the leftover part of each source word into the next output
 * word (lspull #24 / lspush #8).
 * load*l, lspull, lspush, get_byte_* and FN_EXIT are macros defined outside
 * this view; confirm their exact behaviour there.
 */
278.Lsrc3_aligned: mov r4, r5, lspull #24
279 adds sum, sum, #0
280 bics ip, len, #15
281 beq 2f
    /* 16 bytes per pass. */
2821: load4l r5, r6, r7, r8
283 orr r4, r4, r5, lspush #8
284 mov r5, r5, lspull #24
285 orr r5, r5, r6, lspush #8
286 mov r6, r6, lspull #24
287 orr r6, r6, r7, lspush #8
288 mov r7, r7, lspull #24
289 orr r7, r7, r8, lspush #8
290 stmia dst!, {r4, r5, r6, r7}
291 adcs sum, sum, r4
292 adcs sum, sum, r5
293 adcs sum, sum, r6
294 adcs sum, sum, r7
295 mov r4, r8, lspull #24
296 sub ip, ip, #16
297 teq ip, #0
298 bne 1b
    /* ip = len & 12: optional 8-byte step, then optional 4-byte step. */
2992: ands ip, len, #12
300 beq 4f
301 tst ip, #8
302 beq 3f
303 load2l r5, r6
304 orr r4, r4, r5, lspush #8
305 mov r5, r5, lspull #24
306 orr r5, r5, r6, lspush #8
307 stmia dst!, {r4, r5}
308 adcs sum, sum, r4
309 adcs sum, sum, r5
310 mov r4, r6, lspull #24
311 tst ip, #4
312 beq 4f
3133: load1l r5
314 orr r4, r4, r5, lspush #8
315 str r4, [dst], #4
316 adcs sum, sum, r4
317 mov r4, r5, lspull #24
    /*
     * Trailing 1..3 bytes. The first comes from r4; when two or more are
     * wanted, r4 is summed, a fresh word is loaded for the second byte, and
     * r5 is left holding that word's following byte for the shared tail.
     */
3184: ands len, len, #3
319 beq .Ldone
320 mov r5, r4, get_byte_0
321 tst len, #2
322 beq .Lexit
323 strb r5, [dst], #1
324 adcs sum, sum, r4
325 load1l r4
326 mov r5, r4, get_byte_0
327 strb r5, [dst], #1
328 adcs sum, sum, r4, lspush #24
329 mov r5, r4, get_byte_1
330 b .Lexit
331FN_EXIT