/*
 * linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 * Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <asm/assembler.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
21src .req r0
22dst .req r1
23len .req r2
24sum .req r3
25
26.Lzero: mov r0, sum
27 load_regs
28
29 /*
30 * Align an unaligned destination pointer. We know that
31 * we have >= 8 bytes here, so we don't need to check
32 * the length. Note that the source pointer hasn't been
33 * aligned yet.
34 */
35.Ldst_unaligned:
36 tst dst, #1
37 beq .Ldst_16bit
38
39 load1b ip
40 sub len, len, #1
41 adcs sum, sum, ip, put_byte_1 @ update checksum
42 strb ip, [dst], #1
43 tst dst, #2
44 reteq lr @ dst is now 32bit aligned
45
46.Ldst_16bit: load2b r8, ip
47 sub len, len, #2
48 adcs sum, sum, r8, put_byte_0
49 strb r8, [dst], #1
50 adcs sum, sum, ip, put_byte_1
51 strb ip, [dst], #1
52 ret lr @ dst is now 32bit aligned
53
54 /*
55 * Handle 0 to 7 bytes, with any alignment of source and
56 * destination pointers. Note that when we get here, C = 0
57 */
58.Lless8: teq len, #0 @ check for zero count
59 beq .Lzero
60
61 /* we must have at least one byte. */
62 tst dst, #1 @ dst 16-bit aligned
63 beq .Lless8_aligned
64
65 /* Align dst */
66 load1b ip
67 sub len, len, #1
68 adcs sum, sum, ip, put_byte_1 @ update checksum
69 strb ip, [dst], #1
70 tst len, #6
71 beq .Lless8_byteonly
72
731: load2b r8, ip
74 sub len, len, #2
75 adcs sum, sum, r8, put_byte_0
76 strb r8, [dst], #1
77 adcs sum, sum, ip, put_byte_1
78 strb ip, [dst], #1
79.Lless8_aligned:
80 tst len, #6
81 bne 1b
82.Lless8_byteonly:
83 tst len, #1
84 beq .Ldone
85 load1b r8
86 adcs sum, sum, r8, put_byte_0 @ update checksum
87 strb r8, [dst], #1
88 b .Ldone
89
90FN_ENTRY
91 save_regs
92
93 cmp len, #8 @ Ensure that we have at least
94 blo .Lless8 @ 8 bytes to copy.
95
96 adds sum, sum, #0 @ C = 0
97 tst dst, #3 @ Test destination alignment
98 blne .Ldst_unaligned @ align destination, return here
99
100 /*
101 * Ok, the dst pointer is now 32bit aligned, and we know
102 * that we must have more than 4 bytes to copy. Note
103 * that C contains the carry from the dst alignment above.
104 */
105
106 tst src, #3 @ Test source alignment
107 bne .Lsrc_not_aligned
108
109 /* Routine for src & dst aligned */
110
111 bics ip, len, #15
112 beq 2f
113
1141: load4l r4, r5, r6, r7
115 stmia dst!, {r4, r5, r6, r7}
116 adcs sum, sum, r4
117 adcs sum, sum, r5
118 adcs sum, sum, r6
119 adcs sum, sum, r7
120 sub ip, ip, #16
121 teq ip, #0
122 bne 1b
123
1242: ands ip, len, #12
125 beq 4f
126 tst ip, #8
127 beq 3f
128 load2l r4, r5
129 stmia dst!, {r4, r5}
130 adcs sum, sum, r4
131 adcs sum, sum, r5
132 tst ip, #4
133 beq 4f
134
1353: load1l r4
136 str r4, [dst], #4
137 adcs sum, sum, r4
138
1394: ands len, len, #3
140 beq .Ldone
141 load1l r4
142 tst len, #2
143 mov r5, r4, get_byte_0
144 beq .Lexit
145 adcs sum, sum, r4, lspush #16
146 strb r5, [dst], #1
147 mov r5, r4, get_byte_1
148 strb r5, [dst], #1
149 mov r5, r4, get_byte_2
150.Lexit: tst len, #1
151 strneb r5, [dst], #1
152 andne r5, r5, #255
153 adcnes sum, sum, r5, put_byte_0
154
155 /*
156 * If the dst pointer was not 16-bit aligned, we
157 * need to rotate the checksum here to get around
158 * the inefficient byte manipulations in the
159 * architecture independent code.
160 */
161.Ldone: adc r0, sum, #0
162 ldr sum, [sp, #0] @ dst
163 tst sum, #1
164 movne r0, r0, ror #8
165 load_regs
166
167.Lsrc_not_aligned:
168 adc sum, sum, #0 @ include C from dst alignment
169 and ip, src, #3
170 bic src, src, #3
171 load1l r5
172 cmp ip, #2
173 beq .Lsrc2_aligned
174 bhi .Lsrc3_aligned
175 mov r4, r5, lspull #8 @ C = 0
176 bics ip, len, #15
177 beq 2f
1781: load4l r5, r6, r7, r8
179 orr r4, r4, r5, lspush #24
180 mov r5, r5, lspull #8
181 orr r5, r5, r6, lspush #24
182 mov r6, r6, lspull #8
183 orr r6, r6, r7, lspush #24
184 mov r7, r7, lspull #8
185 orr r7, r7, r8, lspush #24
186 stmia dst!, {r4, r5, r6, r7}
187 adcs sum, sum, r4
188 adcs sum, sum, r5
189 adcs sum, sum, r6
190 adcs sum, sum, r7
191 mov r4, r8, lspull #8
192 sub ip, ip, #16
193 teq ip, #0
194 bne 1b
1952: ands ip, len, #12
196 beq 4f
197 tst ip, #8
198 beq 3f
199 load2l r5, r6
200 orr r4, r4, r5, lspush #24
201 mov r5, r5, lspull #8
202 orr r5, r5, r6, lspush #24
203 stmia dst!, {r4, r5}
204 adcs sum, sum, r4
205 adcs sum, sum, r5
206 mov r4, r6, lspull #8
207 tst ip, #4
208 beq 4f
2093: load1l r5
210 orr r4, r4, r5, lspush #24
211 str r4, [dst], #4
212 adcs sum, sum, r4
213 mov r4, r5, lspull #8
2144: ands len, len, #3
215 beq .Ldone
216 mov r5, r4, get_byte_0
217 tst len, #2
218 beq .Lexit
219 adcs sum, sum, r4, lspush #16
220 strb r5, [dst], #1
221 mov r5, r4, get_byte_1
222 strb r5, [dst], #1
223 mov r5, r4, get_byte_2
224 b .Lexit
225
226.Lsrc2_aligned: mov r4, r5, lspull #16
227 adds sum, sum, #0
228 bics ip, len, #15
229 beq 2f
2301: load4l r5, r6, r7, r8
231 orr r4, r4, r5, lspush #16
232 mov r5, r5, lspull #16
233 orr r5, r5, r6, lspush #16
234 mov r6, r6, lspull #16
235 orr r6, r6, r7, lspush #16
236 mov r7, r7, lspull #16
237 orr r7, r7, r8, lspush #16
238 stmia dst!, {r4, r5, r6, r7}
239 adcs sum, sum, r4
240 adcs sum, sum, r5
241 adcs sum, sum, r6
242 adcs sum, sum, r7
243 mov r4, r8, lspull #16
244 sub ip, ip, #16
245 teq ip, #0
246 bne 1b
2472: ands ip, len, #12
248 beq 4f
249 tst ip, #8
250 beq 3f
251 load2l r5, r6
252 orr r4, r4, r5, lspush #16
253 mov r5, r5, lspull #16
254 orr r5, r5, r6, lspush #16
255 stmia dst!, {r4, r5}
256 adcs sum, sum, r4
257 adcs sum, sum, r5
258 mov r4, r6, lspull #16
259 tst ip, #4
260 beq 4f
2613: load1l r5
262 orr r4, r4, r5, lspush #16
263 str r4, [dst], #4
264 adcs sum, sum, r4
265 mov r4, r5, lspull #16
2664: ands len, len, #3
267 beq .Ldone
268 mov r5, r4, get_byte_0
269 tst len, #2
270 beq .Lexit
271 adcs sum, sum, r4
272 strb r5, [dst], #1
273 mov r5, r4, get_byte_1
274 strb r5, [dst], #1
275 tst len, #1
276 beq .Ldone
277 load1b r5
278 b .Lexit
279
280.Lsrc3_aligned: mov r4, r5, lspull #24
281 adds sum, sum, #0
282 bics ip, len, #15
283 beq 2f
2841: load4l r5, r6, r7, r8
285 orr r4, r4, r5, lspush #8
286 mov r5, r5, lspull #24
287 orr r5, r5, r6, lspush #8
288 mov r6, r6, lspull #24
289 orr r6, r6, r7, lspush #8
290 mov r7, r7, lspull #24
291 orr r7, r7, r8, lspush #8
292 stmia dst!, {r4, r5, r6, r7}
293 adcs sum, sum, r4
294 adcs sum, sum, r5
295 adcs sum, sum, r6
296 adcs sum, sum, r7
297 mov r4, r8, lspull #24
298 sub ip, ip, #16
299 teq ip, #0
300 bne 1b
3012: ands ip, len, #12
302 beq 4f
303 tst ip, #8
304 beq 3f
305 load2l r5, r6
306 orr r4, r4, r5, lspush #8
307 mov r5, r5, lspull #24
308 orr r5, r5, r6, lspush #8
309 stmia dst!, {r4, r5}
310 adcs sum, sum, r4
311 adcs sum, sum, r5
312 mov r4, r6, lspull #24
313 tst ip, #4
314 beq 4f
3153: load1l r5
316 orr r4, r4, r5, lspush #8
317 str r4, [dst], #4
318 adcs sum, sum, r4
319 mov r4, r5, lspull #24
3204: ands len, len, #3
321 beq .Ldone
322 mov r5, r4, get_byte_0
323 tst len, #2
324 beq .Lexit
325 strb r5, [dst], #1
326 adcs sum, sum, r4
327 load1l r4
328 mov r5, r4, get_byte_0
329 strb r5, [dst], #1
330 adcs sum, sum, r4, lspush #24
331 mov r5, r4, get_byte_1
332 b .Lexit
333FN_EXIT
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 * Copyright (C) 1995-2001 Russell King
 */
#include <asm/assembler.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
18src .req r0
19dst .req r1
20len .req r2
21sum .req r3
22
23.Lzero: mov r0, sum
24 load_regs
25
26 /*
27 * Align an unaligned destination pointer. We know that
28 * we have >= 8 bytes here, so we don't need to check
29 * the length. Note that the source pointer hasn't been
30 * aligned yet.
31 */
32.Ldst_unaligned:
33 tst dst, #1
34 beq .Ldst_16bit
35
36 load1b ip
37 sub len, len, #1
38 adcs sum, sum, ip, put_byte_1 @ update checksum
39 strb ip, [dst], #1
40 tst dst, #2
41 reteq lr @ dst is now 32bit aligned
42
43.Ldst_16bit: load2b r8, ip
44 sub len, len, #2
45 adcs sum, sum, r8, put_byte_0
46 strb r8, [dst], #1
47 adcs sum, sum, ip, put_byte_1
48 strb ip, [dst], #1
49 ret lr @ dst is now 32bit aligned
50
51 /*
52 * Handle 0 to 7 bytes, with any alignment of source and
53 * destination pointers. Note that when we get here, C = 0
54 */
55.Lless8: teq len, #0 @ check for zero count
56 beq .Lzero
57
58 /* we must have at least one byte. */
59 tst dst, #1 @ dst 16-bit aligned
60 beq .Lless8_aligned
61
62 /* Align dst */
63 load1b ip
64 sub len, len, #1
65 adcs sum, sum, ip, put_byte_1 @ update checksum
66 strb ip, [dst], #1
67 tst len, #6
68 beq .Lless8_byteonly
69
701: load2b r8, ip
71 sub len, len, #2
72 adcs sum, sum, r8, put_byte_0
73 strb r8, [dst], #1
74 adcs sum, sum, ip, put_byte_1
75 strb ip, [dst], #1
76.Lless8_aligned:
77 tst len, #6
78 bne 1b
79.Lless8_byteonly:
80 tst len, #1
81 beq .Ldone
82 load1b r8
83 adcs sum, sum, r8, put_byte_0 @ update checksum
84 strb r8, [dst], #1
85 b .Ldone
86
87FN_ENTRY
88 save_regs
89
90 cmp len, #8 @ Ensure that we have at least
91 blo .Lless8 @ 8 bytes to copy.
92
93 adds sum, sum, #0 @ C = 0
94 tst dst, #3 @ Test destination alignment
95 blne .Ldst_unaligned @ align destination, return here
96
97 /*
98 * Ok, the dst pointer is now 32bit aligned, and we know
99 * that we must have more than 4 bytes to copy. Note
100 * that C contains the carry from the dst alignment above.
101 */
102
103 tst src, #3 @ Test source alignment
104 bne .Lsrc_not_aligned
105
106 /* Routine for src & dst aligned */
107
108 bics ip, len, #15
109 beq 2f
110
1111: load4l r4, r5, r6, r7
112 stmia dst!, {r4, r5, r6, r7}
113 adcs sum, sum, r4
114 adcs sum, sum, r5
115 adcs sum, sum, r6
116 adcs sum, sum, r7
117 sub ip, ip, #16
118 teq ip, #0
119 bne 1b
120
1212: ands ip, len, #12
122 beq 4f
123 tst ip, #8
124 beq 3f
125 load2l r4, r5
126 stmia dst!, {r4, r5}
127 adcs sum, sum, r4
128 adcs sum, sum, r5
129 tst ip, #4
130 beq 4f
131
1323: load1l r4
133 str r4, [dst], #4
134 adcs sum, sum, r4
135
1364: ands len, len, #3
137 beq .Ldone
138 load1l r4
139 tst len, #2
140 mov r5, r4, get_byte_0
141 beq .Lexit
142 adcs sum, sum, r4, lspush #16
143 strb r5, [dst], #1
144 mov r5, r4, get_byte_1
145 strb r5, [dst], #1
146 mov r5, r4, get_byte_2
147.Lexit: tst len, #1
148 strbne r5, [dst], #1
149 andne r5, r5, #255
150 adcsne sum, sum, r5, put_byte_0
151
152 /*
153 * If the dst pointer was not 16-bit aligned, we
154 * need to rotate the checksum here to get around
155 * the inefficient byte manipulations in the
156 * architecture independent code.
157 */
158.Ldone: adc r0, sum, #0
159 ldr sum, [sp, #0] @ dst
160 tst sum, #1
161 movne r0, r0, ror #8
162 load_regs
163
164.Lsrc_not_aligned:
165 adc sum, sum, #0 @ include C from dst alignment
166 and ip, src, #3
167 bic src, src, #3
168 load1l r5
169 cmp ip, #2
170 beq .Lsrc2_aligned
171 bhi .Lsrc3_aligned
172 mov r4, r5, lspull #8 @ C = 0
173 bics ip, len, #15
174 beq 2f
1751: load4l r5, r6, r7, r8
176 orr r4, r4, r5, lspush #24
177 mov r5, r5, lspull #8
178 orr r5, r5, r6, lspush #24
179 mov r6, r6, lspull #8
180 orr r6, r6, r7, lspush #24
181 mov r7, r7, lspull #8
182 orr r7, r7, r8, lspush #24
183 stmia dst!, {r4, r5, r6, r7}
184 adcs sum, sum, r4
185 adcs sum, sum, r5
186 adcs sum, sum, r6
187 adcs sum, sum, r7
188 mov r4, r8, lspull #8
189 sub ip, ip, #16
190 teq ip, #0
191 bne 1b
1922: ands ip, len, #12
193 beq 4f
194 tst ip, #8
195 beq 3f
196 load2l r5, r6
197 orr r4, r4, r5, lspush #24
198 mov r5, r5, lspull #8
199 orr r5, r5, r6, lspush #24
200 stmia dst!, {r4, r5}
201 adcs sum, sum, r4
202 adcs sum, sum, r5
203 mov r4, r6, lspull #8
204 tst ip, #4
205 beq 4f
2063: load1l r5
207 orr r4, r4, r5, lspush #24
208 str r4, [dst], #4
209 adcs sum, sum, r4
210 mov r4, r5, lspull #8
2114: ands len, len, #3
212 beq .Ldone
213 mov r5, r4, get_byte_0
214 tst len, #2
215 beq .Lexit
216 adcs sum, sum, r4, lspush #16
217 strb r5, [dst], #1
218 mov r5, r4, get_byte_1
219 strb r5, [dst], #1
220 mov r5, r4, get_byte_2
221 b .Lexit
222
223.Lsrc2_aligned: mov r4, r5, lspull #16
224 adds sum, sum, #0
225 bics ip, len, #15
226 beq 2f
2271: load4l r5, r6, r7, r8
228 orr r4, r4, r5, lspush #16
229 mov r5, r5, lspull #16
230 orr r5, r5, r6, lspush #16
231 mov r6, r6, lspull #16
232 orr r6, r6, r7, lspush #16
233 mov r7, r7, lspull #16
234 orr r7, r7, r8, lspush #16
235 stmia dst!, {r4, r5, r6, r7}
236 adcs sum, sum, r4
237 adcs sum, sum, r5
238 adcs sum, sum, r6
239 adcs sum, sum, r7
240 mov r4, r8, lspull #16
241 sub ip, ip, #16
242 teq ip, #0
243 bne 1b
2442: ands ip, len, #12
245 beq 4f
246 tst ip, #8
247 beq 3f
248 load2l r5, r6
249 orr r4, r4, r5, lspush #16
250 mov r5, r5, lspull #16
251 orr r5, r5, r6, lspush #16
252 stmia dst!, {r4, r5}
253 adcs sum, sum, r4
254 adcs sum, sum, r5
255 mov r4, r6, lspull #16
256 tst ip, #4
257 beq 4f
2583: load1l r5
259 orr r4, r4, r5, lspush #16
260 str r4, [dst], #4
261 adcs sum, sum, r4
262 mov r4, r5, lspull #16
2634: ands len, len, #3
264 beq .Ldone
265 mov r5, r4, get_byte_0
266 tst len, #2
267 beq .Lexit
268 adcs sum, sum, r4
269 strb r5, [dst], #1
270 mov r5, r4, get_byte_1
271 strb r5, [dst], #1
272 tst len, #1
273 beq .Ldone
274 load1b r5
275 b .Lexit
276
277.Lsrc3_aligned: mov r4, r5, lspull #24
278 adds sum, sum, #0
279 bics ip, len, #15
280 beq 2f
2811: load4l r5, r6, r7, r8
282 orr r4, r4, r5, lspush #8
283 mov r5, r5, lspull #24
284 orr r5, r5, r6, lspush #8
285 mov r6, r6, lspull #24
286 orr r6, r6, r7, lspush #8
287 mov r7, r7, lspull #24
288 orr r7, r7, r8, lspush #8
289 stmia dst!, {r4, r5, r6, r7}
290 adcs sum, sum, r4
291 adcs sum, sum, r5
292 adcs sum, sum, r6
293 adcs sum, sum, r7
294 mov r4, r8, lspull #24
295 sub ip, ip, #16
296 teq ip, #0
297 bne 1b
2982: ands ip, len, #12
299 beq 4f
300 tst ip, #8
301 beq 3f
302 load2l r5, r6
303 orr r4, r4, r5, lspush #8
304 mov r5, r5, lspull #24
305 orr r5, r5, r6, lspush #8
306 stmia dst!, {r4, r5}
307 adcs sum, sum, r4
308 adcs sum, sum, r5
309 mov r4, r6, lspull #24
310 tst ip, #4
311 beq 4f
3123: load1l r5
313 orr r4, r4, r5, lspush #8
314 str r4, [dst], #4
315 adcs sum, sum, r4
316 mov r4, r5, lspull #24
3174: ands len, len, #3
318 beq .Ldone
319 mov r5, r4, get_byte_0
320 tst len, #2
321 beq .Lexit
322 strb r5, [dst], #1
323 adcs sum, sum, r4
324 load1l r4
325 mov r5, r4, get_byte_0
326 strb r5, [dst], #1
327 adcs sum, sum, r4, lspush #24
328 mov r5, r4, get_byte_1
329 b .Lexit
330FN_EXIT