Loading...
1/*
2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
3 *
4 * Author: Nicolas Pitre <nico@fluxnic.net>
5 * - contributed to gcc-3.4 on Sep 30, 2003
6 * - adapted for the Linux kernel on Oct 2, 2003
7 */
8
9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10
11This file is free software; you can redistribute it and/or modify it
12under the terms of the GNU General Public License as published by the
13Free Software Foundation; either version 2, or (at your option) any
14later version.
15
16In addition to the permissions in the GNU General Public License, the
17Free Software Foundation gives you unlimited permission to link the
18compiled version of this file into combinations with other programs,
19and to distribute those combinations without any restriction coming
20from the use of this file. (The General Public License restrictions
21do apply in other respects; for example, they cover modification of
22the file, and distribution when not linked into a combine
23executable.)
24
25This file is distributed in the hope that it will be useful, but
26WITHOUT ANY WARRANTY; without even the implied warranty of
27MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28General Public License for more details.
29
30You should have received a copy of the GNU General Public License
31along with this program; see the file COPYING. If not, write to
32the Free Software Foundation, 59 Temple Place - Suite 330,
33Boston, MA 02111-1307, USA. */
34
35
36#include <linux/linkage.h>
37#include <asm/assembler.h>
38#include <asm/unwind.h>
39
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core; each pass of the main loop
 * produces up to four quotient bits.
 *   \dividend  in: numerator. Overwritten by the subtractions; no call
 *              site in this file reads it afterwards.
 *   \divisor   in: denominator. Shifted left, then right; clobbered.
 *   \result    out: quotient.
 *   \curbit    scratch: the quotient bit matching the current divisor shift.
 * Condition flags are clobbered.
 * Both expansions in this file are reached only after the caller has
 * branched away for a zero divisor, dividend <= divisor and power-of-two
 * divisors.
 */
40.macro ARM_DIV_BODY dividend, divisor, result, curbit
41
42#if __LINUX_ARM_ARCH__ >= 5
43
 @ CLZ path: move the divisor left until its top set bit lines up with
 @ the dividend's top set bit, and place curbit at the same offset.
44 clz \curbit, \divisor
45 clz \result, \dividend
46 sub \result, \curbit, \result @ shift = clz(divisor) - clz(dividend)
47 mov \curbit, #1
48 mov \divisor, \divisor, lsl \result
49 mov \curbit, \curbit, lsl \result @ curbit = 1 << shift
50 mov \result, #0 @ quotient accumulates from zero
51
52#else
53
54 @ Initially shift the divisor left 3 bits if possible,
55 @ set curbit accordingly. This allows for curbit to be located
56 @ at the left end of each 4 bit nibbles in the division loop
57 @ to save one loop in most cases.
58 tst \divisor, #0xe0000000
59 moveq \divisor, \divisor, lsl #3
60 moveq \curbit, #8
61 movne \curbit, #1
62
63 @ Unless the divisor is very big, shift it up in multiples of
64 @ four bits, since this is the amount of unwinding in the main
65 @ division loop. Continue shifting until the divisor is
66 @ larger than the dividend.
 @ (The second compare only runs when the first one left "lower", so
 @ the loop stops as soon as either bound is reached.)
671: cmp \divisor, #0x10000000
68 cmplo \divisor, \dividend
69 movlo \divisor, \divisor, lsl #4
70 movlo \curbit, \curbit, lsl #4
71 blo 1b
72
73 @ For very big divisors, we must shift it a bit at a time, or
74 @ we will be in danger of overflowing.
751: cmp \divisor, #0x80000000
76 cmplo \divisor, \dividend
77 movlo \divisor, \divisor, lsl #1
78 movlo \curbit, \curbit, lsl #1
79 blo 1b
80
81 mov \result, #0
82
83#endif
84
85 @ Division loop
 @ Each pass tries divisor, divisor >> 1, divisor >> 2 and divisor >> 3
 @ in turn; whenever one fits into the dividend it is subtracted and the
 @ correspondingly shifted curbit is ORed into the quotient.
861: cmp \dividend, \divisor
87 subhs \dividend, \dividend, \divisor
88 orrhs \result, \result, \curbit
89 cmp \dividend, \divisor, lsr #1
90 subhs \dividend, \dividend, \divisor, lsr #1
91 orrhs \result, \result, \curbit, lsr #1
92 cmp \dividend, \divisor, lsr #2
93 subhs \dividend, \dividend, \divisor, lsr #2
94 orrhs \result, \result, \curbit, lsr #2
95 cmp \dividend, \divisor, lsr #3
96 subhs \dividend, \dividend, \divisor, lsr #3
97 orrhs \result, \result, \curbit, lsr #3
 @ Leave the loop when the dividend has reached zero or when curbit has
 @ been shifted out to zero; otherwise step divisor and curbit down one
 @ nibble and go round again.
98 cmp \dividend, #0 @ Early termination?
99 movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
100 movne \divisor, \divisor, lsr #4
101 bne 1b
102
103.endm
104
105
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Leaves in \order the right-shift count that divides by \divisor.
 * The call sites in this file expand it only after checking that
 * (divisor & (divisor - 1)) == 0; the pre-ARMv5 path relies on that.
 * The CLZ path leaves \divisor and the flags untouched; the pre-ARMv5
 * path shifts \divisor down and clobbers the flags.
 */
106.macro ARM_DIV2_ORDER divisor, order
107
108#if __LINUX_ARM_ARCH__ >= 5
109
110 clz \order, \divisor
111 rsb \order, \order, #31 @ order = 31 - clz(divisor)
112
113#else
114
 @ Binary search for the set bit: drop 16, then 8, then 4 low bits
 @ whenever the value is still at least that large, adding the amount
 @ dropped to \order.
115 cmp \divisor, #(1 << 16)
116 movhs \divisor, \divisor, lsr #16
117 movhs \order, #16
118 movlo \order, #0
119
120 cmp \divisor, #(1 << 8)
121 movhs \divisor, \divisor, lsr #8
122 addhs \order, \order, #8
123
124 cmp \divisor, #(1 << 4)
125 movhs \divisor, \divisor, lsr #4
126 addhs \order, \order, #4
127
 @ What is left is below 16. Above 4 adds 3 (the value 8 for a power
 @ of two); otherwise divisor >> 1 maps 1, 2, 4 to +0, +1, +2.
128 cmp \divisor, #(1 << 2)
129 addhi \order, \order, #3
130 addls \order, \order, \divisor, lsr #1
131
132#endif
133
134.endm
135
136
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder by shift-and-subtract.
 *   \dividend  in: numerator; out: remainder.
 *   \divisor   in: denominator; clobbered.
 *   \order     scratch: number of bit positions the divisor was moved left,
 *              then used as a countdown of compare/subtract steps.
 *   \spare     scratch; written only on the CLZ (ARMv5+) path.
 * Condition flags are clobbered.
 * Both expansions in this file are reached only when the dividend is
 * greater than the divisor and the divisor is not a power of two.
 */
137.macro ARM_MOD_BODY dividend, divisor, order, spare
138
139#if __LINUX_ARM_ARCH__ >= 5
140
 @ order = clz(divisor) - clz(dividend); shifting by it lines the
 @ divisor's top set bit up with the dividend's.
141 clz \order, \divisor
142 clz \spare, \dividend
143 sub \order, \order, \spare
144 mov \divisor, \divisor, lsl \order
145
146#else
147
148 mov \order, #0
149
150 @ Unless the divisor is very big, shift it up in multiples of
151 @ four bits, since this is the amount of unwinding in the main
152 @ division loop. Continue shifting until the divisor is
153 @ larger than the dividend.
1541: cmp \divisor, #0x10000000
155 cmplo \divisor, \dividend
156 movlo \divisor, \divisor, lsl #4
157 addlo \order, \order, #4
158 blo 1b
159
160 @ For very big divisors, we must shift it a bit at a time, or
161 @ we will be in danger of overflowing.
1621: cmp \divisor, #0x80000000
163 cmplo \divisor, \dividend
164 movlo \divisor, \divisor, lsl #1
165 addlo \order, \order, #1
166 blo 1b
167
168#endif
169
170 @ Perform all needed subtractions to keep only the remainder.
171 @ Do comparisons in batch of 4 first.
 @ A shift of \order needs \order + 1 compare/subtract steps, so after
 @ subtracting 3 a non-negative value means a full batch of four remains.
172 subs \order, \order, #3 @ yes, 3 is intended here
173 blt 2f
174
1751: cmp \dividend, \divisor
176 subhs \dividend, \dividend, \divisor
177 cmp \dividend, \divisor, lsr #1
178 subhs \dividend, \dividend, \divisor, lsr #1
179 cmp \dividend, \divisor, lsr #2
180 subhs \dividend, \dividend, \divisor, lsr #2
181 cmp \dividend, \divisor, lsr #3
182 subhs \dividend, \dividend, \divisor, lsr #3
 @ The divisor always moves down a nibble. Only if the dividend is
 @ still >= 1 (signed compare) are four steps counted off \order, and
 @ the loop repeats while \order stays non-negative.
183 cmp \dividend, #1
184 mov \divisor, \divisor, lsr #4
185 subges \order, \order, #4
186 bge 1b
187
 @ Finished if the low two bits of \order are clear (no partial batch
 @ left) or if the dividend is already zero.
188 tst \order, #3
189 teqne \dividend, #0
190 beq 5f
191
192 @ Either 1, 2 or 3 comparison/subtractions are left.
 @ \order + 2 < 0 jumps to the last step only, == 0 to the last two,
 @ otherwise all three run.
1932: cmn \order, #2
194 blt 4f
195 beq 3f
196 cmp \dividend, \divisor
197 subhs \dividend, \dividend, \divisor
198 mov \divisor, \divisor, lsr #1
1993: cmp \dividend, \divisor
200 subhs \dividend, \dividend, \divisor
201 mov \divisor, \divisor, lsr #1
2024: cmp \dividend, \divisor
203 subhs \dividend, \dividend, \divisor
2045:
205.endm
206
207
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient
 * r1, r2, r3 and the condition flags are used as scratch.
 * A zero divisor branches to Ldiv0.
 */
208ENTRY(__udivsi3)
209ENTRY(__aeabi_uidiv)
210UNWIND(.fnstart)
211
212 subs r2, r1, #1 @ r2 = divisor - 1
213 moveq pc, lr @ divisor == 1: r0 already holds the quotient
214 bcc Ldiv0 @ borrow: divisor == 0
215 cmp r0, r1
216 bls 11f @ dividend <= divisor: quotient is 1 or 0
217 tst r1, r2
218 beq 12f @ divisor & (divisor - 1) == 0: power of two
219
220 ARM_DIV_BODY r0, r1, r2, r3
221
222 mov r0, r2 @ the macro left the quotient in r2
223 mov pc, lr
224
 @ Flags still come from "cmp r0, r1" above.
22511: moveq r0, #1 @ dividend == divisor
226 movne r0, #0 @ dividend < divisor
227 mov pc, lr
228
 @ Power-of-two divisor: divide by shifting right.
22912: ARM_DIV2_ORDER r1, r2
230
231 mov r0, r0, lsr r2
232 mov pc, lr
233
234UNWIND(.fnend)
235ENDPROC(__udivsi3)
236ENDPROC(__aeabi_uidiv)
237
/*
 * __umodsi3: unsigned 32-bit remainder.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = dividend % divisor
 * r1, r2, r3 and the condition flags are used as scratch.
 * A zero divisor branches to Ldiv0.
 * The special cases are handled by one chain of conditional instructions
 * whose flags carry over from line to line; do not reorder them.
 */
238ENTRY(__umodsi3)
239UNWIND(.fnstart)
240
241 subs r2, r1, #1 @ compare divisor with 1
242 bcc Ldiv0 @ borrow: divisor == 0
243 cmpne r0, r1 @ compare dividend with divisor
244 moveq r0, #0 @ divisor == 1 or dividend == divisor
245 tsthi r1, r2 @ see if divisor is power of 2
246 andeq r0, r0, r2 @ power of two: keep dividend & (divisor - 1)
247 movls pc, lr @ return unless the full loop is needed
248
249 ARM_MOD_BODY r0, r1, r2, r3
250
251 mov pc, lr
252
253UNWIND(.fnend)
254ENDPROC(__umodsi3)
255
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient
 * Divides the absolute values, then negates the quotient when bit 31 of
 * ip (dividend XOR divisor) is set.
 * r1, r2, r3, ip and the condition flags are used as scratch.
 * A zero divisor branches to Ldiv0.
 */
256ENTRY(__divsi3)
257ENTRY(__aeabi_idiv)
258UNWIND(.fnstart)
259
260 cmp r1, #0
261 eor ip, r0, r1 @ save the sign of the result.
262 beq Ldiv0
263 rsbmi r1, r1, #0 @ loops below use unsigned.
264 subs r2, r1, #1 @ division by 1 or -1 ?
265 beq 10f
266 movs r3, r0
267 rsbmi r3, r0, #0 @ positive dividend value
268 cmp r3, r1
269 bls 11f @ |dividend| <= |divisor|
270 tst r1, r2 @ divisor is power of 2 ?
271 beq 12f
272
273 ARM_DIV_BODY r3, r1, r0, r2
274
275 cmp ip, #0
276 rsbmi r0, r0, #0 @ operand signs differed: negate quotient
277 mov pc, lr
278
 @ |divisor| == 1. ip ^ r0 equals the original divisor, so the N flag is
 @ set exactly when the divisor was negative, and r0 is then negated.
27910: teq ip, r0 @ same sign ?
280 rsbmi r0, r0, #0
281 mov pc, lr
282
 @ Flags still come from "cmp r3, r1": lower gives 0; equal gives +1 or
 @ -1, built from the sign bit of ip.
28311: movlo r0, #0
284 moveq r0, ip, asr #31
285 orreq r0, r0, #1
286 mov pc, lr
287
 @ Power-of-two divisor: shift |dividend| right, then apply the sign.
28812: ARM_DIV2_ORDER r1, r2
289
290 cmp ip, #0
291 mov r0, r3, lsr r2
292 rsbmi r0, r0, #0
293 mov pc, lr
294
295UNWIND(.fnend)
296ENDPROC(__divsi3)
297ENDPROC(__aeabi_idiv)
298
/*
 * __modsi3: signed 32-bit remainder.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = remainder, given the sign of the original dividend
 * Works on the absolute values and negates the result at the end if the
 * dividend was negative.
 * r1, r2, r3, ip and the condition flags are used as scratch.
 * A zero divisor branches to Ldiv0.
 */
299ENTRY(__modsi3)
300UNWIND(.fnstart)
301
302 cmp r1, #0
303 beq Ldiv0
304 rsbmi r1, r1, #0 @ loops below use unsigned.
305 movs ip, r0 @ preserve sign of dividend
306 rsbmi r0, r0, #0 @ if negative make positive
307 subs r2, r1, #1 @ compare divisor with 1
308 cmpne r0, r1 @ compare dividend with divisor
309 moveq r0, #0 @ |divisor| == 1 or the magnitudes are equal
310 tsthi r1, r2 @ see if divisor is power of 2
311 andeq r0, r0, r2 @ power of two: keep the bits below it
312 bls 10f @ skip the loop unless it is needed
313
314 ARM_MOD_BODY r0, r1, r2, r3
315
31610: cmp ip, #0
317 rsbmi r0, r0, #0 @ dividend was negative: negate remainder
318 mov pc, lr
319
320UNWIND(.fnend)
321ENDPROC(__modsi3)
322
323#ifdef CONFIG_AEABI
324
/*
 * __aeabi_uidivmod: unsigned division returning quotient and remainder.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient, r1 = dividend - quotient * divisor
 * Calls __aeabi_uidiv for the quotient and derives the remainder from it.
 * r2 and r3 are used as scratch; ip and lr are saved and restored.
 */
325ENTRY(__aeabi_uidivmod)
326UNWIND(.fnstart)
327UNWIND(.save {r0, r1, ip, lr} )
328
329 stmfd sp!, {r0, r1, ip, lr}
330 bl __aeabi_uidiv
 @ Pop the saved operands into different registers: r1 = original
 @ dividend, r2 = original divisor. r0 keeps the quotient.
331 ldmfd sp!, {r1, r2, ip, lr}
332 mul r3, r0, r2
333 sub r1, r1, r3
334 mov pc, lr
335
336UNWIND(.fnend)
337ENDPROC(__aeabi_uidivmod)
338
/*
 * __aeabi_idivmod: signed division returning quotient and remainder.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient, r1 = dividend - quotient * divisor
 * Calls __aeabi_idiv for the quotient and derives the remainder from it.
 * r2 and r3 are used as scratch; ip and lr are saved and restored.
 */
339ENTRY(__aeabi_idivmod)
340UNWIND(.fnstart)
341UNWIND(.save {r0, r1, ip, lr} )
342 stmfd sp!, {r0, r1, ip, lr}
343 bl __aeabi_idiv
 @ Pop the saved operands into different registers: r1 = original
 @ dividend, r2 = original divisor. r0 keeps the quotient.
344 ldmfd sp!, {r1, r2, ip, lr}
345 mul r3, r0, r2
346 sub r1, r1, r3
347 mov pc, lr
348
349UNWIND(.fnend)
350ENDPROC(__aeabi_idivmod)
351
352#endif
353
/*
 * Ldiv0: shared divide-by-zero path. The division routines in this file
 * branch here (without linking) when the divisor is zero, so lr still
 * holds their caller's return address.
 * Calls __div0 (not defined in this listing), then returns 0 in r0 to that
 * caller.
 */
354Ldiv0:
355UNWIND(.fnstart)
356UNWIND(.pad #4)
357UNWIND(.save {lr})
 @ Pre-decrement sp by 8 and store lr there: lr plus one unused word,
 @ which is what the .pad #4 / .save {lr} annotations describe.
358 str lr, [sp, #-8]!
359 bl __div0
360 mov r0, #0 @ About as wrong as it could be.
 @ Load the saved return address straight into pc and release the 8 bytes.
361 ldr pc, [sp], #8
362UNWIND(.fnend)
363ENDPROC(Ldiv0)
1/*
2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
3 *
4 * Author: Nicolas Pitre <nico@fluxnic.net>
5 * - contributed to gcc-3.4 on Sep 30, 2003
6 * - adapted for the Linux kernel on Oct 2, 2003
7 */
8
9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10
11This file is free software; you can redistribute it and/or modify it
12under the terms of the GNU General Public License as published by the
13Free Software Foundation; either version 2, or (at your option) any
14later version.
15
16In addition to the permissions in the GNU General Public License, the
17Free Software Foundation gives you unlimited permission to link the
18compiled version of this file into combinations with other programs,
19and to distribute those combinations without any restriction coming
20from the use of this file. (The General Public License restrictions
21do apply in other respects; for example, they cover modification of
22the file, and distribution when not linked into a combine
23executable.)
24
25This file is distributed in the hope that it will be useful, but
26WITHOUT ANY WARRANTY; without even the implied warranty of
27MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28General Public License for more details.
29
30You should have received a copy of the GNU General Public License
31along with this program; see the file COPYING. If not, write to
32the Free Software Foundation, 59 Temple Place - Suite 330,
33Boston, MA 02111-1307, USA. */
34
35
36#include <linux/linkage.h>
37#include <asm/assembler.h>
38#include <asm/unwind.h>
39
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core; each pass of the main loop
 * produces up to four quotient bits.
 *   \dividend  in: numerator. Overwritten by the subtractions; no call
 *              site in this file reads it afterwards.
 *   \divisor   in: denominator. Shifted left, then right; clobbered.
 *   \result    out: quotient.
 *   \curbit    scratch: the quotient bit matching the current divisor shift.
 * Condition flags are clobbered.
 * Both expansions in this file are reached only after the caller has
 * branched away for a zero divisor, dividend <= divisor and power-of-two
 * divisors.
 */
40.macro ARM_DIV_BODY dividend, divisor, result, curbit
41
42#if __LINUX_ARM_ARCH__ >= 5
43
 @ CLZ path: move the divisor left until its top set bit lines up with
 @ the dividend's top set bit, and place curbit at the same offset.
44 clz \curbit, \divisor
45 clz \result, \dividend
46 sub \result, \curbit, \result @ shift = clz(divisor) - clz(dividend)
47 mov \curbit, #1
48 mov \divisor, \divisor, lsl \result
49 mov \curbit, \curbit, lsl \result @ curbit = 1 << shift
50 mov \result, #0 @ quotient accumulates from zero
51
52#else
53
54 @ Initially shift the divisor left 3 bits if possible,
55 @ set curbit accordingly. This allows for curbit to be located
56 @ at the left end of each 4 bit nibbles in the division loop
57 @ to save one loop in most cases.
58 tst \divisor, #0xe0000000
59 moveq \divisor, \divisor, lsl #3
60 moveq \curbit, #8
61 movne \curbit, #1
62
63 @ Unless the divisor is very big, shift it up in multiples of
64 @ four bits, since this is the amount of unwinding in the main
65 @ division loop. Continue shifting until the divisor is
66 @ larger than the dividend.
 @ (The second compare only runs when the first one left "lower", so
 @ the loop stops as soon as either bound is reached.)
671: cmp \divisor, #0x10000000
68 cmplo \divisor, \dividend
69 movlo \divisor, \divisor, lsl #4
70 movlo \curbit, \curbit, lsl #4
71 blo 1b
72
73 @ For very big divisors, we must shift it a bit at a time, or
74 @ we will be in danger of overflowing.
751: cmp \divisor, #0x80000000
76 cmplo \divisor, \dividend
77 movlo \divisor, \divisor, lsl #1
78 movlo \curbit, \curbit, lsl #1
79 blo 1b
80
81 mov \result, #0
82
83#endif
84
85 @ Division loop
 @ Each pass tries divisor, divisor >> 1, divisor >> 2 and divisor >> 3
 @ in turn; whenever one fits into the dividend it is subtracted and the
 @ correspondingly shifted curbit is ORed into the quotient.
861: cmp \dividend, \divisor
87 subhs \dividend, \dividend, \divisor
88 orrhs \result, \result, \curbit
89 cmp \dividend, \divisor, lsr #1
90 subhs \dividend, \dividend, \divisor, lsr #1
91 orrhs \result, \result, \curbit, lsr #1
92 cmp \dividend, \divisor, lsr #2
93 subhs \dividend, \dividend, \divisor, lsr #2
94 orrhs \result, \result, \curbit, lsr #2
95 cmp \dividend, \divisor, lsr #3
96 subhs \dividend, \dividend, \divisor, lsr #3
97 orrhs \result, \result, \curbit, lsr #3
 @ Leave the loop when the dividend has reached zero or when curbit has
 @ been shifted out to zero; otherwise step divisor and curbit down one
 @ nibble and go round again.
98 cmp \dividend, #0 @ Early termination?
99 movsne \curbit, \curbit, lsr #4 @ No, any more bits to do?
100 movne \divisor, \divisor, lsr #4
101 bne 1b
102
103.endm
104
105
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Leaves in \order the right-shift count that divides by \divisor.
 * The call sites in this file expand it only after checking that
 * (divisor & (divisor - 1)) == 0; the pre-ARMv5 path relies on that.
 * The CLZ path leaves \divisor and the flags untouched; the pre-ARMv5
 * path shifts \divisor down and clobbers the flags.
 */
106.macro ARM_DIV2_ORDER divisor, order
107
108#if __LINUX_ARM_ARCH__ >= 5
109
110 clz \order, \divisor
111 rsb \order, \order, #31 @ order = 31 - clz(divisor)
112
113#else
114
 @ Binary search for the set bit: drop 16, then 8, then 4 low bits
 @ whenever the value is still at least that large, adding the amount
 @ dropped to \order.
115 cmp \divisor, #(1 << 16)
116 movhs \divisor, \divisor, lsr #16
117 movhs \order, #16
118 movlo \order, #0
119
120 cmp \divisor, #(1 << 8)
121 movhs \divisor, \divisor, lsr #8
122 addhs \order, \order, #8
123
124 cmp \divisor, #(1 << 4)
125 movhs \divisor, \divisor, lsr #4
126 addhs \order, \order, #4
127
 @ What is left is below 16. Above 4 adds 3 (the value 8 for a power
 @ of two); otherwise divisor >> 1 maps 1, 2, 4 to +0, +1, +2.
128 cmp \divisor, #(1 << 2)
129 addhi \order, \order, #3
130 addls \order, \order, \divisor, lsr #1
131
132#endif
133
134.endm
135
136
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder by shift-and-subtract.
 *   \dividend  in: numerator; out: remainder.
 *   \divisor   in: denominator; clobbered.
 *   \order     scratch: number of bit positions the divisor was moved left,
 *              then used as a countdown of compare/subtract steps.
 *   \spare     scratch; written only on the CLZ (ARMv5+) path.
 * Condition flags are clobbered.
 * Both expansions in this file are reached only when the dividend is
 * greater than the divisor and the divisor is not a power of two.
 */
137.macro ARM_MOD_BODY dividend, divisor, order, spare
138
139#if __LINUX_ARM_ARCH__ >= 5
140
 @ order = clz(divisor) - clz(dividend); shifting by it lines the
 @ divisor's top set bit up with the dividend's.
141 clz \order, \divisor
142 clz \spare, \dividend
143 sub \order, \order, \spare
144 mov \divisor, \divisor, lsl \order
145
146#else
147
148 mov \order, #0
149
150 @ Unless the divisor is very big, shift it up in multiples of
151 @ four bits, since this is the amount of unwinding in the main
152 @ division loop. Continue shifting until the divisor is
153 @ larger than the dividend.
1541: cmp \divisor, #0x10000000
155 cmplo \divisor, \dividend
156 movlo \divisor, \divisor, lsl #4
157 addlo \order, \order, #4
158 blo 1b
159
160 @ For very big divisors, we must shift it a bit at a time, or
161 @ we will be in danger of overflowing.
1621: cmp \divisor, #0x80000000
163 cmplo \divisor, \dividend
164 movlo \divisor, \divisor, lsl #1
165 addlo \order, \order, #1
166 blo 1b
167
168#endif
169
170 @ Perform all needed subtractions to keep only the remainder.
171 @ Do comparisons in batch of 4 first.
 @ A shift of \order needs \order + 1 compare/subtract steps, so after
 @ subtracting 3 a non-negative value means a full batch of four remains.
172 subs \order, \order, #3 @ yes, 3 is intended here
173 blt 2f
174
1751: cmp \dividend, \divisor
176 subhs \dividend, \dividend, \divisor
177 cmp \dividend, \divisor, lsr #1
178 subhs \dividend, \dividend, \divisor, lsr #1
179 cmp \dividend, \divisor, lsr #2
180 subhs \dividend, \dividend, \divisor, lsr #2
181 cmp \dividend, \divisor, lsr #3
182 subhs \dividend, \dividend, \divisor, lsr #3
 @ The divisor always moves down a nibble. Only if the dividend is
 @ still >= 1 (signed compare) are four steps counted off \order, and
 @ the loop repeats while \order stays non-negative.
183 cmp \dividend, #1
184 mov \divisor, \divisor, lsr #4
185 subsge \order, \order, #4
186 bge 1b
187
 @ Finished if the low two bits of \order are clear (no partial batch
 @ left) or if the dividend is already zero.
188 tst \order, #3
189 teqne \dividend, #0
190 beq 5f
191
192 @ Either 1, 2 or 3 comparison/subtractions are left.
 @ \order + 2 < 0 jumps to the last step only, == 0 to the last two,
 @ otherwise all three run.
1932: cmn \order, #2
194 blt 4f
195 beq 3f
196 cmp \dividend, \divisor
197 subhs \dividend, \dividend, \divisor
198 mov \divisor, \divisor, lsr #1
1993: cmp \dividend, \divisor
200 subhs \dividend, \dividend, \divisor
201 mov \divisor, \divisor, lsr #1
2024: cmp \dividend, \divisor
203 subhs \dividend, \dividend, \divisor
2045:
205.endm
206
207
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient
 * r1, r2, r3 and the condition flags are used as scratch.
 * A zero divisor branches to Ldiv0.
 * "ret" / "reteq" are not defined in this listing; presumably they are
 * return macros supplied by <asm/assembler.h> - confirm.
 * With CONFIG_ARM_PATCH_IDIV the entry point is aligned with ".align 3"
 * (2^3 = 8 bytes under ARM GAS). NOTE(review): presumably a requirement of
 * the code that patches this entry at run time - confirm.
 */
208#ifdef CONFIG_ARM_PATCH_IDIV
209 .align 3
210#endif
211
212ENTRY(__udivsi3)
213ENTRY(__aeabi_uidiv)
214UNWIND(.fnstart)
215
216 subs r2, r1, #1 @ r2 = divisor - 1
217 reteq lr @ divisor == 1: r0 already holds the quotient
218 bcc Ldiv0 @ borrow: divisor == 0
219 cmp r0, r1
220 bls 11f @ dividend <= divisor: quotient is 1 or 0
221 tst r1, r2
222 beq 12f @ divisor & (divisor - 1) == 0: power of two
223
224 ARM_DIV_BODY r0, r1, r2, r3
225
226 mov r0, r2 @ the macro left the quotient in r2
227 ret lr
228
 @ Flags still come from "cmp r0, r1" above.
22911: moveq r0, #1 @ dividend == divisor
230 movne r0, #0 @ dividend < divisor
231 ret lr
232
 @ Power-of-two divisor: divide by shifting right.
23312: ARM_DIV2_ORDER r1, r2
234
235 mov r0, r0, lsr r2
236 ret lr
237
238UNWIND(.fnend)
239ENDPROC(__udivsi3)
240ENDPROC(__aeabi_uidiv)
241
/*
 * __umodsi3: unsigned 32-bit remainder.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = dividend % divisor
 * r1, r2, r3 and the condition flags are used as scratch.
 * A zero divisor branches to Ldiv0.
 * The special cases are handled by one chain of conditional instructions
 * whose flags carry over from line to line; do not reorder them.
 * "ret" / "retls" are not defined in this listing; presumably they are
 * return macros supplied by <asm/assembler.h> - confirm.
 */
242ENTRY(__umodsi3)
243UNWIND(.fnstart)
244
245 subs r2, r1, #1 @ compare divisor with 1
246 bcc Ldiv0 @ borrow: divisor == 0
247 cmpne r0, r1 @ compare dividend with divisor
248 moveq r0, #0 @ divisor == 1 or dividend == divisor
249 tsthi r1, r2 @ see if divisor is power of 2
250 andeq r0, r0, r2 @ power of two: keep dividend & (divisor - 1)
251 retls lr @ return unless the full loop is needed
252
253 ARM_MOD_BODY r0, r1, r2, r3
254
255 ret lr
256
257UNWIND(.fnend)
258ENDPROC(__umodsi3)
259
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient
 * Divides the absolute values, then negates the quotient when bit 31 of
 * ip (dividend XOR divisor) is set.
 * r1, r2, r3, ip and the condition flags are used as scratch.
 * A zero divisor branches to Ldiv0.
 * "ret" is not defined in this listing; presumably it is a return macro
 * supplied by <asm/assembler.h> - confirm.
 * With CONFIG_ARM_PATCH_IDIV the entry point is aligned with ".align 3"
 * (2^3 = 8 bytes under ARM GAS). NOTE(review): presumably a requirement of
 * the code that patches this entry at run time - confirm.
 */
260#ifdef CONFIG_ARM_PATCH_IDIV
261 .align 3
262#endif
263
264ENTRY(__divsi3)
265ENTRY(__aeabi_idiv)
266UNWIND(.fnstart)
267
268 cmp r1, #0
269 eor ip, r0, r1 @ save the sign of the result.
270 beq Ldiv0
271 rsbmi r1, r1, #0 @ loops below use unsigned.
272 subs r2, r1, #1 @ division by 1 or -1 ?
273 beq 10f
274 movs r3, r0
275 rsbmi r3, r0, #0 @ positive dividend value
276 cmp r3, r1
277 bls 11f @ |dividend| <= |divisor|
278 tst r1, r2 @ divisor is power of 2 ?
279 beq 12f
280
281 ARM_DIV_BODY r3, r1, r0, r2
282
283 cmp ip, #0
284 rsbmi r0, r0, #0 @ operand signs differed: negate quotient
285 ret lr
286
 @ |divisor| == 1. ip ^ r0 equals the original divisor, so the N flag is
 @ set exactly when the divisor was negative, and r0 is then negated.
28710: teq ip, r0 @ same sign ?
288 rsbmi r0, r0, #0
289 ret lr
290
 @ Flags still come from "cmp r3, r1": lower gives 0; equal gives +1 or
 @ -1, built from the sign bit of ip.
29111: movlo r0, #0
292 moveq r0, ip, asr #31
293 orreq r0, r0, #1
294 ret lr
295
 @ Power-of-two divisor: shift |dividend| right, then apply the sign.
29612: ARM_DIV2_ORDER r1, r2
297
298 cmp ip, #0
299 mov r0, r3, lsr r2
300 rsbmi r0, r0, #0
301 ret lr
302
303UNWIND(.fnend)
304ENDPROC(__divsi3)
305ENDPROC(__aeabi_idiv)
306
/*
 * __modsi3: signed 32-bit remainder.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = remainder, given the sign of the original dividend
 * Works on the absolute values and negates the result at the end if the
 * dividend was negative.
 * r1, r2, r3, ip and the condition flags are used as scratch.
 * A zero divisor branches to Ldiv0.
 * "ret" is not defined in this listing; presumably it is a return macro
 * supplied by <asm/assembler.h> - confirm.
 */
307ENTRY(__modsi3)
308UNWIND(.fnstart)
309
310 cmp r1, #0
311 beq Ldiv0
312 rsbmi r1, r1, #0 @ loops below use unsigned.
313 movs ip, r0 @ preserve sign of dividend
314 rsbmi r0, r0, #0 @ if negative make positive
315 subs r2, r1, #1 @ compare divisor with 1
316 cmpne r0, r1 @ compare dividend with divisor
317 moveq r0, #0 @ |divisor| == 1 or the magnitudes are equal
318 tsthi r1, r2 @ see if divisor is power of 2
319 andeq r0, r0, r2 @ power of two: keep the bits below it
320 bls 10f @ skip the loop unless it is needed
321
322 ARM_MOD_BODY r0, r1, r2, r3
323
32410: cmp ip, #0
325 rsbmi r0, r0, #0 @ dividend was negative: negate remainder
326 ret lr
327
328UNWIND(.fnend)
329ENDPROC(__modsi3)
330
331#ifdef CONFIG_AEABI
332
/*
 * __aeabi_uidivmod: unsigned division returning quotient and remainder.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient, r1 = dividend - quotient * divisor
 * Calls __aeabi_uidiv for the quotient and derives the remainder from it.
 * r2 and r3 are used as scratch; ip and lr are saved and restored.
 * "ret" is not defined in this listing; presumably it is a return macro
 * supplied by <asm/assembler.h> - confirm.
 */
333ENTRY(__aeabi_uidivmod)
334UNWIND(.fnstart)
335UNWIND(.save {r0, r1, ip, lr} )
336
337 stmfd sp!, {r0, r1, ip, lr}
338 bl __aeabi_uidiv
 @ Pop the saved operands into different registers: r1 = original
 @ dividend, r2 = original divisor. r0 keeps the quotient.
339 ldmfd sp!, {r1, r2, ip, lr}
340 mul r3, r0, r2
341 sub r1, r1, r3
342 ret lr
343
344UNWIND(.fnend)
345ENDPROC(__aeabi_uidivmod)
346
/*
 * __aeabi_idivmod: signed division returning quotient and remainder.
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient, r1 = dividend - quotient * divisor
 * Calls __aeabi_idiv for the quotient and derives the remainder from it.
 * r2 and r3 are used as scratch; ip and lr are saved and restored.
 * "ret" is not defined in this listing; presumably it is a return macro
 * supplied by <asm/assembler.h> - confirm.
 */
347ENTRY(__aeabi_idivmod)
348UNWIND(.fnstart)
349UNWIND(.save {r0, r1, ip, lr} )
350 stmfd sp!, {r0, r1, ip, lr}
351 bl __aeabi_idiv
 @ Pop the saved operands into different registers: r1 = original
 @ dividend, r2 = original divisor. r0 keeps the quotient.
352 ldmfd sp!, {r1, r2, ip, lr}
353 mul r3, r0, r2
354 sub r1, r1, r3
355 ret lr
356
357UNWIND(.fnend)
358ENDPROC(__aeabi_idivmod)
359
360#endif
361
/*
 * Ldiv0: shared divide-by-zero path. The division routines in this file
 * branch here (without linking) when the divisor is zero, so lr still
 * holds their caller's return address.
 * Calls __div0 (not defined in this listing), then returns 0 in r0 to that
 * caller.
 */
362Ldiv0:
363UNWIND(.fnstart)
364UNWIND(.pad #4)
365UNWIND(.save {lr})
 @ Pre-decrement sp by 8 and store lr there: lr plus one unused word,
 @ which is what the .pad #4 / .save {lr} annotations describe.
366 str lr, [sp, #-8]!
367 bl __div0
368 mov r0, #0 @ About as wrong as it could be.
 @ Load the saved return address straight into pc and release the 8 bytes.
369 ldr pc, [sp], #8
370UNWIND(.fnend)
371ENDPROC(Ldiv0)