div64.S - arch/arm/lib/div64.S - Linux diff v4.10.11

  1/*
  2 *  linux/arch/arm/lib/div64.S
  3 *
  4 *  Optimized computation of 64-bit dividend / 32-bit divisor
  5 *
  6 *  Author:	Nicolas Pitre
  7 *  Created:	Oct 5, 2003
  8 *  Copyright:	Monta Vista Software, Inc.
  9 *
 10 *  This program is free software; you can redistribute it and/or modify
 11 *  it under the terms of the GNU General Public License version 2 as
 12 *  published by the Free Software Foundation.
 13 */
 14
 15#include <linux/linkage.h>
 16#include <asm/assembler.h>
 17#include <asm/unwind.h>
 18
 19#ifdef __ARMEB__
 20#define xh r0
 21#define xl r1
 22#define yh r2
 23#define yl r3
 24#else
 25#define xl r0
 26#define xh r1
 27#define yl r2
 28#define yh r3
 29#endif
 30
 31/*
 32 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 33 *
 34 * Note: Calling convention is totally non standard for optimal code.
 35 *       This is meant to be used by do_div() from include/asm/div64.h only.
 36 *
 37 * Input parameters:
 38 * 	xh-xl	= dividend (clobbered)
 39 * 	r4	= divisor (preserved)
 40 *
 41 * Output values:
 42 * 	yh-yl	= result
 43 * 	xh	= remainder
 44 *
 45 * Clobbered regs: xl, ip
 46 */
 47
 48ENTRY(__do_div64)
 49UNWIND(.fnstart)
	@ Divide the 64-bit value in xh-xl by the 32-bit value in r4, leaving
	@ the quotient in yh-yl and the remainder in xh.
 50
 51	@ Test for easy paths first.
 52	subs	ip, r4, #1		@ ip = divisor - 1; LS holds only for divisor 0 or 1
 53	bls	9f			@ divisor is 0 or 1
 54	tst	ip, r4			@ divisor & (divisor - 1) is 0 when one bit is set
 55	beq	8f			@ divisor is power of 2
 56
 57	@ See if we need to handle upper 32-bit result.
 58	cmp	xh, r4
 59	mov	yh, #0			@ plain mov: flags from the cmp are kept
 60	blo	3f			@ xh < divisor: upper quotient word stays 0
 61
 62	@ Align divisor with upper part of dividend.
 63	@ The aligned divisor is stored in yl preserving the original.
 64	@ The bit position is stored in ip.
 65
 66#if __LINUX_ARM_ARCH__ >= 5
 67
 68	clz	yl, r4
 69	clz	ip, xh
 70	sub	yl, yl, ip		@ shift that lines up the top set bits (xh >= r4 here)
 71	mov	ip, #1
 72	mov	ip, ip, lsl yl		@ ip = quotient bit matching that shift
 73	mov	yl, r4, lsl yl		@ yl = divisor moved up by the same shift
 74
 75#else
 76
 77	mov	yl, r4
 78	mov	ip, #1
 791:	cmp	yl, #0x80000000		@ stop before the top bit would be shifted out
 80	cmpcc	yl, xh			@ ... or once yl is no longer below xh
 81	movcc	yl, yl, lsl #1
 82	movcc	ip, ip, lsl #1
 83	bcc	1b
 84
 85#endif
 86
 87	@ The division loop for needed upper bit positions.
 88 	@ Break out early if dividend reaches 0.
 892:	cmp	xh, yl
 90	orrcs	yh, yh, ip		@ xh >= yl: set this quotient bit
 91	subcss	xh, xh, yl		@ ... and subtract; Z is set if xh reached 0
 92	movnes	ip, ip, lsr #1		@ next bit position; Z is set when none is left
 93	mov	yl, yl, lsr #1		@ plain mov: Z from the lines above decides the bne
 94	bne	2b
 95
 96	@ See if we need to handle lower 32-bit result.
 973:	cmp	xh, #0
 98	mov	yl, #0
 99	cmpeq	xl, r4			@ only compared when xh is 0; otherwise C stays set
100	movlo	xh, xl			@ xh == 0 and xl < divisor: xl is the remainder
101	retlo	lr
102
103	@ The division loop for lower bit positions.
104	@ Here we shift remainder bits leftwards rather than moving the
105	@ divisor for comparisons, considering the carry-out bit as well.
106	mov	ip, #0x80000000		@ start with the top bit of the low quotient word
1074:	movs	xl, xl, lsl #1		@ top bit of xl goes to carry
108	adcs	xh, xh, xh		@ xh = 2 * xh + carry; carry out is the 33rd bit
109	beq	6f
110	cmpcc	xh, r4			@ compare only when the 33rd bit is clear
1115:	orrcs	yl, yl, ip		@ C set: divisor fits, record the quotient bit
112	subcs	xh, xh, r4
113	movs	ip, ip, lsr #1		@ Z is set once the last bit position is done
114	bne	4b
115	ret	lr
116
117	@ The top part of remainder became zero.  If carry is set
118	@ (the 33rd bit) this is a false positive so resume the loop.
119	@ Otherwise, if lower part is also null then we are done.
1206:	bcs	5b
121	cmp	xl, #0
122	reteq	lr
123
124	@ We still have remainder bits in the low part.  Bring them up.
125
126#if __LINUX_ARM_ARCH__ >= 5
127
128	clz	xh, xl			@ we know xh is zero here so...
129	add	xh, xh, #1		@ count that pushes the top set bit of xl out
130	mov	xl, xl, lsl xh
131	mov	ip, ip, lsr xh		@ move the bit position by the same count
132
133#else
134
1357:	movs	xl, xl, lsl #1		@ repeat until a set bit is shifted out of xl
136	mov	ip, ip, lsr #1
137	bcc	7b
138
139#endif
140
141	@ Current remainder is now 1.  It is worthless to compare with
142	@ divisor at this point since divisor can not be smaller than 3 here.
143	@ If possible, branch for another shift in the division loop.
144	@ If no bit position left then we are done.
145	movs	ip, ip, lsr #1
146	mov	xh, #1			@ plain mov: Z from the movs above decides the bne
147	bne	4b
148	ret	lr
149
1508:	@ Division by a power of 2: determine what that divisor order is
151	@ then simply shift values around
152
153#if __LINUX_ARM_ARCH__ >= 5
154
155	clz	ip, r4
156	rsb	ip, ip, #31		@ ip = index of the single set bit in r4
157
158#else
159
	@ Narrow down the set bit in steps of 16, 8 and 4, accumulating
	@ the order in ip while yl is reduced to the remaining low bits.
160	mov	yl, r4
161	cmp	r4, #(1 << 16)
162	mov	ip, #0
163	movhs	yl, yl, lsr #16
164	movhs	ip, #16
165
166	cmp	yl, #(1 << 8)
167	movhs	yl, yl, lsr #8
168	addhs	ip, ip, #8
169
170	cmp	yl, #(1 << 4)
171	movhs	yl, yl, lsr #4
172	addhs	ip, ip, #4
173
174	cmp	yl, #(1 << 2)
175	addhi	ip, ip, #3		@ yl above 4 (i.e. 8): three more
176	addls	ip, ip, yl, lsr #1	@ yl of 1, 2 or 4 adds 0, 1 or 2
177
178#endif
179
	@ Quotient = dividend >> order: the low bits of xh are moved into
	@ the top of yl.  Remainder = the low-order bits of xl.
180	mov	yh, xh, lsr ip
181	mov	yl, xl, lsr ip
182	rsb	ip, ip, #32		@ ip = 32 - order
183 ARM(	orr	yl, yl, xh, lsl ip	)
184 THUMB(	lsl	xh, xh, ip		)
185 THUMB(	orr	yl, yl, xh		)
186	mov	xh, xl, lsl ip		@ shifting up then back down clears the
187	mov	xh, xh, lsr ip		@ bits of xl above the order
188	ret	lr
189
190	@ eq -> division by 1: obvious enough...
1919:	moveq	yl, xl
192	moveq	yh, xh
193	moveq	xh, #0
194	reteq	lr
	@ Not eq here means the divisor was 0: execution continues into
	@ Ldiv0_64 below (assuming the UNWIND() lines emit no instructions).
195UNWIND(.fnend)
196
197UNWIND(.fnstart)
198UNWIND(.pad #4)
199UNWIND(.save {lr})
200Ldiv0_64:
201	@ Division by 0:
202	str	lr, [sp, #-8]!		@ save lr, moving sp down by 8
203	bl	__div0
204
205	@ as wrong as it could be...
206	mov	yl, #0
207	mov	yh, #0
208	mov	xh, #0
209	ldr	pc, [sp], #8		@ return through the saved lr, sp back up by 8
210
211UNWIND(.fnend)
212ENDPROC(__do_div64)

  1/*
  2 *  linux/arch/arm/lib/div64.S
  3 *
  4 *  Optimized computation of 64-bit dividend / 32-bit divisor
  5 *
  6 *  Author:	Nicolas Pitre
  7 *  Created:	Oct 5, 2003
  8 *  Copyright:	Monta Vista Software, Inc.
  9 *
 10 *  This program is free software; you can redistribute it and/or modify
 11 *  it under the terms of the GNU General Public License version 2 as
 12 *  published by the Free Software Foundation.
 13 */
 14
 15#include <linux/linkage.h>
 
 16#include <asm/unwind.h>
 17
 18#ifdef __ARMEB__
 19#define xh r0
 20#define xl r1
 21#define yh r2
 22#define yl r3
 23#else
 24#define xl r0
 25#define xh r1
 26#define yl r2
 27#define yh r3
 28#endif
 29
 30/*
 31 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 32 *
 33 * Note: Calling convention is totally non standard for optimal code.
 34 *       This is meant to be used by do_div() from include/asm/div64.h only.
 35 *
 36 * Input parameters:
 37 * 	xh-xl	= dividend (clobbered)
 38 * 	r4	= divisor (preserved)
 39 *
 40 * Output values:
 41 * 	yh-yl	= result
 42 * 	xh	= remainder
 43 *
 44 * Clobbered regs: xl, ip
 45 */
 46
 47ENTRY(__do_div64)
 48UNWIND(.fnstart)
	@ Divide the 64-bit value in xh-xl by the 32-bit value in r4, leaving
	@ the quotient in yh-yl and the remainder in xh.
 49
 50	@ Test for easy paths first.
 51	subs	ip, r4, #1		@ ip = divisor - 1; LS holds only for divisor 0 or 1
 52	bls	9f			@ divisor is 0 or 1
 53	tst	ip, r4			@ divisor & (divisor - 1) is 0 when one bit is set
 54	beq	8f			@ divisor is power of 2
 55
 56	@ See if we need to handle upper 32-bit result.
 57	cmp	xh, r4
 58	mov	yh, #0			@ plain mov: flags from the cmp are kept
 59	blo	3f			@ xh < divisor: upper quotient word stays 0
 60
 61	@ Align divisor with upper part of dividend.
 62	@ The aligned divisor is stored in yl preserving the original.
 63	@ The bit position is stored in ip.
 64
 65#if __LINUX_ARM_ARCH__ >= 5
 66
 67	clz	yl, r4
 68	clz	ip, xh
 69	sub	yl, yl, ip		@ shift that lines up the top set bits (xh >= r4 here)
 70	mov	ip, #1
 71	mov	ip, ip, lsl yl		@ ip = quotient bit matching that shift
 72	mov	yl, r4, lsl yl		@ yl = divisor moved up by the same shift
 73
 74#else
 75
 76	mov	yl, r4
 77	mov	ip, #1
 781:	cmp	yl, #0x80000000		@ stop before the top bit would be shifted out
 79	cmpcc	yl, xh			@ ... or once yl is no longer below xh
 80	movcc	yl, yl, lsl #1
 81	movcc	ip, ip, lsl #1
 82	bcc	1b
 83
 84#endif
 85
 86	@ The division loop for needed upper bit positions.
 87 	@ Break out early if dividend reaches 0.
 882:	cmp	xh, yl
 89	orrcs	yh, yh, ip		@ xh >= yl: set this quotient bit
 90	subcss	xh, xh, yl		@ ... and subtract; Z is set if xh reached 0
 91	movnes	ip, ip, lsr #1		@ next bit position; Z is set when none is left
 92	mov	yl, yl, lsr #1		@ plain mov: Z from the lines above decides the bne
 93	bne	2b
 94
 95	@ See if we need to handle lower 32-bit result.
 963:	cmp	xh, #0
 97	mov	yl, #0
 98	cmpeq	xl, r4			@ only compared when xh is 0; otherwise C stays set
 99	movlo	xh, xl			@ xh == 0 and xl < divisor: xl is the remainder
100	movlo	pc, lr
101
102	@ The division loop for lower bit positions.
103	@ Here we shift remainder bits leftwards rather than moving the
104	@ divisor for comparisons, considering the carry-out bit as well.
105	mov	ip, #0x80000000		@ start with the top bit of the low quotient word
1064:	movs	xl, xl, lsl #1		@ top bit of xl goes to carry
107	adcs	xh, xh, xh		@ xh = 2 * xh + carry; carry out is the 33rd bit
108	beq	6f
109	cmpcc	xh, r4			@ compare only when the 33rd bit is clear
1105:	orrcs	yl, yl, ip		@ C set: divisor fits, record the quotient bit
111	subcs	xh, xh, r4
112	movs	ip, ip, lsr #1		@ Z is set once the last bit position is done
113	bne	4b
114	mov	pc, lr
115
116	@ The top part of remainder became zero.  If carry is set
117	@ (the 33rd bit) this is a false positive so resume the loop.
118	@ Otherwise, if lower part is also null then we are done.
1196:	bcs	5b
120	cmp	xl, #0
121	moveq	pc, lr
122
123	@ We still have remainder bits in the low part.  Bring them up.
124
125#if __LINUX_ARM_ARCH__ >= 5
126
127	clz	xh, xl			@ we know xh is zero here so...
128	add	xh, xh, #1		@ count that pushes the top set bit of xl out
129	mov	xl, xl, lsl xh
130	mov	ip, ip, lsr xh		@ move the bit position by the same count
131
132#else
133
1347:	movs	xl, xl, lsl #1		@ repeat until a set bit is shifted out of xl
135	mov	ip, ip, lsr #1
136	bcc	7b
137
138#endif
139
140	@ Current remainder is now 1.  It is worthless to compare with
141	@ divisor at this point since divisor can not be smaller than 3 here.
142	@ If possible, branch for another shift in the division loop.
143	@ If no bit position left then we are done.
144	movs	ip, ip, lsr #1
145	mov	xh, #1			@ plain mov: Z from the movs above decides the bne
146	bne	4b
147	mov	pc, lr
148
1498:	@ Division by a power of 2: determine what that divisor order is
150	@ then simply shift values around
151
152#if __LINUX_ARM_ARCH__ >= 5
153
154	clz	ip, r4
155	rsb	ip, ip, #31		@ ip = index of the single set bit in r4
156
157#else
158
	@ Narrow down the set bit in steps of 16, 8 and 4, accumulating
	@ the order in ip while yl is reduced to the remaining low bits.
159	mov	yl, r4
160	cmp	r4, #(1 << 16)
161	mov	ip, #0
162	movhs	yl, yl, lsr #16
163	movhs	ip, #16
164
165	cmp	yl, #(1 << 8)
166	movhs	yl, yl, lsr #8
167	addhs	ip, ip, #8
168
169	cmp	yl, #(1 << 4)
170	movhs	yl, yl, lsr #4
171	addhs	ip, ip, #4
172
173	cmp	yl, #(1 << 2)
174	addhi	ip, ip, #3		@ yl above 4 (i.e. 8): three more
175	addls	ip, ip, yl, lsr #1	@ yl of 1, 2 or 4 adds 0, 1 or 2
176
177#endif
178
	@ Quotient = dividend >> order: the low bits of xh are moved into
	@ the top of yl.  Remainder = the low-order bits of xl.
179	mov	yh, xh, lsr ip
180	mov	yl, xl, lsr ip
181	rsb	ip, ip, #32		@ ip = 32 - order
182 ARM(	orr	yl, yl, xh, lsl ip	)
183 THUMB(	lsl	xh, xh, ip		)
184 THUMB(	orr	yl, yl, xh		)
185	mov	xh, xl, lsl ip		@ shifting up then back down clears the
186	mov	xh, xh, lsr ip		@ bits of xl above the order
187	mov	pc, lr
188
189	@ eq -> division by 1: obvious enough...
1909:	moveq	yl, xl
191	moveq	yh, xh
192	moveq	xh, #0
193	moveq	pc, lr
	@ Not eq here means the divisor was 0: execution continues into
	@ Ldiv0_64 below (assuming the UNWIND() lines emit no instructions).
194UNWIND(.fnend)
195
196UNWIND(.fnstart)
197UNWIND(.pad #4)
198UNWIND(.save {lr})
199Ldiv0_64:
200	@ Division by 0:
201	str	lr, [sp, #-8]!		@ save lr, moving sp down by 8
202	bl	__div0
203
204	@ as wrong as it could be...
205	mov	yl, #0
206	mov	yh, #0
207	mov	xh, #0
208	ldr	pc, [sp], #8		@ return through the saved lr, sp back up by 8
209
210UNWIND(.fnend)
211ENDPROC(__do_div64)