div64.S - arch/arm/lib/div64.S - Linux diff v6.13.7

  1/* SPDX-License-Identifier: GPL-2.0-only */
  2/*
  3 *  linux/arch/arm/lib/div64.S
  4 *
  5 *  Optimized computation of 64-bit dividend / 32-bit divisor
  6 *
  7 *  Author:	Nicolas Pitre
  8 *  Created:	Oct 5, 2003
  9 *  Copyright:	Monta Vista Software, Inc.
 10 */
 11
 12#include <linux/linkage.h>
 13#include <asm/assembler.h>
 14#include <asm/unwind.h>
 15
 16#ifdef __ARMEB__
 17#define xh r0
 18#define xl r1
 19#define yh r2
 20#define yl r3
 21#else
 22#define xl r0
 23#define xh r1
 24#define yl r2
 25#define yh r3
 26#endif
 27
 28/*
 29 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 30 *
 31 * Note: Calling convention is totally non standard for optimal code.
 32 *       This is meant to be used by do_div() from include/asm/div64.h only.
 33 *
 34 * Input parameters:
 35 * 	xh-xl	= dividend (clobbered)
 36 * 	r4	= divisor (preserved)
 37 *
 38 * Output values:
 39 * 	yh-yl	= result
 40 * 	xh	= remainder
 41 *
 42 * Clobbered regs: xl, ip
 43 */
 44
  45ENTRY(__do_div64)
  46UNWIND(.fnstart)
  47
	@ Unsigned division of the 64-bit dividend xh:xl by the 32-bit
	@ divisor r4.  On return yh:yl holds the quotient and xh the
	@ remainder.  Divisors 0 and 1 are handled at label 9, powers of
	@ two at label 8.  Every other divisor goes through two shift and
	@ subtract loops: label 2 produces the upper quotient word (yh),
	@ label 4 the lower one (yl).
	@ Most conditional instructions below consume flags left by an
	@ earlier instruction, so the statement order matters.
  48	@ Test for easy paths first.
  49	subs	ip, r4, #1		@ ip = divisor - 1; flags reused at label 9
  50	bls	9f			@ divisor is 0 or 1
  51	tst	ip, r4			@ Z set when divisor & (divisor - 1) == 0
  52	beq	8f			@ divisor is power of 2
  53
  54	@ See if we need to handle upper 32-bit result.
  55	cmp	xh, r4
  56	mov	yh, #0			@ plain mov: flags from cmp are kept
  57	blo	3f			@ xh < divisor: upper quotient word is 0
  58
  59	@ Align divisor with upper part of dividend.
  60	@ The aligned divisor is stored in yl preserving the original.
  61	@ The bit position is stored in ip.
  62
  63#if __LINUX_ARM_ARCH__ >= 5
  64
  65	clz	yl, r4
  66	clz	ip, xh
  67	sub	yl, yl, ip		@ yl = clz(divisor) - clz(xh) = shift count
  68	mov	ip, #1
  69	mov	ip, ip, lsl yl		@ ip = quotient bit matching that shift
  70	mov	yl, r4, lsl yl		@ yl = divisor << shift count
  71
  72#else
  73
  74	mov	yl, r4
  75	mov	ip, #1
  761:	cmp	yl, #0x80000000		@ C clear while the top bit of yl is free
  77	cmpcc	yl, xh			@ ... then C clear while yl < xh
  78	movcc	yl, yl, lsl #1
  79	movcc	ip, ip, lsl #1
  80	bcc	1b
  81
  82#endif
  83
  84	@ The division loop for needed upper bit positions.
  85 	@ Break out early if dividend reaches 0.
  862:	cmp	xh, yl
  87	orrcs	yh, yh, ip		@ xh >= yl: set this quotient bit
  88	subscs	xh, xh, yl		@ ... and subtract; Z set if xh is now 0
  89	movsne	ip, ip, lsr #1		@ next bit position; Z set when none is left
  90	mov	yl, yl, lsr #1
  91	bne	2b
  92
  93	@ See if we need to handle lower 32-bit result.
  943:	cmp	xh, #0
  95	mov	yl, #0
  96	cmpeq	xl, r4
  97	movlo	xh, xl			@ xh == 0 and xl < divisor: remainder is xl
  98	retlo	lr			@ ... and the lower quotient word stays 0
  99
 100	@ The division loop for lower bit positions.
 101	@ Here we shift remainder bits leftwards rather than moving the
 102	@ divisor for comparisons, considering the carry-out bit as well.
 103	mov	ip, #0x80000000		@ ip = current quotient bit, top bit first
 1044:	movs	xl, xl, lsl #1		@ C = bit shifted out of xl
 105	adcs	xh, xh, xh		@ xh = 2 * xh + C; C = carry out (33rd bit)
 106	beq	6f			@ low 32 bits of the result are zero
 107	cmpcc	xh, r4			@ no carry out: C = (xh >= divisor)
 1085:	orrcs	yl, yl, ip		@ C set: set this quotient bit
 109	subcs	xh, xh, r4		@ ... and subtract the divisor
 110	movs	ip, ip, lsr #1		@ next bit position; Z set after the last one
 111	bne	4b
 112	ret	lr
 113
 114	@ The top part of remainder became zero.  If carry is set
 115	@ (the 33rd bit) this is a false positive so resume the loop.
 116	@ Otherwise, if lower part is also null then we are done.
 1176:	bcs	5b
 118	cmp	xl, #0
 119	reteq	lr
 120
 121	@ We still have remainder bits in the low part.  Bring them up.
 122
 123#if __LINUX_ARM_ARCH__ >= 5
 124
 125	clz	xh, xl			@ we know xh is zero here so...
 126	add	xh, xh, #1		@ count that shifts out the top set bit of xl
 127	mov	xl, xl, lsl xh
 128	mov	ip, ip, lsr xh		@ skip the same number of quotient bits
 129
 130#else
 131
 1327:	movs	xl, xl, lsl #1		@ C = bit shifted out of xl
 133	mov	ip, ip, lsr #1
 134	bcc	7b			@ repeat until a set bit has been shifted out
 135
 136#endif
 137
 138	@ Current remainder is now 1.  It is worthless to compare with
 139	@ divisor at this point since divisor can not be smaller than 3 here.
 140	@ If possible, branch for another shift in the division loop.
 141	@ If no bit position left then we are done.
 142	movs	ip, ip, lsr #1
 143	mov	xh, #1
 144	bne	4b
 145	ret	lr
 146
 1478:	@ Division by a power of 2: determine what that divisor order is
 148	@ then simply shift values around
 149
 150#if __LINUX_ARM_ARCH__ >= 5
 151
 152	clz	ip, r4
 153	rsb	ip, ip, #31		@ ip = 31 - clz(divisor) = divisor order
 154
 155#else
 156
 157	mov	yl, r4
 158	cmp	r4, #(1 << 16)
 159	mov	ip, #0
 160	movhs	yl, yl, lsr #16
 161	movhs	ip, #16
 162
 163	cmp	yl, #(1 << 8)
 164	movhs	yl, yl, lsr #8
 165	addhs	ip, ip, #8
 166
 167	cmp	yl, #(1 << 4)
 168	movhs	yl, yl, lsr #4
 169	addhs	ip, ip, #4
 170
 171	cmp	yl, #(1 << 2)		@ yl is 1, 2, 4 or 8 at this point
 172	addhi	ip, ip, #3		@ yl == 8
 173	addls	ip, ip, yl, lsr #1	@ yl == 1, 2 or 4 adds 0, 1 or 2
 174
 175#endif
 176
 177	mov	yh, xh, lsr ip		@ quotient = dividend >> order
 178	mov	yl, xl, lsr ip
 179	rsb	ip, ip, #32		@ ip = 32 - order
 180 ARM(	orr	yl, yl, xh, lsl ip	)
 181 THUMB(	lsl	xh, xh, ip		)
 182 THUMB(	orr	yl, yl, xh		)
 183	mov	xh, xl, lsl ip		@ remainder = the low order bits of xl
 184	mov	xh, xh, lsr ip
 185	ret	lr
 186
 187	@ eq -> division by 1: obvious enough...
 1889:	moveq	yl, xl
 189	moveq	yh, xh
 190	moveq	xh, #0
 191	reteq	lr
	@ Not eq: the divisor is 0.  Execution falls through to Ldiv0_64.
 192UNWIND(.fnend)
 193
 194UNWIND(.fnstart)
 195UNWIND(.pad #4)
 196UNWIND(.save {lr})
 197Ldiv0_64:
 198	@ Division by 0:
 199	str	lr, [sp, #-8]!		@ save lr, moving sp down by 8 bytes
 200	bl	__div0
 201
 202	@ as wrong as it could be...
 203	mov	yl, #0
 204	mov	yh, #0
 205	mov	xh, #0
 206	ldr	pc, [sp], #8		@ return via the saved lr and restore sp
 207
 208UNWIND(.fnend)
 209ENDPROC(__do_div64)

  1/* SPDX-License-Identifier: GPL-2.0-only */
  2/*
  3 *  linux/arch/arm/lib/div64.S
  4 *
  5 *  Optimized computation of 64-bit dividend / 32-bit divisor
  6 *
  7 *  Author:	Nicolas Pitre
  8 *  Created:	Oct 5, 2003
  9 *  Copyright:	Monta Vista Software, Inc.
 10 */
 11
 12#include <linux/linkage.h>
 13#include <asm/assembler.h>
 14#include <asm/unwind.h>
 15
 16#ifdef __ARMEB__
 17#define xh r0
 18#define xl r1
 19#define yh r2
 20#define yl r3
 21#else
 22#define xl r0
 23#define xh r1
 24#define yl r2
 25#define yh r3
 26#endif
 27
 28/*
 29 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 30 *
 31 * Note: Calling convention is totally non standard for optimal code.
 32 *       This is meant to be used by do_div() from include/asm/div64.h only.
 33 *
 34 * Input parameters:
 35 * 	xh-xl	= dividend (clobbered)
 36 * 	r4	= divisor (preserved)
 37 *
 38 * Output values:
 39 * 	yh-yl	= result
 40 * 	xh	= remainder
 41 *
 42 * Clobbered regs: xl, ip
 43 */
 44
  45ENTRY(__do_div64)
  46UNWIND(.fnstart)
  47
	@ Unsigned division of the 64-bit dividend xh:xl by the 32-bit
	@ divisor r4.  On return yh:yl holds the quotient and xh the
	@ remainder.  Divisors 0 and 1 are handled at label 9, powers of
	@ two at label 8.  Every other divisor goes through two shift and
	@ subtract loops: label 2 produces the upper quotient word (yh),
	@ label 4 the lower one (yl).
	@ Most conditional instructions below consume flags left by an
	@ earlier instruction, so the statement order matters.
  48	@ Test for easy paths first.
  49	subs	ip, r4, #1		@ ip = divisor - 1; flags reused at label 9
  50	bls	9f			@ divisor is 0 or 1
  51	tst	ip, r4			@ Z set when divisor & (divisor - 1) == 0
  52	beq	8f			@ divisor is power of 2
  53
  54	@ See if we need to handle upper 32-bit result.
  55	cmp	xh, r4
  56	mov	yh, #0			@ plain mov: flags from cmp are kept
  57	blo	3f			@ xh < divisor: upper quotient word is 0
  58
  59	@ Align divisor with upper part of dividend.
  60	@ The aligned divisor is stored in yl preserving the original.
  61	@ The bit position is stored in ip.
  62
  63#if __LINUX_ARM_ARCH__ >= 5
  64
  65	clz	yl, r4
  66	clz	ip, xh
  67	sub	yl, yl, ip		@ yl = clz(divisor) - clz(xh) = shift count
  68	mov	ip, #1
  69	mov	ip, ip, lsl yl		@ ip = quotient bit matching that shift
  70	mov	yl, r4, lsl yl		@ yl = divisor << shift count
  71
  72#else
  73
  74	mov	yl, r4
  75	mov	ip, #1
  761:	cmp	yl, #0x80000000		@ C clear while the top bit of yl is free
  77	cmpcc	yl, xh			@ ... then C clear while yl < xh
  78	movcc	yl, yl, lsl #1
  79	movcc	ip, ip, lsl #1
  80	bcc	1b
  81
  82#endif
  83
  84	@ The division loop for needed upper bit positions.
  85 	@ Break out early if dividend reaches 0.
  862:	cmp	xh, yl
  87	orrcs	yh, yh, ip		@ xh >= yl: set this quotient bit
  88	subscs	xh, xh, yl		@ ... and subtract; Z set if xh is now 0
  89	movsne	ip, ip, lsr #1		@ next bit position; Z set when none is left
  90	mov	yl, yl, lsr #1
  91	bne	2b
  92
  93	@ See if we need to handle lower 32-bit result.
  943:	cmp	xh, #0
  95	mov	yl, #0
  96	cmpeq	xl, r4
  97	movlo	xh, xl			@ xh == 0 and xl < divisor: remainder is xl
  98	retlo	lr			@ ... and the lower quotient word stays 0
  99
 100	@ The division loop for lower bit positions.
 101	@ Here we shift remainder bits leftwards rather than moving the
 102	@ divisor for comparisons, considering the carry-out bit as well.
 103	mov	ip, #0x80000000		@ ip = current quotient bit, top bit first
 1044:	movs	xl, xl, lsl #1		@ C = bit shifted out of xl
 105	adcs	xh, xh, xh		@ xh = 2 * xh + C; C = carry out (33rd bit)
 106	beq	6f			@ low 32 bits of the result are zero
 107	cmpcc	xh, r4			@ no carry out: C = (xh >= divisor)
 1085:	orrcs	yl, yl, ip		@ C set: set this quotient bit
 109	subcs	xh, xh, r4		@ ... and subtract the divisor
 110	movs	ip, ip, lsr #1		@ next bit position; Z set after the last one
 111	bne	4b
 112	ret	lr
 113
 114	@ The top part of remainder became zero.  If carry is set
 115	@ (the 33rd bit) this is a false positive so resume the loop.
 116	@ Otherwise, if lower part is also null then we are done.
 1176:	bcs	5b
 118	cmp	xl, #0
 119	reteq	lr
 120
 121	@ We still have remainder bits in the low part.  Bring them up.
 122
 123#if __LINUX_ARM_ARCH__ >= 5
 124
 125	clz	xh, xl			@ we know xh is zero here so...
 126	add	xh, xh, #1		@ count that shifts out the top set bit of xl
 127	mov	xl, xl, lsl xh
 128	mov	ip, ip, lsr xh		@ skip the same number of quotient bits
 129
 130#else
 131
 1327:	movs	xl, xl, lsl #1		@ C = bit shifted out of xl
 133	mov	ip, ip, lsr #1
 134	bcc	7b			@ repeat until a set bit has been shifted out
 135
 136#endif
 137
 138	@ Current remainder is now 1.  It is worthless to compare with
 139	@ divisor at this point since divisor can not be smaller than 3 here.
 140	@ If possible, branch for another shift in the division loop.
 141	@ If no bit position left then we are done.
 142	movs	ip, ip, lsr #1
 143	mov	xh, #1
 144	bne	4b
 145	ret	lr
 146
 1478:	@ Division by a power of 2: determine what that divisor order is
 148	@ then simply shift values around
 149
 150#if __LINUX_ARM_ARCH__ >= 5
 151
 152	clz	ip, r4
 153	rsb	ip, ip, #31		@ ip = 31 - clz(divisor) = divisor order
 154
 155#else
 156
 157	mov	yl, r4
 158	cmp	r4, #(1 << 16)
 159	mov	ip, #0
 160	movhs	yl, yl, lsr #16
 161	movhs	ip, #16
 162
 163	cmp	yl, #(1 << 8)
 164	movhs	yl, yl, lsr #8
 165	addhs	ip, ip, #8
 166
 167	cmp	yl, #(1 << 4)
 168	movhs	yl, yl, lsr #4
 169	addhs	ip, ip, #4
 170
 171	cmp	yl, #(1 << 2)		@ yl is 1, 2, 4 or 8 at this point
 172	addhi	ip, ip, #3		@ yl == 8
 173	addls	ip, ip, yl, lsr #1	@ yl == 1, 2 or 4 adds 0, 1 or 2
 174
 175#endif
 176
 177	mov	yh, xh, lsr ip		@ quotient = dividend >> order
 178	mov	yl, xl, lsr ip
 179	rsb	ip, ip, #32		@ ip = 32 - order
 180 ARM(	orr	yl, yl, xh, lsl ip	)
 181 THUMB(	lsl	xh, xh, ip		)
 182 THUMB(	orr	yl, yl, xh		)
 183	mov	xh, xl, lsl ip		@ remainder = the low order bits of xl
 184	mov	xh, xh, lsr ip
 185	ret	lr
 186
 187	@ eq -> division by 1: obvious enough...
 1889:	moveq	yl, xl
 189	moveq	yh, xh
 190	moveq	xh, #0
 191	reteq	lr
	@ Not eq: the divisor is 0.  Execution falls through to Ldiv0_64.
 192UNWIND(.fnend)
 193
 194UNWIND(.fnstart)
 195UNWIND(.pad #4)
 196UNWIND(.save {lr})
 197Ldiv0_64:
 198	@ Division by 0:
 199	str	lr, [sp, #-8]!		@ save lr, moving sp down by 8 bytes
 200	bl	__div0
 201
 202	@ as wrong as it could be...
 203	mov	yl, #0
 204	mov	yh, #0
 205	mov	xh, #0
 206	ldr	pc, [sp], #8		@ return via the saved lr and restore sp
 207
 208UNWIND(.fnend)
 209ENDPROC(__do_div64)