polynom_Xsig.S - arch/x86/math-emu/polynom_Xsig.S - Linux diff v6.2

  1/* SPDX-License-Identifier: GPL-2.0 */
  2/*---------------------------------------------------------------------------+
  3 |  polynomial_Xsig.S                                                        |
  4 |                                                                           |
  5 | Fixed point arithmetic polynomial evaluation.                             |
  6 |                                                                           |
  7 | Copyright (C) 1992,1993,1994,1995                                         |
  8 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
  9 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
 10 |                                                                           |
 11 | Call from C as:                                                           |
 12 |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
 13 |                        unsigned long long terms[], int n)                 |
 14 |                                                                           |
 15 | Computes:                                                                 |
 16 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
 17 | and adds the result to the 12 byte Xsig.                                  |
 18 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
 19 | precision.                                                                |
 20 |                                                                           |
 21 | This function must be used carefully: most overflow of intermediate       |
 22 | results is controlled, but overflow of the result is not.                 |
 23 |                                                                           |
 24 +---------------------------------------------------------------------------*/
 25	.file	"polynomial_Xsig.S"
 26
 27#include "fpu_emu.h"
 28
 29
 30#define	TERM_SIZE	$8
 31#define	SUM_MS		-20(%ebp)	/* sum ms long */
 32#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
 33#define	SUM_LS		-28(%ebp)	/* sum ls long */
 34#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
 35#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
 36#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
 37#define OVERFLOWED      -16(%ebp)	/* addition overflow flag */
 38
 39.text
 40SYM_FUNC_START(polynomial_Xsig)
 41	pushl	%ebp
 42	movl	%esp,%ebp
 43	subl	$32,%esp
 44	pushl	%esi
 45	pushl	%edi
 46	pushl	%ebx
 47
 48	movl	PARAM2,%esi		/* x */
 49	movl	PARAM3,%edi		/* terms */
 50
 51	movl	TERM_SIZE,%eax
 52	mull	PARAM4			/* n */
 53	addl	%eax,%edi
 54
 55	movl	4(%edi),%edx		/* terms[n] */
 56	movl	%edx,SUM_MS
 57	movl	(%edi),%edx		/* terms[n] */
 58	movl	%edx,SUM_MIDDLE
 59	xor	%eax,%eax
 60	movl	%eax,SUM_LS
 61	movb	%al,OVERFLOWED
 62
 63	subl	TERM_SIZE,%edi
 64	decl	PARAM4
 65	js	L_accum_done
 66
 67L_accum_loop:
 68	xor	%eax,%eax
 69	movl	%eax,ACCUM_MS
 70	movl	%eax,ACCUM_MIDDLE
 71
 72	movl	SUM_MIDDLE,%eax
 73	mull	(%esi)			/* x ls long */
 74	movl	%edx,ACCUM_LS
 75
 76	movl	SUM_MIDDLE,%eax
 77	mull	4(%esi)			/* x ms long */
 78	addl	%eax,ACCUM_LS
 79	adcl	%edx,ACCUM_MIDDLE
 80	adcl	$0,ACCUM_MS
 81
 82	movl	SUM_MS,%eax
 83	mull	(%esi)			/* x ls long */
 84	addl	%eax,ACCUM_LS
 85	adcl	%edx,ACCUM_MIDDLE
 86	adcl	$0,ACCUM_MS
 87
 88	movl	SUM_MS,%eax
 89	mull	4(%esi)			/* x ms long */
 90	addl	%eax,ACCUM_MIDDLE
 91	adcl	%edx,ACCUM_MS
 92
 93	testb	$0xff,OVERFLOWED
 94	jz	L_no_overflow
 95
 96	movl	(%esi),%eax
 97	addl	%eax,ACCUM_MIDDLE
 98	movl	4(%esi),%eax
 99	adcl	%eax,ACCUM_MS		/* This could overflow too */
100
101L_no_overflow:
102
103/*
104 * Now put the sum of next term and the accumulator
105 * into the sum register
106 */
107	movl	ACCUM_LS,%eax
108	addl	(%edi),%eax		/* term ls long */
109	movl	%eax,SUM_LS
110	movl	ACCUM_MIDDLE,%eax
111	adcl	(%edi),%eax		/* term ls long */
112	movl	%eax,SUM_MIDDLE
113	movl	ACCUM_MS,%eax
114	adcl	4(%edi),%eax		/* term ms long */
115	movl	%eax,SUM_MS
116	sbbb	%al,%al
117	movb	%al,OVERFLOWED		/* Used in the next iteration */
118
119	subl	TERM_SIZE,%edi
120	decl	PARAM4
121	jns	L_accum_loop
122
123L_accum_done:
124	movl	PARAM1,%edi		/* accum */
125	movl	SUM_LS,%eax
126	addl	%eax,(%edi)
127	movl	SUM_MIDDLE,%eax
128	adcl	%eax,4(%edi)
129	movl	SUM_MS,%eax
130	adcl	%eax,8(%edi)
131
132	popl	%ebx
133	popl	%edi
134	popl	%esi
135	leave
136	RET
137SYM_FUNC_END(polynomial_Xsig)

  1/* SPDX-License-Identifier: GPL-2.0 */
  2/*---------------------------------------------------------------------------+
  3 |  polynomial_Xsig.S                                                        |
  4 |                                                                           |
  5 | Fixed point arithmetic polynomial evaluation.                             |
  6 |                                                                           |
  7 | Copyright (C) 1992,1993,1994,1995                                         |
  8 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
  9 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
 10 |                                                                           |
 11 | Call from C as:                                                           |
 12 |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
 13 |                        unsigned long long terms[], int n)                 |
 14 |                                                                           |
 15 | Computes:                                                                 |
 16 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
 17 | and adds the result to the 12 byte Xsig.                                  |
 18 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
 19 | precision.                                                                |
 20 |                                                                           |
 21 | This function must be used carefully: most overflow of intermediate       |
 22 | results is controlled, but overflow of the result is not.                 |
 23 |                                                                           |
 24 +---------------------------------------------------------------------------*/
 25	.file	"polynomial_Xsig.S"
 26
 27#include "fpu_emu.h"
 28
 29
 30#define	TERM_SIZE	$8
 31#define	SUM_MS		-20(%ebp)	/* sum ms long */
 32#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
 33#define	SUM_LS		-28(%ebp)	/* sum ls long */
 34#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
 35#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
 36#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
 37#define OVERFLOWED      -16(%ebp)	/* addition overflow flag */
 38
 39.text
 40SYM_FUNC_START(polynomial_Xsig)
 41	pushl	%ebp
 42	movl	%esp,%ebp
 43	subl	$32,%esp
 44	pushl	%esi
 45	pushl	%edi
 46	pushl	%ebx
 47
 48	movl	PARAM2,%esi		/* x */
 49	movl	PARAM3,%edi		/* terms */
 50
 51	movl	TERM_SIZE,%eax
 52	mull	PARAM4			/* n */
 53	addl	%eax,%edi
 54
 55	movl	4(%edi),%edx		/* terms[n] */
 56	movl	%edx,SUM_MS
 57	movl	(%edi),%edx		/* terms[n] */
 58	movl	%edx,SUM_MIDDLE
 59	xor	%eax,%eax
 60	movl	%eax,SUM_LS
 61	movb	%al,OVERFLOWED
 62
 63	subl	TERM_SIZE,%edi
 64	decl	PARAM4
 65	js	L_accum_done
 66
 67L_accum_loop:
 68	xor	%eax,%eax
 69	movl	%eax,ACCUM_MS
 70	movl	%eax,ACCUM_MIDDLE
 71
 72	movl	SUM_MIDDLE,%eax
 73	mull	(%esi)			/* x ls long */
 74	movl	%edx,ACCUM_LS
 75
 76	movl	SUM_MIDDLE,%eax
 77	mull	4(%esi)			/* x ms long */
 78	addl	%eax,ACCUM_LS
 79	adcl	%edx,ACCUM_MIDDLE
 80	adcl	$0,ACCUM_MS
 81
 82	movl	SUM_MS,%eax
 83	mull	(%esi)			/* x ls long */
 84	addl	%eax,ACCUM_LS
 85	adcl	%edx,ACCUM_MIDDLE
 86	adcl	$0,ACCUM_MS
 87
 88	movl	SUM_MS,%eax
 89	mull	4(%esi)			/* x ms long */
 90	addl	%eax,ACCUM_MIDDLE
 91	adcl	%edx,ACCUM_MS
 92
 93	testb	$0xff,OVERFLOWED
 94	jz	L_no_overflow
 95
 96	movl	(%esi),%eax
 97	addl	%eax,ACCUM_MIDDLE
 98	movl	4(%esi),%eax
 99	adcl	%eax,ACCUM_MS		/* This could overflow too */
100
101L_no_overflow:
102
103/*
104 * Now put the sum of next term and the accumulator
105 * into the sum register
106 */
107	movl	ACCUM_LS,%eax
108	addl	(%edi),%eax		/* term ls long */
109	movl	%eax,SUM_LS
110	movl	ACCUM_MIDDLE,%eax
111	adcl	(%edi),%eax		/* term ls long */
112	movl	%eax,SUM_MIDDLE
113	movl	ACCUM_MS,%eax
114	adcl	4(%edi),%eax		/* term ms long */
115	movl	%eax,SUM_MS
116	sbbb	%al,%al
117	movb	%al,OVERFLOWED		/* Used in the next iteration */
118
119	subl	TERM_SIZE,%edi
120	decl	PARAM4
121	jns	L_accum_loop
122
123L_accum_done:
124	movl	PARAM1,%edi		/* accum */
125	movl	SUM_LS,%eax
126	addl	%eax,(%edi)
127	movl	SUM_MIDDLE,%eax
128	adcl	%eax,4(%edi)
129	movl	SUM_MS,%eax
130	adcl	%eax,8(%edi)
131
132	popl	%ebx
133	popl	%edi
134	popl	%esi
135	leave
136	RET
137SYM_FUNC_END(polynomial_Xsig)