reg_round.S - arch/x86/math-emu/reg_round.S - Linux diff v3.1

 
  1	.file "reg_round.S"
  2/*---------------------------------------------------------------------------+
  3 |  reg_round.S                                                              |
  4 |                                                                           |
  5 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
  6 |                                                                           |
  7 | Copyright (C) 1993,1995,1997                                              |
  8 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
  9 |                       Australia.  E-mail billm@suburbia.net               |
 10 |                                                                           |
 11 | This code has four possible entry points.                                 |
 12 | The following must be entered by a jmp instruction:                       |
 13 |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
 14 |                                                                           |
 15 | The FPU_round entry point is intended to be used by C code.               |
  16 | From C, call as:  int FPU_round(FPU_REG *arg, unsigned int extent,        |
  17 |                       int dummy, unsigned int control_w, u_char sign)     |
 18 |                                                                           |
 19 |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
 20 |    one was raised, or -1 on internal error.                               |
 21 |                                                                           |
 22 | For correct "up" and "down" rounding, the argument must have the correct  |
 23 | sign.                                                                     |
 24 |                                                                           |
 25 +---------------------------------------------------------------------------*/
 26
 27/*---------------------------------------------------------------------------+
 28 | Four entry points.                                                        |
 29 |                                                                           |
 30 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
 31 |  %eax:%ebx  64 bit significand                                            |
 32 |  %edx       32 bit extension of the significand                           |
 33 |  %edi       pointer to an FPU_REG for the result to be stored             |
 34 |  stack      calling function must have set up a C stack frame and         |
 35 |             pushed %esi, %edi, and %ebx                                   |
 36 |                                                                           |
 37 | Needed just for the fpu_reg_round_sqrt entry point:                       |
 38 |  %cx  A control word in the same format as the FPU control word.          |
 39 | Otherwise, PARAM4 must give such a value.                                 |
 40 |                                                                           |
 41 |                                                                           |
 42 | The significand and its extension are assumed to be exact in the          |
 43 | following sense:                                                          |
 44 |   If the significand by itself is the exact result then the significand   |
 45 |   extension (%edx) must contain 0, otherwise the significand extension    |
 46 |   must be non-zero.                                                       |
 47 |   If the significand extension is non-zero then the significand is        |
 48 |   smaller than the magnitude of the correct exact result by an amount     |
 49 |   greater than zero and less than one ls bit of the significand.          |
 50 |   The significand extension is only required to have three possible       |
 51 |   non-zero values:                                                        |
 52 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 53 |                                 bit smaller than the magnitude of the     |
 54 |                                 true exact result.                        |
 55 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 56 |                                 smaller than the magnitude of the true    |
 57 |                                 exact result.                             |
 58 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 59 |                                 bit smaller than the magnitude of the     |
 60 |                                 true exact result.                        |
 61 |                                                                           |
 62 +---------------------------------------------------------------------------*/
 63
 64/*---------------------------------------------------------------------------+
 65 |  The code in this module has become quite complex, but it should handle   |
 66 |  all of the FPU flags which are set at this stage of the basic arithmetic |
 67 |  computations.                                                            |
 68 |  There are a few rare cases where the results are not set identically to  |
 69 |  a real FPU. These require a bit more thought because at this stage the   |
 70 |  results of the code here appear to be more consistent...                 |
 71 |  This may be changed in a future version.                                 |
 72 +---------------------------------------------------------------------------*/
 73
 74
 75#include "fpu_emu.h"
 76#include "exception.h"
 77#include "control_w.h"
 78
 79/* Flags for FPU_bits_lost */
 80#define	LOST_DOWN	$1
 81#define	LOST_UP		$2
 82
 83/* Flags for FPU_denormal */
 84#define	DENORMAL	$1
 85#define	UNMASKED_UNDERFLOW $2
 86
 87
 88#ifndef NON_REENTRANT_FPU
 89/*	Make the code re-entrant by putting
 90	local storage on the stack: */
 91#define FPU_bits_lost	(%esp)
 92#define FPU_denormal	1(%esp)
 93
 94#else
 95/*	Not re-entrant, so we can gain speed by putting
 96	local storage in a static area: */
 97.data
 98	.align 4,0
 99FPU_bits_lost:
100	.byte	0
101FPU_denormal:
102	.byte	0
103#endif /* NON_REENTRANT_FPU */
104
105
106.text
107.globl fpu_reg_round
108.globl fpu_Arith_exit
109
110/* Entry point when called from C */
 111ENTRY(FPU_round)
    	/* C-callable entry: build a stack frame, save %esi/%edi/%ebx, then */
    	/* load the register state that the fpu_reg_round entry expects: */
    	/*   %edi = PARAM1 (FPU_REG pointer), %eax:%ebx = its significand, */
    	/*   %edx = PARAM2 (the significand extension). */
 112	pushl	%ebp
 113	movl	%esp,%ebp
 114	pushl	%esi
 115	pushl	%edi
 116	pushl	%ebx
 117
 118	movl	PARAM1,%edi
 119	movl	SIGH(%edi),%eax
 120	movl	SIGL(%edi),%ebx
 121	movl	PARAM2,%edx
 122
 123fpu_reg_round:			/* Normal entry point (reached by jmp, or by fall-through from FPU_round) */
 124	movl	PARAM4,%ecx	/* %ecx = control word (precision and rounding fields are tested below) */
 125
 126#ifndef NON_REENTRANT_FPU
    	/* Reserve one 32-bit stack slot; FPU_bits_lost is (%esp) and */
    	/* FPU_denormal is 1(%esp). The pushed value itself is never read. */
 127	pushl	%ebx		/* adjust the stack pointer */
 128#endif /* NON_REENTRANT_FPU */ 
 129
 130#ifdef PARANOID
 131/* Cannot use this here yet */
 132/*	orl	%eax,%eax */
 133/*	jns	L_entry_bugged */
 134#endif /* PARANOID */
 135
 136	cmpw	EXP_UNDER,EXP(%edi)	/* signed 16-bit compare of the exponent */
 137	jle	L_Make_denorm			/* The number is a de-normal */
 138
 139	movb	$0,FPU_denormal			/* 0 -> not a de-normal */
 140
 141Denorm_done:
    	/* L_Make_denorm and Unmasked_underflow rejoin here with FPU_denormal set. */
 142	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */
 143
 144	movl	%ecx,%esi	/* keep the full control word; %ecx is reused as scratch */
 145	andl	CW_PC,%ecx	/* isolate the precision-control field */
 146	cmpl	PR_64_BITS,%ecx
 147	je	LRound_To_64
 148
 149	cmpl	PR_53_BITS,%ecx
 150	je	LRound_To_53
 151
 152	cmpl	PR_24_BITS,%ecx
 153	je	LRound_To_24
 154
 155#ifdef PECULIAR_486
 156/* With the precision control bits set to 01 "(reserved)", a real 80486
 157   behaves as if the precision control bits were set to 11 "64 bits" */
 158	cmpl	PR_RESERVED_BITS,%ecx
 159	je	LRound_To_64
 160#ifdef PARANOID
 161	jmp	L_bugged_denorm_486
 162#endif /* PARANOID */ 
 163#else
 164#ifdef PARANOID
 165	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
 166#endif /* PARANOID */ 
 167#endif /* PECULIAR_486 */
    	/* Without PARANOID, an unmatched precision field falls through */
    	/* into the code that follows (LRound_To_24). */
168
169
170/* Round etc to 24 bit precision */
 171LRound_To_24:
    	/* 24-bit precision: the top 24 bits of %eax are kept; the low 8 bits */
    	/* of %eax, all of %ebx and the extension %edx are the discarded part. */
    	/* Dispatch on the rounding-control field of the saved control word. */
 172	movl	%esi,%ecx
 173	andl	CW_RC,%ecx
 174	cmpl	RC_RND,%ecx
 175	je	LRound_nearest_24
 176
 177	cmpl	RC_CHOP,%ecx
 178	je	LCheck_truncate_24
 179
 180	cmpl	RC_UP,%ecx		/* Towards +infinity */
 181	je	LUp_24
 182
 183	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
 184	je	LDown_24
 185
 186#ifdef PARANOID
 187	jmp	L_bugged_round24
 188#endif /* PARANOID */ 
 189
 190LUp_24:
 191	cmpb	SIGN_POS,PARAM5		/* PARAM5 carries the sign of the result */
 192	jne	LCheck_truncate_24	/* If negative then  up==truncate */
 193
 194	jmp	LCheck_24_round_up
 195
 196LDown_24:
 197	cmpb	SIGN_POS,PARAM5
 198	je	LCheck_truncate_24	/* If positive then  down==truncate */
 199
 200LCheck_24_round_up:
    	/* Directed rounding away from zero: increment only if any discarded bit is set. */
 201	movl	%eax,%ecx
 202	andl	$0x000000ff,%ecx
 203	orl	%ebx,%ecx
 204	orl	%edx,%ecx
 205	jnz	LDo_24_round_up
 206	jmp	L_Re_normalise		/* exact already: nothing lost */
 207
 208LRound_nearest_24:
 209	/* Do rounding of the 24th bit if needed (nearest or even) */
 210	movl	%eax,%ecx
 211	andl	$0x000000ff,%ecx
 212	cmpl	$0x00000080,%ecx	/* both branches below test the flags of this one compare */
 213	jc	LCheck_truncate_24	/* less than half, no increment needed */
 214
 215	jne	LGreater_Half_24	/* greater than half, increment needed */
 216
 217	/* Possibly half, we need to check the ls bits */
 218	orl	%ebx,%ebx
 219	jnz	LGreater_Half_24	/* greater than half, increment needed */
 220
 221	orl	%edx,%edx
 222	jnz	LGreater_Half_24	/* greater than half, increment needed */
 223
 224	/* Exactly half, increment only if 24th bit is 1 (round to even) */
 225	testl	$0x00000100,%eax
 226	jz	LDo_truncate_24
 227
 228LGreater_Half_24:			/* Rounding: increment at the 24th bit */
 229LDo_24_round_up:
 230	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
 231	xorl	%ebx,%ebx
 232	movb	LOST_UP,FPU_bits_lost
 233	addl	$0x00000100,%eax	/* carry out of this add is tested at LCheck_Round_Overflow */
 234	jmp	LCheck_Round_Overflow
 235
 236LCheck_truncate_24:
    	/* Truncating modes: record a loss only if some discarded bit is non-zero. */
 237	movl	%eax,%ecx
 238	andl	$0x000000ff,%ecx
 239	orl	%ebx,%ecx
 240	orl	%edx,%ecx
 241	jz	L_Re_normalise		/* No truncation needed */
 242
 243LDo_truncate_24:
 244	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
 245	xorl	%ebx,%ebx
 246	movb	LOST_DOWN,FPU_bits_lost
 247	jmp	L_Re_normalise
248
249
250/* Round etc to 53 bit precision */
 251LRound_To_53:
    	/* 53-bit precision: all of %eax and the top 21 bits of %ebx are kept; */
    	/* the low 11 bits of %ebx and the extension %edx are the discarded part. */
    	/* Dispatch on the rounding-control field of the saved control word. */
 252	movl	%esi,%ecx
 253	andl	CW_RC,%ecx
 254	cmpl	RC_RND,%ecx
 255	je	LRound_nearest_53
 256
 257	cmpl	RC_CHOP,%ecx
 258	je	LCheck_truncate_53
 259
 260	cmpl	RC_UP,%ecx		/* Towards +infinity */
 261	je	LUp_53
 262
 263	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
 264	je	LDown_53
 265
 266#ifdef PARANOID
 267	jmp	L_bugged_round53
 268#endif /* PARANOID */ 
 269
 270LUp_53:
 271	cmpb	SIGN_POS,PARAM5		/* PARAM5 carries the sign of the result */
 272	jne	LCheck_truncate_53	/* If negative then  up==truncate */
 273
 274	jmp	LCheck_53_round_up
 275
 276LDown_53:
 277	cmpb	SIGN_POS,PARAM5
 278	je	LCheck_truncate_53	/* If positive then  down==truncate */
 279
 280LCheck_53_round_up:
    	/* Directed rounding away from zero: increment only if any discarded bit is set. */
 281	movl	%ebx,%ecx
 282	andl	$0x000007ff,%ecx
 283	orl	%edx,%ecx
 284	jnz	LDo_53_round_up
 285	jmp	L_Re_normalise		/* exact already: nothing lost */
 286
 287LRound_nearest_53:
 288	/* Do rounding of the 53rd bit if needed (nearest or even) */
 289	movl	%ebx,%ecx
 290	andl	$0x000007ff,%ecx
 291	cmpl	$0x00000400,%ecx	/* both branches below test the flags of this one compare */
 292	jc	LCheck_truncate_53	/* less than half, no increment needed */
 293
 294	jnz	LGreater_Half_53	/* greater than half, increment needed */
 295
 296	/* Possibly half, we need to check the ls bits */
 297	orl	%edx,%edx
 298	jnz	LGreater_Half_53	/* greater than half, increment needed */
 299
 300	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
 301	testl	$0x00000800,%ebx
 302	jz	LTruncate_53
 303
 304LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
 305LDo_53_round_up:
 306	movb	LOST_UP,FPU_bits_lost
 307	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
 308	addl	$0x00000800,%ebx
 309	adcl	$0,%eax			/* carry out of %eax is tested at LCheck_Round_Overflow */
 310	jmp	LCheck_Round_Overflow
 311
 312LCheck_truncate_53:
    	/* Truncating modes: record a loss only if some discarded bit is non-zero. */
 313	movl	%ebx,%ecx
 314	andl	$0x000007ff,%ecx
 315	orl	%edx,%ecx
 316	jz	L_Re_normalise		/* No truncation needed */
 317
 318LTruncate_53:
 319	movb	LOST_DOWN,FPU_bits_lost
 320	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
 321	jmp	L_Re_normalise
322
323
324/* Round etc to 64 bit precision */
 325LRound_To_64:
    	/* 64-bit precision: the whole of %eax:%ebx is kept; only the */
    	/* extension %edx is discarded. Dispatch on the rounding-control field. */
 326	movl	%esi,%ecx
 327	andl	CW_RC,%ecx
 328	cmpl	RC_RND,%ecx
 329	je	LRound_nearest_64
 330
 331	cmpl	RC_CHOP,%ecx
 332	je	LCheck_truncate_64
 333
 334	cmpl	RC_UP,%ecx		/* Towards +infinity */
 335	je	LUp_64
 336
 337	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
 338	je	LDown_64
 339
 340#ifdef PARANOID
 341	jmp	L_bugged_round64
 342#endif /* PARANOID */ 
 343
 344LUp_64:
 345	cmpb	SIGN_POS,PARAM5		/* PARAM5 carries the sign of the result */
 346	jne	LCheck_truncate_64	/* If negative then  up==truncate */
 347
 348	orl	%edx,%edx		/* any extension bits set? */
 349	jnz	LDo_64_round_up
 350	jmp	L_Re_normalise		/* exact already: nothing lost */
 351
 352LDown_64:
 353	cmpb	SIGN_POS,PARAM5
 354	je	LCheck_truncate_64	/* If positive then  down==truncate */
 355
 356	orl	%edx,%edx		/* any extension bits set? */
 357	jnz	LDo_64_round_up
 358	jmp	L_Re_normalise		/* exact already: nothing lost */
 359
 360LRound_nearest_64:
 361	cmpl	$0x80000000,%edx	/* compare the extension with exactly one half */
 362	jc	LCheck_truncate_64	/* less than half */
 363
 364	jne	LDo_64_round_up		/* more than half (same compare's flags) */
 365
 366	/* Now test for round-to-even */
 367	testb	$1,%bl
 368	jz	LCheck_truncate_64
 369
 370LDo_64_round_up:
 371	movb	LOST_UP,FPU_bits_lost
 372	addl	$1,%ebx
 373	adcl	$0,%eax
 374
 375LCheck_Round_Overflow:
    	/* Shared by the 24-, 53- and 64-bit round-up paths. The carry flag */
    	/* still holds the carry out of the significand increment, because */
    	/* the intervening jmp does not alter flags. */
 376	jnc	L_Re_normalise
 377
 378	/* Overflow, adjust the result (significand to 1.0) */
 379	rcrl	$1,%eax			/* rotate the carry back in as the new ms bit */
 380	rcrl	$1,%ebx
 381	incw	EXP(%edi)
 382	jmp	L_Re_normalise
 383
 384LCheck_truncate_64:
 385	orl	%edx,%edx
 386	jz	L_Re_normalise		/* extension is zero: result is exact */
 387
 388LTruncate_64:
 389	movb	LOST_DOWN,FPU_bits_lost	/* then fall through to L_Re_normalise */
390
 391L_Re_normalise:
    	/* All rounding paths converge here with the rounded significand in */
    	/* %eax:%ebx and FPU_bits_lost set to 0, LOST_DOWN or LOST_UP. */
 392	testb	$0xff,FPU_denormal
 393	jnz	Normalise_result	/* denormal or unmasked-underflow handling needed */
 394
 395L_Normalised:
 396	movl	TAG_Valid,%edx		/* %edx carries the result tag from here on */
 397
 398L_deNormalised:
    	/* Entered with the tag already in %edx. Report any precision loss. */
 399	cmpb	LOST_UP,FPU_bits_lost
 400	je	L_precision_lost_up
 401
 402	cmpb	LOST_DOWN,FPU_bits_lost
 403	je	L_precision_lost_down
 404
 405L_no_precision_loss:
 406	/* store the result */
 407
 408L_Store_significand:
 409	movl	%eax,SIGH(%edi)
 410	movl	%ebx,SIGL(%edi)
 411
 412	cmpw	EXP_OVER,EXP(%edi)	/* signed compare: exponent too large? */
 413	jge	L_overflow
 414
 415	movl	%edx,%eax		/* return value = tag */
 416
 417	/* Convert the exponent to 80x87 form. */
 418	addw	EXTENDED_Ebias,EXP(%edi)
 419	andw	$0x7fff,EXP(%edi)	/* clear bit 15, which holds the sign (set below if negative) */
 420
 421fpu_reg_round_signed_special_exit:
 422
 423	cmpb	SIGN_POS,PARAM5
 424	je	fpu_reg_round_special_exit
 425
 426	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */
 427
 428fpu_reg_round_special_exit:
 429
 430#ifndef NON_REENTRANT_FPU
    	/* Release the scratch slot reserved at fpu_reg_round; the value */
    	/* popped into %ebx is overwritten by the next pop. */
 431	popl	%ebx		/* adjust the stack pointer */
 432#endif /* NON_REENTRANT_FPU */ 
 433
 434fpu_Arith_exit:
    	/* Common epilogue: restore the registers saved in the prologue */
    	/* (reverse order of the pushes) and return with the result in %eax. */
 435	popl	%ebx
 436	popl	%edi
 437	popl	%esi
 438	leave
 439	ret
440
441
442/*
443 * Set the FPU status flags to represent precision loss due to
444 * round-up.
445 */
 446L_precision_lost_up:
    	/* Save %edx (tag) and %eax (high significand) around the call to the */
    	/* external helper, then resume at the store of the result. */
 447	push	%edx
 448	push	%eax
 449	call	set_precision_flag_up
 450	popl	%eax
 451	popl	%edx
 452	jmp	L_no_precision_loss
453
454/*
455 * Set the FPU status flags to represent precision loss due to
456 * truncation.
457 */
 458L_precision_lost_down:
    	/* Save %edx (tag) and %eax (high significand) around the call to the */
    	/* external helper, then resume at the store of the result. */
 459	push	%edx
 460	push	%eax
 461	call	set_precision_flag_down
 462	popl	%eax
 463	popl	%edx
 464	jmp	L_no_precision_loss
465
466
467/*
468 * The number is a denormal (which might get rounded up to a normal)
469 * Shift the number right the required number of bits, which will
470 * have to be undone later...
471 */
 472L_Make_denorm:
 473	/* The action to be taken depends upon whether the underflow
 474	   exception is masked */
 475	testb	CW_Underflow,%cl		/* Underflow mask. */
 476	jz	Unmasked_underflow		/* Do not make a denormal. */
 477
 478	movb	DENORMAL,FPU_denormal
 479
 480	pushl	%ecx		/* Save the control word; %cx/%cl/%ch are used as scratch below */
 481	movw	EXP_UNDER+1,%cx
 482	subw	EXP(%edi),%cx	/* %cx = (EXP_UNDER+1) - exponent = right-shift count */
 483
 484	cmpw	$64,%cx	/* a shift of 64 bits or more is handled separately */
 485	jnc	Denorm_shift_more_than_63
 486
 487	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
 488	jnc	Denorm_shift_more_than_32
 489
 490/*
 491 * We got here without jumps by assuming that the most common requirement
 492 *   is for a small de-normalising shift.
 493 * Shift by [1..31] bits
 494 */
 495	addw	%cx,EXP(%edi)	/* exponent becomes EXP_UNDER+1; must precede the write to %ch */
 496	orl	%edx,%edx	/* extension */
 497	setne	%ch		/* Save whether %edx is non-zero */
 498	xorl	%edx,%edx
 499	shrd	%cl,%ebx,%edx	/* bits shifted out of %ebx become the new extension */
 500	shrd	%cl,%eax,%ebx
 501	shr	%cl,%eax
 502	orb	%ch,%dl		/* fold the old extension in as a sticky ls bit */
 503	popl	%ecx		/* restore the control word */
 504	jmp	Denorm_done
 505
 506/* Shift by [32..63] bits */
 507Denorm_shift_more_than_32:
 508	addw	%cx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
 509	subb	$32,%cl		/* shift by (count - 32) now; the 32-bit move is done below */
 510	orl	%edx,%edx
 511	setne	%ch		/* %ch = 1 if the old extension was non-zero */
 512	orb	%ch,%bl
 513	xorl	%edx,%edx
 514	shrd	%cl,%ebx,%edx
 515	shrd	%cl,%eax,%ebx
 516	shr	%cl,%eax
 517	orl	%edx,%edx		/* test these 32 bits */
 518	setne	%cl
 519	orb	%ch,%bl		/* sticky bits go into what becomes the new extension */
 520	orb	%cl,%bl
 521	movl	%ebx,%edx	/* move everything down one 32-bit word */
 522	movl	%eax,%ebx
 523	xorl	%eax,%eax
 524	popl	%ecx		/* restore the control word */
 525	jmp	Denorm_done
 526
 527/* Shift by [64..) bits */
 528Denorm_shift_more_than_63:
 529	cmpw	$64,%cx
 530	jne	Denorm_shift_more_than_64
 531
 532/* Exactly 64 bit shift */
 533	addw	%cx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
 534	xorl	%ecx,%ecx
 535	orl	%edx,%edx
 536	setne	%cl		/* old extension non-zero? */
 537	orl	%ebx,%ebx
 538	setne	%ch		/* old low word non-zero? */
 539	orb	%ch,%cl
 540	orb	%cl,%al		/* sticky bit into the word that becomes the extension */
 541	movl	%eax,%edx	/* old high word is the new extension */
 542	xorl	%eax,%eax
 543	xorl	%ebx,%ebx
 544	popl	%ecx		/* restore the control word */
 545	jmp	Denorm_done
 546
 547Denorm_shift_more_than_64:
 548	movw	EXP_UNDER+1,EXP(%edi)
 549/* This is easy, %eax must be non-zero, so.. */
 550	movl	$1,%edx		/* significand becomes zero with a non-zero (less than half) extension */
 551	xorl	%eax,%eax
 552	xorl	%ebx,%ebx
 553	popl	%ecx		/* restore the control word */
 554	jmp	Denorm_done
 555
 556
 557Unmasked_underflow:
    	/* Underflow is unmasked: leave the significand and exponent alone and */
    	/* just flag the case; it is dealt with at Normalise_result after rounding. */
 558	movb	UNMASKED_UNDERFLOW,FPU_denormal
 559	jmp	Denorm_done
560
561
562/* Undo the de-normalisation. */
 563Normalise_result:
    	/* Reached from L_Re_normalise when FPU_denormal is non-zero. */
 564	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
 565	je	Signal_underflow
 566
 567/* The number must be a denormal if we got here. */
 568#ifdef PARANOID
 569	/* But check it... just in case. */
 570	cmpw	EXP_UNDER+1,EXP(%edi)
 571	jne	L_norm_bugged
 572#endif /* PARANOID */
 573
 574#ifdef PECULIAR_486
 575	/*
 576	 * This implements a special feature of 80486 behaviour.
 577	 * Underflow will be signalled even if the number is
 578	 * not a denormal after rounding.
 579	 * This difference occurs only for masked underflow, and not
 580	 * in the unmasked case.
 581	 * Actual 80486 behaviour differs from this in some circumstances.
 582	 */
 583	orl	%eax,%eax		/* ms bits */
 584	js	LPseudoDenormal		/* Will be masked underflow */
 585#else
 586	orl	%eax,%eax		/* ms bits */
 587	js	L_Normalised		/* No longer a denormal */
 588#endif /* PECULIAR_486 */ 
 589
 590	jnz	LDenormal_adj_exponent	/* still the flags of the orl above: high word non-zero */
 591
 592	orl	%ebx,%ebx
 593	jz	L_underflow_to_zero	/* The contents are zero */
 594
 595LDenormal_adj_exponent:
 596	decw	EXP(%edi)		/* EXP_UNDER+1 -> EXP_UNDER */
 597
 598LPseudoDenormal:
 599	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
 600	movl	TAG_Special,%edx	/* movl leaves the flags of the testb intact */
 601	jz	L_deNormalised
 602
 603	/* There must be a masked underflow */
 604	push	%eax			/* preserve the high significand word */
 605	pushl	EX_Underflow		/* stacked argument for EXCEPTION */
 606	call	EXCEPTION
 607	popl	%eax			/* discard the argument */
 608	popl	%eax			/* restore the high significand word */
 609	movl	TAG_Special,%edx	/* set the tag again after the call */
 610	jmp	L_deNormalised
611
612
613/*
614 * The operations resulted in a number too small to represent.
615 * Masked response.
616 */
 617L_underflow_to_zero:
    	/* Reached with %eax and %ebx both zero (see the tests in Normalise_result). */
 618	push	%eax
 619	call	set_precision_flag_down
 620	popl	%eax
 621
 622	push	%eax			/* preserve %eax across the call */
 623	pushl	EX_Underflow		/* stacked argument for EXCEPTION */
 624	call	EXCEPTION
 625	popl	%eax			/* discard the argument */
 626	popl	%eax			/* restore %eax */
 627
 628/* Reduce the exponent to EXP_UNDER */
 629	movw	EXP_UNDER,EXP(%edi)
 630	movl	TAG_Zero,%edx
 631	jmp	L_Store_significand	/* skips the precision-loss checks at L_deNormalised */
632
633
634/* The operations resulted in a number too large to represent. */
 635L_overflow:
    	/* Reached from L_Store_significand when the exponent is >= EXP_OVER. */
    	/* The significand has already been stored into the FPU_REG at %edi. */
 636	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
 637	push	%edi			/* stacked argument: pointer to the FPU_REG */
 638	call	arith_overflow
 639	pop	%edi
    	/* %eax is left as arith_overflow returned it and becomes this */
    	/* routine's return value (presumably the result tag - confirm). */
 640	jmp	fpu_reg_round_signed_special_exit
641
642
 643Signal_underflow:
    	/* Reached from Normalise_result when FPU_denormal == UNMASKED_UNDERFLOW. */
 644	/* The number may have been changed to a non-denormal */
 645	/* by the rounding operations. */
 646	cmpw	EXP_UNDER,EXP(%edi)
 647	jle	Do_unmasked_underflow
 648
 649	jmp	L_Normalised		/* rounding bumped the exponent above EXP_UNDER */
 650
 651Do_unmasked_underflow:
 652	/* Increase the exponent by the magic number */
 653	addw	$(3*(1<<13)),EXP(%edi)	/* 3*(1<<13) = 24576 */
 654	push	%eax			/* preserve the high significand word */
 655	pushl	EX_Underflow		/* stacked argument for EXCEPTION */
 656	call	EXCEPTION
 657	popl	%eax			/* discard the argument */
 658	popl	%eax			/* restore the high significand word */
 659	jmp	L_Normalised
660
661
 662#ifdef PARANOID
    	/* Internal-error handlers, built only with PARANOID. Each pushes */
    	/* EX_INTERNAL or-ed with its own code, calls EXCEPTION, discards the */
    	/* argument into %ebx (which the exit path restores from the stack) */
    	/* and leaves through L_exception_exit with %eax = -1. */
 663#ifdef PECULIAR_486
 664L_bugged_denorm_486:
 665	pushl	EX_INTERNAL|0x236
 666	call	EXCEPTION
 667	popl	%ebx
 668	jmp	L_exception_exit
 669#else
 670L_bugged_denorm:
 671	pushl	EX_INTERNAL|0x230
 672	call	EXCEPTION
 673	popl	%ebx
 674	jmp	L_exception_exit
 675#endif /* PECULIAR_486 */ 
 676
 677L_bugged_round24:
 678	pushl	EX_INTERNAL|0x231
 679	call	EXCEPTION
 680	popl	%ebx
 681	jmp	L_exception_exit
 682
 683L_bugged_round53:
 684	pushl	EX_INTERNAL|0x232
 685	call	EXCEPTION
 686	popl	%ebx
 687	jmp	L_exception_exit
 688
 689L_bugged_round64:
 690	pushl	EX_INTERNAL|0x233
 691	call	EXCEPTION
 692	popl	%ebx
 693	jmp	L_exception_exit
 694
 695L_norm_bugged:
 696	pushl	EX_INTERNAL|0x234
 697	call	EXCEPTION
 698	popl	%ebx
 699	jmp	L_exception_exit
 700
 701L_entry_bugged:
    	/* Only referenced from the commented-out entry check in fpu_reg_round. */
 702	pushl	EX_INTERNAL|0x235
 703	call	EXCEPTION
 704	popl	%ebx
 705L_exception_exit:
 706	mov	$-1,%eax		/* return -1: internal error */
 707	jmp	fpu_reg_round_special_exit
 708#endif /* PARANOID */

  1/* SPDX-License-Identifier: GPL-2.0 */
  2	.file "reg_round.S"
  3/*---------------------------------------------------------------------------+
  4 |  reg_round.S                                                              |
  5 |                                                                           |
  6 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
  7 |                                                                           |
  8 | Copyright (C) 1993,1995,1997                                              |
  9 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 10 |                       Australia.  E-mail billm@suburbia.net               |
 11 |                                                                           |
 12 | This code has four possible entry points.                                 |
 13 | The following must be entered by a jmp instruction:                       |
 14 |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
 15 |                                                                           |
 16 | The FPU_round entry point is intended to be used by C code.               |
  17 | From C, call as:  int FPU_round(FPU_REG *arg, unsigned int extent,        |
  18 |                       int dummy, unsigned int control_w, u_char sign)     |
 19 |                                                                           |
 20 |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
 21 |    one was raised, or -1 on internal error.                               |
 22 |                                                                           |
 23 | For correct "up" and "down" rounding, the argument must have the correct  |
 24 | sign.                                                                     |
 25 |                                                                           |
 26 +---------------------------------------------------------------------------*/
 27
 28/*---------------------------------------------------------------------------+
 29 | Four entry points.                                                        |
 30 |                                                                           |
 31 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
 32 |  %eax:%ebx  64 bit significand                                            |
 33 |  %edx       32 bit extension of the significand                           |
 34 |  %edi       pointer to an FPU_REG for the result to be stored             |
 35 |  stack      calling function must have set up a C stack frame and         |
 36 |             pushed %esi, %edi, and %ebx                                   |
 37 |                                                                           |
 38 | Needed just for the fpu_reg_round_sqrt entry point:                       |
 39 |  %cx  A control word in the same format as the FPU control word.          |
 40 | Otherwise, PARAM4 must give such a value.                                 |
 41 |                                                                           |
 42 |                                                                           |
 43 | The significand and its extension are assumed to be exact in the          |
 44 | following sense:                                                          |
 45 |   If the significand by itself is the exact result then the significand   |
 46 |   extension (%edx) must contain 0, otherwise the significand extension    |
 47 |   must be non-zero.                                                       |
 48 |   If the significand extension is non-zero then the significand is        |
 49 |   smaller than the magnitude of the correct exact result by an amount     |
 50 |   greater than zero and less than one ls bit of the significand.          |
 51 |   The significand extension is only required to have three possible       |
 52 |   non-zero values:                                                        |
 53 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 54 |                                 bit smaller than the magnitude of the     |
 55 |                                 true exact result.                        |
 56 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 57 |                                 smaller than the magnitude of the true    |
 58 |                                 exact result.                             |
 59 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 60 |                                 bit smaller than the magnitude of the     |
 61 |                                 true exact result.                        |
 62 |                                                                           |
 63 +---------------------------------------------------------------------------*/
 64
 65/*---------------------------------------------------------------------------+
 66 |  The code in this module has become quite complex, but it should handle   |
 67 |  all of the FPU flags which are set at this stage of the basic arithmetic |
 68 |  computations.                                                            |
 69 |  There are a few rare cases where the results are not set identically to  |
 70 |  a real FPU. These require a bit more thought because at this stage the   |
 71 |  results of the code here appear to be more consistent...                 |
 72 |  This may be changed in a future version.                                 |
 73 +---------------------------------------------------------------------------*/
 74
 75
 76#include "fpu_emu.h"
 77#include "exception.h"
 78#include "control_w.h"
 79
 80/* Flags for FPU_bits_lost */
 81#define	LOST_DOWN	$1
 82#define	LOST_UP		$2
 83
 84/* Flags for FPU_denormal */
 85#define	DENORMAL	$1
 86#define	UNMASKED_UNDERFLOW $2
 87
 88
 89#ifndef NON_REENTRANT_FPU
 90/*	Make the code re-entrant by putting
 91	local storage on the stack: */
 92#define FPU_bits_lost	(%esp)
 93#define FPU_denormal	1(%esp)
 94
 95#else
 96/*	Not re-entrant, so we can gain speed by putting
 97	local storage in a static area: */
 98.data
 99	.align 4,0
100FPU_bits_lost:
101	.byte	0
102FPU_denormal:
103	.byte	0
104#endif /* NON_REENTRANT_FPU */
105
106
107.text
108.globl fpu_reg_round
109.globl fpu_Arith_exit
110
111/* Entry point when called from C */
112SYM_FUNC_START(FPU_round)
113	pushl	%ebp
114	movl	%esp,%ebp
115	pushl	%esi
116	pushl	%edi
117	pushl	%ebx
118
119	movl	PARAM1,%edi
120	movl	SIGH(%edi),%eax
121	movl	SIGL(%edi),%ebx
122	movl	PARAM2,%edx
123
124fpu_reg_round:			/* Normal entry point */
125	movl	PARAM4,%ecx
126
127#ifndef NON_REENTRANT_FPU
128	pushl	%ebx		/* adjust the stack pointer */
129#endif /* NON_REENTRANT_FPU */ 
130
131#ifdef PARANOID
132/* Cannot use this here yet */
133/*	orl	%eax,%eax */
134/*	jns	L_entry_bugged */
135#endif /* PARANOID */
136
137	cmpw	EXP_UNDER,EXP(%edi)
138	jle	L_Make_denorm			/* The number is a de-normal */
139
140	movb	$0,FPU_denormal			/* 0 -> not a de-normal */
141
142Denorm_done:
143	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */
144
145	movl	%ecx,%esi
146	andl	CW_PC,%ecx
147	cmpl	PR_64_BITS,%ecx
148	je	LRound_To_64
149
150	cmpl	PR_53_BITS,%ecx
151	je	LRound_To_53
152
153	cmpl	PR_24_BITS,%ecx
154	je	LRound_To_24
155
156#ifdef PECULIAR_486
157/* With the precision control bits set to 01 "(reserved)", a real 80486
158   behaves as if the precision control bits were set to 11 "64 bits" */
159	cmpl	PR_RESERVED_BITS,%ecx
160	je	LRound_To_64
161#ifdef PARANOID
162	jmp	L_bugged_denorm_486
163#endif /* PARANOID */ 
164#else
165#ifdef PARANOID
166	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
167#endif /* PARANOID */ 
168#endif /* PECULIAR_486 */
169
170
171/* Round etc to 24 bit precision */
172LRound_To_24:
173	movl	%esi,%ecx
174	andl	CW_RC,%ecx
175	cmpl	RC_RND,%ecx
176	je	LRound_nearest_24
177
178	cmpl	RC_CHOP,%ecx
179	je	LCheck_truncate_24
180
181	cmpl	RC_UP,%ecx		/* Towards +infinity */
182	je	LUp_24
183
184	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
185	je	LDown_24
186
187#ifdef PARANOID
188	jmp	L_bugged_round24
189#endif /* PARANOID */ 
190
191LUp_24:
192	cmpb	SIGN_POS,PARAM5
193	jne	LCheck_truncate_24	/* If negative then  up==truncate */
194
195	jmp	LCheck_24_round_up
196
197LDown_24:
198	cmpb	SIGN_POS,PARAM5
199	je	LCheck_truncate_24	/* If positive then  down==truncate */
200
201LCheck_24_round_up:
202	movl	%eax,%ecx
203	andl	$0x000000ff,%ecx
204	orl	%ebx,%ecx
205	orl	%edx,%ecx
206	jnz	LDo_24_round_up
207	jmp	L_Re_normalise
208
209LRound_nearest_24:
210	/* Do rounding of the 24th bit if needed (nearest or even) */
211	movl	%eax,%ecx
212	andl	$0x000000ff,%ecx
213	cmpl	$0x00000080,%ecx
214	jc	LCheck_truncate_24	/* less than half, no increment needed */
215
216	jne	LGreater_Half_24	/* greater than half, increment needed */
217
218	/* Possibly half, we need to check the ls bits */
219	orl	%ebx,%ebx
220	jnz	LGreater_Half_24	/* greater than half, increment needed */
221
222	orl	%edx,%edx
223	jnz	LGreater_Half_24	/* greater than half, increment needed */
224
225	/* Exactly half, increment only if 24th bit is 1 (round to even) */
226	testl	$0x00000100,%eax
227	jz	LDo_truncate_24
228
229LGreater_Half_24:			/* Rounding: increment at the 24th bit */
230LDo_24_round_up:
231	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
232	xorl	%ebx,%ebx
233	movb	LOST_UP,FPU_bits_lost
234	addl	$0x00000100,%eax
235	jmp	LCheck_Round_Overflow
236
237LCheck_truncate_24:
238	movl	%eax,%ecx
239	andl	$0x000000ff,%ecx
240	orl	%ebx,%ecx
241	orl	%edx,%ecx
242	jz	L_Re_normalise		/* No truncation needed */
243
244LDo_truncate_24:
245	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
246	xorl	%ebx,%ebx
247	movb	LOST_DOWN,FPU_bits_lost
248	jmp	L_Re_normalise
249
250
251/* Round etc to 53 bit precision */
252LRound_To_53:
253	movl	%esi,%ecx
254	andl	CW_RC,%ecx
255	cmpl	RC_RND,%ecx
256	je	LRound_nearest_53
257
258	cmpl	RC_CHOP,%ecx
259	je	LCheck_truncate_53
260
261	cmpl	RC_UP,%ecx		/* Towards +infinity */
262	je	LUp_53
263
264	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
265	je	LDown_53
266
267#ifdef PARANOID
268	jmp	L_bugged_round53
269#endif /* PARANOID */ 
270
271LUp_53:
272	cmpb	SIGN_POS,PARAM5
273	jne	LCheck_truncate_53	/* If negative then  up==truncate */
274
275	jmp	LCheck_53_round_up
276
277LDown_53:
278	cmpb	SIGN_POS,PARAM5
279	je	LCheck_truncate_53	/* If positive then  down==truncate */
280
281LCheck_53_round_up:
282	movl	%ebx,%ecx
283	andl	$0x000007ff,%ecx
284	orl	%edx,%ecx
285	jnz	LDo_53_round_up
286	jmp	L_Re_normalise
287
288LRound_nearest_53:
289	/* Do rounding of the 53rd bit if needed (nearest or even) */
290	movl	%ebx,%ecx
291	andl	$0x000007ff,%ecx
292	cmpl	$0x00000400,%ecx
293	jc	LCheck_truncate_53	/* less than half, no increment needed */
294
295	jnz	LGreater_Half_53	/* greater than half, increment needed */
296
297	/* Possibly half, we need to check the ls bits */
298	orl	%edx,%edx
299	jnz	LGreater_Half_53	/* greater than half, increment needed */
300
301	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
302	testl	$0x00000800,%ebx
303	jz	LTruncate_53
304
305LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
306LDo_53_round_up:
307	movb	LOST_UP,FPU_bits_lost
308	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
309	addl	$0x00000800,%ebx
310	adcl	$0,%eax
311	jmp	LCheck_Round_Overflow
312
313LCheck_truncate_53:
314	movl	%ebx,%ecx
315	andl	$0x000007ff,%ecx
316	orl	%edx,%ecx
317	jz	L_Re_normalise
318
319LTruncate_53:
320	movb	LOST_DOWN,FPU_bits_lost
321	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
322	jmp	L_Re_normalise
323
324
325/* Round etc to 64 bit precision: only the extension in %edx lies below the ls bit of %ebx */
326LRound_To_64:
327	movl	%esi,%ecx
328	andl	CW_RC,%ecx		/* Isolate the rounding-control field */
329	cmpl	RC_RND,%ecx
330	je	LRound_nearest_64
331
332	cmpl	RC_CHOP,%ecx
333	je	LCheck_truncate_64
334
335	cmpl	RC_UP,%ecx		/* Towards +infinity */
336	je	LUp_64
337
338	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
339	je	LDown_64
340
341#ifdef PARANOID
342	jmp	L_bugged_round64
343#endif /* PARANOID */ 
344/* If PARANOID is not defined, execution reaching this point continues into LUp_64 */
345LUp_64:
346	cmpb	SIGN_POS,PARAM5
347	jne	LCheck_truncate_64	/* If negative then  up==truncate */
348
349	orl	%edx,%edx		/* Positive: increment if the extension is non-zero */
350	jnz	LDo_64_round_up
351	jmp	L_Re_normalise
352
353LDown_64:
354	cmpb	SIGN_POS,PARAM5
355	je	LCheck_truncate_64	/* If positive then  down==truncate */
356
357	orl	%edx,%edx		/* Negative: increment the magnitude if the extension is non-zero */
358	jnz	LDo_64_round_up
359	jmp	L_Re_normalise
360
361LRound_nearest_64:
362	cmpl	$0x80000000,%edx	/* 0x80000000 in the extension is exactly half of the ls bit */
363	jc	LCheck_truncate_64	/* Less than half */
364
365	jne	LDo_64_round_up		/* More than half */
366
367	/* Now test for round-to-even */
368	testb	$1,%bl
369	jz	LCheck_truncate_64	/* ls bit already even: the half is discarded */
370
371LDo_64_round_up:
372	movb	LOST_UP,FPU_bits_lost
373	addl	$1,%ebx
374	adcl	$0,%eax
375/* Also entered from the 53 bit round-up path, with the carry flag from its adcl */
376LCheck_Round_Overflow:
377	jnc	L_Re_normalise
378
379	/* Overflow, adjust the result (significand to 1.0) */
380	rcrl	$1,%eax			/* The carry is rotated back in as the ms bit */
381	rcrl	$1,%ebx
382	incw	EXP(%edi)		/* Compensate for the one bit right shift */
383	jmp	L_Re_normalise
384
385LCheck_truncate_64:
386	orl	%edx,%edx
387	jz	L_Re_normalise		/* Extension is zero: FPU_bits_lost is left untouched */
388
389LTruncate_64:
390	movb	LOST_DOWN,FPU_bits_lost
391/* Falls through into L_Re_normalise; %eax:%ebx need no masking at 64 bits */
392L_Re_normalise:
393	testb	$0xff,FPU_denormal	/* FPU_denormal is written by L_Make_denorm / Unmasked_underflow */
394	jnz	Normalise_result
395
396L_Normalised:
397	movl	TAG_Valid,%edx		/* %edx carries the result tag; it is copied to %eax below */
398
399L_deNormalised:
400	cmpb	LOST_UP,FPU_bits_lost
401	je	L_precision_lost_up
402
403	cmpb	LOST_DOWN,FPU_bits_lost
404	je	L_precision_lost_down
405/* Both precision handlers jump back to L_no_precision_loss */
406L_no_precision_loss:
407	/* store the result */
408
409L_Store_significand:
410	movl	%eax,SIGH(%edi)
411	movl	%ebx,SIGL(%edi)
412
413	cmpw	EXP_OVER,EXP(%edi)
414	jge	L_overflow		/* Signed compare: exponent is at or above EXP_OVER */
415
416	movl	%edx,%eax		/* The tag becomes the return value */
417
418	/* Convert the exponent to 80x87 form. */
419	addw	EXTENDED_Ebias,EXP(%edi)
420	andw	$0x7fff,EXP(%edi)	/* Clear bit 15; it is set below for a negative result */
421
422fpu_reg_round_signed_special_exit:
423
424	cmpb	SIGN_POS,PARAM5
425	je	fpu_reg_round_special_exit
426
427	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */
428
429fpu_reg_round_special_exit:
430
431#ifndef NON_REENTRANT_FPU
432	popl	%ebx		/* adjust the stack pointer; the value is overwritten by the next pop */
433#endif /* NON_REENTRANT_FPU */ 
434
435fpu_Arith_exit:
436	popl	%ebx
437	popl	%edi
438	popl	%esi
439	leave
440	RET
441
442
443/*
444 * Set the FPU status flags to represent precision loss due to
445 * round-up.
446 */
447L_precision_lost_up:
448	push	%edx			/* Keep the result tag across the call */
449	push	%eax			/* Keep the ms significand word across the call */
450	call	set_precision_flag_up
451	popl	%eax
452	popl	%edx
453	jmp	L_no_precision_loss	/* Resume at the point where the result is stored */
454
455/*
456 * Set the FPU status flags to represent precision loss due to
457 * truncation.
458 */
459L_precision_lost_down:
460	push	%edx			/* Keep the result tag across the call */
461	push	%eax			/* Keep the ms significand word across the call */
462	call	set_precision_flag_down
463	popl	%eax
464	popl	%edx
465	jmp	L_no_precision_loss	/* Resume at the point where the result is stored */
466
467
468/*
469 * The number is a denormal (which might get rounded up to a normal)
470 * Shift the number right the required number of bits, which will
471 * have to be undone later...
472 */
473L_Make_denorm:
474	/* The action to be taken depends upon whether the underflow
475	   exception is masked */
476	testb	CW_Underflow,%cl		/* Underflow mask. */
477	jz	Unmasked_underflow		/* Do not make a denormal. */
478
479	movb	DENORMAL,FPU_denormal
480
481	pushl	%ecx		/* Save */
482	movw	EXP_UNDER+1,%cx
483	subw	EXP(%edi),%cx		/* %cx = (EXP_UNDER+1) - exponent, used as the right shift count */
484
485	cmpw	$64,%cx	/* Shifts of 64 or more are handled separately */
486	jnc	Denorm_shift_more_than_63
487
488	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
489	jnc	Denorm_shift_more_than_32
490
491/*
492 * We got here without jumps by assuming that the most common requirement
493 *   is for a small de-normalising shift.
494 * Shift by [1..31] bits
495 */
496	addw	%cx,EXP(%edi)		/* Exponent becomes EXP_UNDER+1 */
497	orl	%edx,%edx	/* extension */
498	setne	%ch		/* Save whether %edx is non-zero */
499	xorl	%edx,%edx
500	shrd	%cl,%ebx,%edx		/* Bits leaving %ebx become the new extension */
501	shrd	%cl,%eax,%ebx
502	shr	%cl,%eax
503	orb	%ch,%dl			/* Sticky bit: the old extension was non-zero */
504	popl	%ecx			/* Restore the value saved above */
505	jmp	Denorm_done
506
507/* Shift by [32..63] bits */
508Denorm_shift_more_than_32:
509	addw	%cx,EXP(%edi)		/* Exponent becomes EXP_UNDER+1 */
510	subb	$32,%cl			/* Remaining bit shift, 0..31; the other 32 are done by moving words */
511	orl	%edx,%edx
512	setne	%ch			/* %ch = 1 if the old extension was non-zero */
513	orb	%ch,%bl			/* Fold it into the ls bit of %ebx as a sticky bit */
514	xorl	%edx,%edx
515	shrd	%cl,%ebx,%edx
516	shrd	%cl,%eax,%ebx
517	shr	%cl,%eax
518	orl	%edx,%edx		/* test these 32 bits */
519	setne	%cl			/* %cl = 1 if non-zero bits were shifted out of %ebx */
520	orb	%ch,%bl
521	orb	%cl,%bl			/* Both sticky flags land in the ls bit of the new extension */
522	movl	%ebx,%edx		/* Move everything down by one 32 bit word */
523	movl	%eax,%ebx
524	xorl	%eax,%eax
525	popl	%ecx			/* Restore the value saved above */
526	jmp	Denorm_done
527
528/* Shift by [64..) bits */
529Denorm_shift_more_than_63:
530	cmpw	$64,%cx
531	jne	Denorm_shift_more_than_64
532
533/* Exactly 64 bit shift */
534	addw	%cx,EXP(%edi)		/* Exponent becomes EXP_UNDER+1 */
535	xorl	%ecx,%ecx		/* Shift count no longer needed; %cl and %ch become flags */
536	orl	%edx,%edx
537	setne	%cl			/* Old extension non-zero? */
538	orl	%ebx,%ebx
539	setne	%ch			/* Old ls word non-zero? */
540	orb	%ch,%cl
541	orb	%cl,%al			/* Sticky bit into the word which becomes the extension */
542	movl	%eax,%edx		/* The ms word is all that remains, as the extension */
543	xorl	%eax,%eax
544	xorl	%ebx,%ebx
545	popl	%ecx			/* Restore the value saved above */
546	jmp	Denorm_done
547
548Denorm_shift_more_than_64:
549	movw	EXP_UNDER+1,EXP(%edi)
550/* This is easy, %eax must be non-zero, so.. */
551	movl	$1,%edx			/* Only a sticky bit is left in the extension */
552	xorl	%eax,%eax
553	xorl	%ebx,%ebx
554	popl	%ecx			/* Restore the value saved above */
555	jmp	Denorm_done
556
557/* Reached before %ecx is pushed, so there is nothing to pop here */
558Unmasked_underflow:
559	movb	UNMASKED_UNDERFLOW,FPU_denormal	/* No shift is done; Normalise_result tests this marker */
560	jmp	Denorm_done
561
562
563/* Undo the de-normalisation. */
564Normalise_result:
565	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
566	je	Signal_underflow
567
568/* The number must be a denormal if we got here. */
569#ifdef PARANOID
570	/* But check it... just in case. */
571	cmpw	EXP_UNDER+1,EXP(%edi)
572	jne	L_norm_bugged
573#endif /* PARANOID */
574
575#ifdef PECULIAR_486
576	/*
577	 * This implements a special feature of 80486 behaviour.
578	 * Underflow will be signaled even if the number is
579	 * not a denormal after rounding.
580	 * This difference occurs only for masked underflow, and not
581	 * in the unmasked case.
582	 * Actual 80486 behaviour differs from this in some circumstances.
583	 */
584	orl	%eax,%eax		/* ms bits */
585	js	LPseudoDenormal		/* Will be masked underflow */
586#else
587	orl	%eax,%eax		/* ms bits */
588	js	L_Normalised		/* No longer a denormal */
589#endif /* PECULIAR_486 */ 
590
591	jnz	LDenormal_adj_exponent	/* Flags are still those of the orl above: ms word non-zero */
592
593	orl	%ebx,%ebx
594	jz	L_underflow_to_zero	/* The contents are zero */
595
596LDenormal_adj_exponent:
597	decw	EXP(%edi)		/* Not executed on the PECULIAR_486 js path, which enters below */
598
599LPseudoDenormal:
600	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
601	movl	TAG_Special,%edx	/* movl leaves the flags from testb intact */
602	jz	L_deNormalised
603
604	/* There must be a masked underflow */
605	push	%eax			/* Keep the ms significand word across the call */
606	pushl	EX_Underflow
607	call	EXCEPTION
608	popl	%eax			/* Discard the EX_Underflow argument */
609	popl	%eax			/* Restore the ms significand word */
610	movl	TAG_Special,%edx	/* Set the tag again after the call */
611	jmp	L_deNormalised
612
613
614/*
615 * The operations resulted in a number too small to represent.
616 * Masked response.
617 */
618L_underflow_to_zero:
619	push	%eax			/* Keep %eax across the call */
620	call	set_precision_flag_down
621	popl	%eax
622
623	push	%eax			/* Keep %eax across the call */
624	pushl	EX_Underflow
625	call	EXCEPTION
626	popl	%eax			/* Discard the EX_Underflow argument */
627	popl	%eax			/* Restore %eax */
628
629/* Reduce the exponent to EXP_UNDER */
630	movw	EXP_UNDER,EXP(%edi)
631	movl	TAG_Zero,%edx
632	jmp	L_Store_significand	/* When reached from Normalise_result, %eax and %ebx are both zero */
633
634
635/* The operations resulted in a number too large to represent. */
636L_overflow:
637	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
638	push	%edi			/* Argument: pointer to the result register */
639	call	arith_overflow
640	pop	%edi			/* Remove the argument, reloading %edi */
641	jmp	fpu_reg_round_signed_special_exit	/* %eax is not set here; presumably the return value of arith_overflow - confirm */
642
643
644Signal_underflow:
645	/* The number may have been changed to a non-denormal */
646	/* by the rounding operations. */
647	cmpw	EXP_UNDER,EXP(%edi)
648	jle	Do_unmasked_underflow	/* Signed compare: exponent still at or below EXP_UNDER */
649
650	jmp	L_Normalised		/* Exponent is above EXP_UNDER: no underflow is signaled */
651
652Do_unmasked_underflow:
653	/* Increase the exponent by the magic number */
654	addw	$(3*(1<<13)),EXP(%edi)	/* 3*(1<<13) = 24576 */
655	push	%eax			/* Keep the ms significand word across the call */
656	pushl	EX_Underflow
657	call	EXCEPTION
658	popl	%eax			/* Discard the EX_Underflow argument */
659	popl	%eax			/* Restore the ms significand word */
660	jmp	L_Normalised
661
662
663#ifdef PARANOID
664#ifdef PECULIAR_486
665L_bugged_denorm_486:
666	pushl	EX_INTERNAL|0x236
667	call	EXCEPTION
668	popl	%ebx			/* Remove the argument from the stack */
669	jmp	L_exception_exit
670#else
671L_bugged_denorm:
672	pushl	EX_INTERNAL|0x230
673	call	EXCEPTION
674	popl	%ebx			/* Remove the argument from the stack */
675	jmp	L_exception_exit
676#endif /* PECULIAR_486 */ 
677/* Each stub below raises EX_INTERNAL or-ed with its own code, then exits */
678L_bugged_round24:
679	pushl	EX_INTERNAL|0x231
680	call	EXCEPTION
681	popl	%ebx			/* Remove the argument from the stack */
682	jmp	L_exception_exit
683
684L_bugged_round53:
685	pushl	EX_INTERNAL|0x232
686	call	EXCEPTION
687	popl	%ebx			/* Remove the argument from the stack */
688	jmp	L_exception_exit
689
690L_bugged_round64:
691	pushl	EX_INTERNAL|0x233
692	call	EXCEPTION
693	popl	%ebx			/* Remove the argument from the stack */
694	jmp	L_exception_exit
695
696L_norm_bugged:
697	pushl	EX_INTERNAL|0x234
698	call	EXCEPTION
699	popl	%ebx			/* Remove the argument from the stack */
700	jmp	L_exception_exit
701
702L_entry_bugged:
703	pushl	EX_INTERNAL|0x235
704	call	EXCEPTION
705	popl	%ebx			/* Remove the argument, then fall through */
706L_exception_exit:
707	mov	$-1,%eax		/* Return value -1: internal error */
708	jmp	fpu_reg_round_special_exit	/* Leave without touching the sign or exponent */
709#endif /* PARANOID */ 
710
711SYM_FUNC_END(FPU_round)