Loading...
1 .file "reg_round.S"
2/*---------------------------------------------------------------------------+
3 | reg_round.S |
4 | |
5 | Rounding/truncation/etc for FPU basic arithmetic functions. |
6 | |
7 | Copyright (C) 1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@suburbia.net |
10 | |
11 | This code has four possible entry points. |
12 | The following must be entered by a jmp instruction: |
13 | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
14 | |
15 | The FPU_round entry point is intended to be used by C code. |
16 | From C, call as: |
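17 | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
 | NOTE: the code below reads the control word from PARAM4 and the sign of |
 | the result from PARAM5, so the three-argument prototype above appears |
 | to be out of date; check the C declaration before calling. |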
18 | |
19 | Return value is the tag of the answer, or-ed with FPU_Exception if |
20 | one was raised, or -1 on internal error. |
21 | |
22 | For correct "up" and "down" rounding, the argument must have the correct |
23 | sign. |
24 | |
25 +---------------------------------------------------------------------------*/
26
27/*---------------------------------------------------------------------------+
28 | Four entry points. |
29 | |
30 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
31 | %eax:%ebx 64 bit significand |
32 | %edx 32 bit extension of the significand |
33 | %edi pointer to an FPU_REG for the result to be stored |
34 | stack calling function must have set up a C stack frame and |
35 | pushed %esi, %edi, and %ebx |
36 | |
37 | Needed just for the fpu_reg_round_sqrt entry point: |
38 | %cx A control word in the same format as the FPU control word. |
39 | Otherwise, PARAM4 must give such a value. |
40 | |
41 | |
42 | The significand and its extension are assumed to be exact in the |
43 | following sense: |
44 | If the significand by itself is the exact result then the significand |
45 | extension (%edx) must contain 0, otherwise the significand extension |
46 | must be non-zero. |
47 | If the significand extension is non-zero then the significand is |
48 | smaller than the magnitude of the correct exact result by an amount |
49 | greater than zero and less than one ls bit of the significand. |
50 | The significand extension is only required to have three possible |
51 | non-zero values: |
52 | less than 0x80000000 <=> the significand is less than 1/2 an ls |
53 | bit smaller than the magnitude of the |
54 | true exact result. |
55 | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
56 | smaller than the magnitude of the true |
57 | exact result. |
58 | greater than 0x80000000 <=> the significand is more than 1/2 an ls |
59 | bit smaller than the magnitude of the |
60 | true exact result. |
61 | |
62 +---------------------------------------------------------------------------*/
63
64/*---------------------------------------------------------------------------+
65 | The code in this module has become quite complex, but it should handle |
66 | all of the FPU flags which are set at this stage of the basic arithmetic |
67 | computations. |
68 | There are a few rare cases where the results are not set identically to |
69 | a real FPU. These require a bit more thought because at this stage the |
70 | results of the code here appear to be more consistent... |
71 | This may be changed in a future version. |
72 +---------------------------------------------------------------------------*/
73
74
75#include "fpu_emu.h"
76#include "exception.h"
77#include "control_w.h"
78
79/* Flags for FPU_bits_lost */
/* 0 (stored at Denorm_done) means the rounding so far has been exact. */
80#define LOST_DOWN $1
81#define LOST_UP $2
82
83/* Flags for FPU_denormal */
/* 0 (stored in fpu_reg_round) means the argument was not treated as a denormal. */
84#define DENORMAL $1
85#define UNMASKED_UNDERFLOW $2
86
87
88#ifndef NON_REENTRANT_FPU
89/* Make the code re-entrant by putting
90 local storage on the stack: */
/* Both bytes are addressed relative to %esp, so they name the right
   location only while %esp is at the level left by the extra
   "pushl %ebx" in fpu_reg_round.  Code that pushes anything must not
   touch them until it has popped again. */
91#define FPU_bits_lost (%esp)
92#define FPU_denormal 1(%esp)
93
94#else
95/* Not re-entrant, so we can gain speed by putting
96 local storage in a static area: */
97.data
98 .align 4,0
99FPU_bits_lost:
100 .byte 0
101FPU_denormal:
102 .byte 0
103#endif /* NON_REENTRANT_FPU */
104
105
106.text
107.globl fpu_reg_round
108.globl fpu_Arith_exit
109
110/* Entry point when called from C */
/*
 * FPU_round: entry point for C callers.
 * Builds a frame, saves %esi, %edi and %ebx, then loads the significand
 * of the FPU_REG at PARAM1 into %eax:%ebx and PARAM2 into %edx (the
 * extension) before falling through into fpu_reg_round.
 */
111ENTRY(FPU_round)
112 pushl %ebp
113 movl %esp,%ebp
114 pushl %esi
115 pushl %edi
116 pushl %ebx
117
118 movl PARAM1,%edi
119 movl SIGH(%edi),%eax
120 movl SIGL(%edi),%ebx
121 movl PARAM2,%edx
122
/* Entered by jmp with %eax:%ebx, %edx and %edi already set up.
   %ecx receives the control word from PARAM4. */
123fpu_reg_round: /* Normal entry point */
124 movl PARAM4,%ecx
125
126#ifndef NON_REENTRANT_FPU
 /* Reserve one stack slot for FPU_bits_lost / FPU_denormal; the value
    pushed is irrelevant.  It is discarded at fpu_reg_round_special_exit. */
127 pushl %ebx /* adjust the stack pointer */
128#endif /* NON_REENTRANT_FPU */
129
130#ifdef PARANOID
131/* Cannot use this here yet */
132/* orl %eax,%eax */
133/* jns L_entry_bugged */
134#endif /* PARANOID */
135
 /* Signed 16-bit compare: exponent <= EXP_UNDER takes the denormal path. */
136 cmpw EXP_UNDER,EXP(%edi)
137 jle L_Make_denorm /* The number is a de-normal */
138
139 movb $0,FPU_denormal /* 0 -> not a de-normal */
140
/* The denormal paths rejoin here with %ecx still holding the control word. */
141Denorm_done:
142 movb $0,FPU_bits_lost /* No bits yet lost in rounding */
143
 /* %esi keeps the whole control word for the rounding-mode test later;
    %ecx is reduced to the precision-control field. */
144 movl %ecx,%esi
145 andl CW_PC,%ecx
146 cmpl PR_64_BITS,%ecx
147 je LRound_To_64
148
149 cmpl PR_53_BITS,%ecx
150 je LRound_To_53
151
152 cmpl PR_24_BITS,%ecx
153 je LRound_To_24
154
 /* If nothing matched and PARANOID is not defined, execution falls
    through into the 24 bit rounding code that follows. */
155#ifdef PECULIAR_486
156/* With the precision control bits set to 01 "(reserved)", a real 80486
157 behaves as if the precision control bits were set to 11 "64 bits" */
158 cmpl PR_RESERVED_BITS,%ecx
159 je LRound_To_64
160#ifdef PARANOID
161 jmp L_bugged_denorm_486
162#endif /* PARANOID */
163#else
164#ifdef PARANOID
165 jmp L_bugged_denorm /* There is no bug, just a bad control word */
166#endif /* PARANOID */
167#endif /* PECULIAR_486 */
168
169
170/* Round etc to 24 bit precision */
/* The bits below the 24 bit result are the low 8 bits of %eax, all of
   %ebx and all of %edx.  %esi holds the control word; the sign of the
   result is read from PARAM5. */
171LRound_To_24:
172 movl %esi,%ecx
173 andl CW_RC,%ecx
174 cmpl RC_RND,%ecx
175 je LRound_nearest_24
176
177 cmpl RC_CHOP,%ecx
178 je LCheck_truncate_24
179
180 cmpl RC_UP,%ecx /* Towards +infinity */
181 je LUp_24
182
183 cmpl RC_DOWN,%ecx /* Towards -infinity */
184 je LDown_24
185
 /* Without PARANOID an unmatched rounding mode falls into LUp_24. */
186#ifdef PARANOID
187 jmp L_bugged_round24
188#endif /* PARANOID */
189
190LUp_24:
191 cmpb SIGN_POS,PARAM5
192 jne LCheck_truncate_24 /* If negative then up==truncate */
193
194 jmp LCheck_24_round_up
195
196LDown_24:
197 cmpb SIGN_POS,PARAM5
198 je LCheck_truncate_24 /* If positive then down==truncate */
199
/* Directed rounding away from zero: increment if any discarded bit is set,
   otherwise the value is already exact at 24 bits. */
200LCheck_24_round_up:
201 movl %eax,%ecx
202 andl $0x000000ff,%ecx
203 orl %ebx,%ecx
204 orl %edx,%ecx
205 jnz LDo_24_round_up
206 jmp L_Re_normalise
207
208LRound_nearest_24:
209 /* Do rounding of the 24th bit if needed (nearest or even) */
210 movl %eax,%ecx
211 andl $0x000000ff,%ecx
 /* Both branches below test the flags of this single compare against
    the half-way value 0x80. */
212 cmpl $0x00000080,%ecx
213 jc LCheck_truncate_24 /* less than half, no increment needed */
214
215 jne LGreater_Half_24 /* greater than half, increment needed */
216
217 /* Possibly half, we need to check the ls bits */
218 orl %ebx,%ebx
219 jnz LGreater_Half_24 /* greater than half, increment needed */
220
221 orl %edx,%edx
222 jnz LGreater_Half_24 /* greater than half, increment needed */
223
224 /* Exactly half, increment only if 24th bit is 1 (round to even) */
225 testl $0x00000100,%eax
226 jz LDo_truncate_24
227
228LGreater_Half_24: /* Rounding: increment at the 24th bit */
229LDo_24_round_up:
230 andl $0xffffff00,%eax /* Truncate to 24 bits */
231 xorl %ebx,%ebx
232 movb LOST_UP,FPU_bits_lost
 /* The addl must stay last: its carry flag is what
    LCheck_Round_Overflow tests. */
233 addl $0x00000100,%eax
234 jmp LCheck_Round_Overflow
235
/* Truncation: record a loss only if some discarded bit is non-zero. */
236LCheck_truncate_24:
237 movl %eax,%ecx
238 andl $0x000000ff,%ecx
239 orl %ebx,%ecx
240 orl %edx,%ecx
241 jz L_Re_normalise /* No truncation needed */
242
243LDo_truncate_24:
244 andl $0xffffff00,%eax /* Truncate to 24 bits */
245 xorl %ebx,%ebx
246 movb LOST_DOWN,FPU_bits_lost
247 jmp L_Re_normalise
248
249
250/* Round etc to 53 bit precision */
/* The bits below the 53 bit result are the low 11 bits of %ebx and all
   of %edx.  %esi holds the control word; the sign of the result is read
   from PARAM5. */
251LRound_To_53:
252 movl %esi,%ecx
253 andl CW_RC,%ecx
254 cmpl RC_RND,%ecx
255 je LRound_nearest_53
256
257 cmpl RC_CHOP,%ecx
258 je LCheck_truncate_53
259
260 cmpl RC_UP,%ecx /* Towards +infinity */
261 je LUp_53
262
263 cmpl RC_DOWN,%ecx /* Towards -infinity */
264 je LDown_53
265
 /* Without PARANOID an unmatched rounding mode falls into LUp_53. */
266#ifdef PARANOID
267 jmp L_bugged_round53
268#endif /* PARANOID */
269
270LUp_53:
271 cmpb SIGN_POS,PARAM5
272 jne LCheck_truncate_53 /* If negative then up==truncate */
273
274 jmp LCheck_53_round_up
275
276LDown_53:
277 cmpb SIGN_POS,PARAM5
278 je LCheck_truncate_53 /* If positive then down==truncate */
279
/* Directed rounding away from zero: increment if any discarded bit is set,
   otherwise the value is already exact at 53 bits. */
280LCheck_53_round_up:
281 movl %ebx,%ecx
282 andl $0x000007ff,%ecx
283 orl %edx,%ecx
284 jnz LDo_53_round_up
285 jmp L_Re_normalise
286
287LRound_nearest_53:
288 /* Do rounding of the 53rd bit if needed (nearest or even) */
289 movl %ebx,%ecx
290 andl $0x000007ff,%ecx
 /* Both branches below test the flags of this single compare against
    the half-way value 0x400. */
291 cmpl $0x00000400,%ecx
292 jc LCheck_truncate_53 /* less than half, no increment needed */
293
294 jnz LGreater_Half_53 /* greater than half, increment needed */
295
296 /* Possibly half, we need to check the ls bits */
297 orl %edx,%edx
298 jnz LGreater_Half_53 /* greater than half, increment needed */
299
300 /* Exactly half, increment only if 53rd bit is 1 (round to even) */
301 testl $0x00000800,%ebx
302 jz LTruncate_53
303
304LGreater_Half_53: /* Rounding: increment at the 53rd bit */
305LDo_53_round_up:
306 movb LOST_UP,FPU_bits_lost
307 andl $0xfffff800,%ebx /* Truncate to 53 bits */
308 addl $0x00000800,%ebx
 /* Propagate the carry into the high word; the carry out of this adcl
    is what LCheck_Round_Overflow tests. */
309 adcl $0,%eax
310 jmp LCheck_Round_Overflow
311
/* Truncation: record a loss only if some discarded bit is non-zero. */
312LCheck_truncate_53:
313 movl %ebx,%ecx
314 andl $0x000007ff,%ecx
315 orl %edx,%ecx
316 jz L_Re_normalise
317
318LTruncate_53:
319 movb LOST_DOWN,FPU_bits_lost
320 andl $0xfffff800,%ebx /* Truncate to 53 bits */
321 jmp L_Re_normalise
322
323
324/* Round etc to 64 bit precision */
/* The whole of %eax:%ebx is kept; only the extension in %edx is discarded.
   %esi holds the control word; the sign of the result is read from PARAM5. */
325LRound_To_64:
326 movl %esi,%ecx
327 andl CW_RC,%ecx
328 cmpl RC_RND,%ecx
329 je LRound_nearest_64
330
331 cmpl RC_CHOP,%ecx
332 je LCheck_truncate_64
333
334 cmpl RC_UP,%ecx /* Towards +infinity */
335 je LUp_64
336
337 cmpl RC_DOWN,%ecx /* Towards -infinity */
338 je LDown_64
339
 /* Without PARANOID an unmatched rounding mode falls into LUp_64. */
340#ifdef PARANOID
341 jmp L_bugged_round64
342#endif /* PARANOID */
343
344LUp_64:
345 cmpb SIGN_POS,PARAM5
346 jne LCheck_truncate_64 /* If negative then up==truncate */
347
 /* Positive and rounding up: increment if the extension is non-zero. */
348 orl %edx,%edx
349 jnz LDo_64_round_up
350 jmp L_Re_normalise
351
352LDown_64:
353 cmpb SIGN_POS,PARAM5
354 je LCheck_truncate_64 /* If positive then down==truncate */
355
 /* Negative and rounding down: increment the magnitude if the
    extension is non-zero. */
356 orl %edx,%edx
357 jnz LDo_64_round_up
358 jmp L_Re_normalise
359
/* Round to nearest: compare the extension with exactly one half
   (0x80000000); both branches below use the flags of this one compare. */
360LRound_nearest_64:
361 cmpl $0x80000000,%edx
362 jc LCheck_truncate_64
363
364 jne LDo_64_round_up
365
366 /* Now test for round-to-even */
367 testb $1,%bl
368 jz LCheck_truncate_64
369
370LDo_64_round_up:
371 movb LOST_UP,FPU_bits_lost
372 addl $1,%ebx
373 adcl $0,%eax
374
/* Shared by the 24, 53 and 64 bit increment paths.  Entered with the
   carry flag from the final add of the increment. */
375LCheck_Round_Overflow:
376 jnc L_Re_normalise
377
378 /* Overflow, adjust the result (significand to 1.0) */
 /* The first rcrl rotates the carry back in as the top bit of %eax;
    the exponent is bumped to compensate for the right shift. */
379 rcrl $1,%eax
380 rcrl $1,%ebx
381 incw EXP(%edi)
382 jmp L_Re_normalise
383
384LCheck_truncate_64:
385 orl %edx,%edx
386 jz L_Re_normalise
387
/* Falls through into L_Re_normalise, which follows directly. */
388LTruncate_64:
389 movb LOST_DOWN,FPU_bits_lost
390
/* All rounding paths converge here with the rounded significand in
   %eax:%ebx.  A non-zero FPU_denormal means the denormal/underflow
   handling at Normalise_result is still required. */
391L_Re_normalise:
392 testb $0xff,FPU_denormal
393 jnz Normalise_result
394
395L_Normalised:
396 movl TAG_Valid,%edx
397
/* Entered with the result tag in %edx.  Reports precision loss, if any,
   through the set_precision_flag_* helpers before storing. */
398L_deNormalised:
399 cmpb LOST_UP,FPU_bits_lost
400 je L_precision_lost_up
401
402 cmpb LOST_DOWN,FPU_bits_lost
403 je L_precision_lost_down
404
405L_no_precision_loss:
406 /* store the result */
407
408L_Store_significand:
409 movl %eax,SIGH(%edi)
410 movl %ebx,SIGL(%edi)
411
 /* Signed compare: exponent >= EXP_OVER is an overflow. */
412 cmpw EXP_OVER,EXP(%edi)
413 jge L_overflow
414
 /* The tag becomes the return value. */
415 movl %edx,%eax
416
417 /* Convert the exponent to 80x87 form. */
418 addw EXTENDED_Ebias,EXP(%edi)
419 andw $0x7fff,EXP(%edi)
420
/* Also reached from L_overflow.  Sets bit 15 of the exponent word when
   the sign in PARAM5 is not SIGN_POS. */
421fpu_reg_round_signed_special_exit:
422
423 cmpb SIGN_POS,PARAM5
424 je fpu_reg_round_special_exit
425
426 orw $0x8000,EXP(%edi) /* Negative sign for the result. */
427
/* Also reached from L_exception_exit (PARANOID builds). */
428fpu_reg_round_special_exit:
429
430#ifndef NON_REENTRANT_FPU
 /* Discard the scratch slot reserved in fpu_reg_round; the real %ebx
    is restored by the next pop. */
431 popl %ebx /* adjust the stack pointer */
432#endif /* NON_REENTRANT_FPU */
433
/* Common epilogue: undo the three register pushes and the frame set up
   by the C-style prologue, then return with the result in %eax. */
434fpu_Arith_exit:
435 popl %ebx
436 popl %edi
437 popl %esi
438 leave
439 ret
440
441
442/*
443 * Set the FPU status flags to represent precision loss due to
444 * round-up.
445 */
/* %edx (tag) and %eax (high significand word) are saved around the call,
   presumably because the helper is C code that may clobber them;
   then the store path is resumed. */
446L_precision_lost_up:
447 push %edx
448 push %eax
449 call set_precision_flag_up
450 popl %eax
451 popl %edx
452 jmp L_no_precision_loss
453
454/*
455 * Set the FPU status flags to represent precision loss due to
456 * truncation.
457 */
/* Same save/call/restore pattern as L_precision_lost_up, calling the
   "down" helper instead. */
458L_precision_lost_down:
459 push %edx
460 push %eax
461 call set_precision_flag_down
462 popl %eax
463 popl %edx
464 jmp L_no_precision_loss
465
466
467/*
468 * The number is a denormal (which might get rounded up to a normal)
469 * Shift the number right the required number of bits, which will
470 * have to be undone later...
471 */
/* Entered from fpu_reg_round with the control word in %ecx, the
   significand in %eax:%ebx and the extension in %edx.  Every path
   leaves at Denorm_done with %ecx holding the control word again. */
472L_Make_denorm:
473 /* The action to be taken depends upon whether the underflow
474 exception is masked */
475 testb CW_Underflow,%cl /* Underflow mask. */
476 jz Unmasked_underflow /* Do not make a denormal. */
477
 /* Written before the push below: in the re-entrant build
    FPU_denormal is %esp-relative. */
478 movb DENORMAL,FPU_denormal
479
480 pushl %ecx /* Save */
 /* %cx = (EXP_UNDER+1) - exponent = number of bits to shift right. */
481 movw EXP_UNDER+1,%cx
482 subw EXP(%edi),%cx
483
484 cmpw $64,%cx /* shifts of 64 or more are handled separately */
485 jnc Denorm_shift_more_than_63
486
487 cmpw $32,%cx /* shrd only works for 0..31 bits */
488 jnc Denorm_shift_more_than_32
489
490/*
491 * We got here without jumps by assuming that the most common requirement
492 * is for a small de-normalising shift.
493 * Shift by [1..31] bits
494 */
 /* Raise the exponent to EXP_UNDER+1 to match the right shift. */
495 addw %cx,EXP(%edi)
496 orl %edx,%edx /* extension */
497 setne %ch /* Save whether %edx is non-zero */
498 xorl %edx,%edx
 /* Shift the 96 bit quantity %eax:%ebx:%edx right by %cl; the bits
    leaving %ebx become the new extension. */
499 shrd %cl,%ebx,%edx
500 shrd %cl,%eax,%ebx
501 shr %cl,%eax
 /* Keep the new extension non-zero if the old one was (sticky bit). */
502 orb %ch,%dl
503 popl %ecx
504 jmp Denorm_done
505
506/* Shift by [32..63] bits */
/* Done as a shift by (count - 32) followed by moving each register
   down one word.  The old extension and the bits shifted out of %ebx
   are folded into bit 0 of the new extension as a sticky bit. */
507Denorm_shift_more_than_32:
508 addw %cx,EXP(%edi)
509 subb $32,%cl
510 orl %edx,%edx
511 setne %ch
512 orb %ch,%bl
513 xorl %edx,%edx
514 shrd %cl,%ebx,%edx
515 shrd %cl,%eax,%ebx
516 shr %cl,%eax
517 orl %edx,%edx /* test these 32 bits */
518 setne %cl
519 orb %ch,%bl
520 orb %cl,%bl
 /* Word-wise move: %ebx -> extension, %eax -> low word, high word = 0. */
521 movl %ebx,%edx
522 movl %eax,%ebx
523 xorl %eax,%eax
524 popl %ecx
525 jmp Denorm_done
526
527/* Shift by [64..) bits */
528Denorm_shift_more_than_63:
529 cmpw $64,%cx
530 jne Denorm_shift_more_than_64
531
532/* Exactly 64 bit shift */
 /* The old high word becomes the extension, with a sticky bit from the
    old %ebx and %edx ORed into its bit 0; the significand becomes zero. */
533 addw %cx,EXP(%edi)
534 xorl %ecx,%ecx
535 orl %edx,%edx
536 setne %cl
537 orl %ebx,%ebx
538 setne %ch
539 orb %ch,%cl
540 orb %cl,%al
541 movl %eax,%edx
542 xorl %eax,%eax
543 xorl %ebx,%ebx
544 popl %ecx
545 jmp Denorm_done
546
/* Everything is shifted out: zero significand with a small non-zero
   extension, so the result still counts as inexact. */
547Denorm_shift_more_than_64:
548 movw EXP_UNDER+1,EXP(%edi)
549/* This is easy, %eax must be non-zero, so.. */
550 movl $1,%edx
551 xorl %eax,%eax
552 xorl %ebx,%ebx
553 popl %ecx
554 jmp Denorm_done
555
556
/* Underflow exception unmasked: no shift is done here, only the flag is
   recorded; %ecx was never pushed on this path. */
557Unmasked_underflow:
558 movb UNMASKED_UNDERFLOW,FPU_denormal
559 jmp Denorm_done
560
561
562/* Undo the de-normalisation. */
/* Reached from L_Re_normalise when FPU_denormal is non-zero, with the
   rounded significand in %eax:%ebx. */
563Normalise_result:
564 cmpb UNMASKED_UNDERFLOW,FPU_denormal
565 je Signal_underflow
566
567/* The number must be a denormal if we got here. */
568#ifdef PARANOID
569 /* But check it... just in case. */
570 cmpw EXP_UNDER+1,EXP(%edi)
571 jne L_norm_bugged
572#endif /* PARANOID */
573
574#ifdef PECULIAR_486
575 /*
576 * This implements a special feature of 80486 behaviour.
577 * Underflow will be signalled even if the number is
578 * not a denormal after rounding.
579 * This difference occurs only for masked underflow, and not
580 * in the unmasked case.
581 * Actual 80486 behaviour differs from this in some circumstances.
582 */
583 orl %eax,%eax /* ms bits */
584 js LPseudoDenormal /* Will be masked underflow */
585#else
586 orl %eax,%eax /* ms bits */
587 js L_Normalised /* No longer a denormal */
588#endif /* PECULIAR_486 */
589
 /* Still using the flags of the orl above: a non-zero %eax is a denormal. */
590 jnz LDenormal_adj_exponent
591
592 orl %ebx,%ebx
593 jz L_underflow_to_zero /* The contents are zero */
594
595LDenormal_adj_exponent:
596 decw EXP(%edi)
597
598LPseudoDenormal:
599 testb $0xff,FPU_bits_lost /* bits lost == underflow */
 /* movl leaves the flags alone, so the jz tests the testb above. */
600 movl TAG_Special,%edx
601 jz L_deNormalised
602
603 /* There must be a masked underflow */
 /* %eax is saved below the argument; the first pop discards the
    argument and the second restores %eax. */
604 push %eax
605 pushl EX_Underflow
606 call EXCEPTION
607 popl %eax
608 popl %eax
609 movl TAG_Special,%edx
610 jmp L_deNormalised
611
612
613/*
614 * The operations resulted in a number too small to represent.
615 * Masked response.
616 */
/* Reached from Normalise_result when %eax and %ebx are both zero.
   Reports precision loss and underflow, then stores a zero. */
617L_underflow_to_zero:
 /* %eax is preserved across the helper call. */
618 push %eax
619 call set_precision_flag_down
620 popl %eax
621
 /* Save %eax, push the exception argument; the first pop discards the
    argument and the second restores %eax. */
622 push %eax
623 pushl EX_Underflow
624 call EXCEPTION
625 popl %eax
626 popl %eax
627
628/* Reduce the exponent to EXP_UNDER */
629 movw EXP_UNDER,EXP(%edi)
630 movl TAG_Zero,%edx
 /* Enters the store path after the precision-flag checks. */
631 jmp L_Store_significand
632
633
634/* The operations resulted in a number too large to represent. */
/* Reached from L_Store_significand after the significand has been stored.
   %eax is not reloaded afterwards, so whatever arith_overflow leaves in
   %eax is what the routine returns. */
635L_overflow:
636 addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
 /* Pass the result register pointer as the single argument. */
637 push %edi
638 call arith_overflow
639 pop %edi
640 jmp fpu_reg_round_signed_special_exit
641
642
/* Reached from Normalise_result when FPU_denormal is UNMASKED_UNDERFLOW,
   i.e. the number was rounded without being shifted into a denormal. */
643Signal_underflow:
644 /* The number may have been changed to a non-denormal */
645 /* by the rounding operations. */
646 cmpw EXP_UNDER,EXP(%edi)
647 jle Do_unmasked_underflow
648
649 jmp L_Normalised
650
651Do_unmasked_underflow:
652 /* Increase the exponent by the magic number */
653 addw $(3*(1<<13)),EXP(%edi)
 /* Save %eax, push the exception argument; the first pop discards the
    argument and the second restores %eax. */
654 push %eax
655 pushl EX_Underflow
656 call EXCEPTION
657 popl %eax
658 popl %eax
659 jmp L_Normalised
660
661
/* Internal-error handlers, built only with PARANOID.  Each one pushes
   EX_INTERNAL or-ed with a distinct code, calls EXCEPTION, pops the
   argument into %ebx and leaves through L_exception_exit, which
   returns -1.  %ebx is restored from the stack on the exit path. */
662#ifdef PARANOID
663#ifdef PECULIAR_486
/* Precision-control field matched none of the accepted values. */
664L_bugged_denorm_486:
665 pushl EX_INTERNAL|0x236
666 call EXCEPTION
667 popl %ebx
668 jmp L_exception_exit
669#else
/* Precision-control field matched none of the accepted values. */
670L_bugged_denorm:
671 pushl EX_INTERNAL|0x230
672 call EXCEPTION
673 popl %ebx
674 jmp L_exception_exit
675#endif /* PECULIAR_486 */
676
/* Rounding-control field not recognised in the 24 bit code. */
677L_bugged_round24:
678 pushl EX_INTERNAL|0x231
679 call EXCEPTION
680 popl %ebx
681 jmp L_exception_exit
682
/* Rounding-control field not recognised in the 53 bit code. */
683L_bugged_round53:
684 pushl EX_INTERNAL|0x232
685 call EXCEPTION
686 popl %ebx
687 jmp L_exception_exit
688
/* Rounding-control field not recognised in the 64 bit code. */
689L_bugged_round64:
690 pushl EX_INTERNAL|0x233
691 call EXCEPTION
692 popl %ebx
693 jmp L_exception_exit
694
/* Exponent was not EXP_UNDER+1 on entry to the denormal fix-up. */
695L_norm_bugged:
696 pushl EX_INTERNAL|0x234
697 call EXCEPTION
698 popl %ebx
699 jmp L_exception_exit
700
/* Only referenced from the commented-out entry check in fpu_reg_round. */
701L_entry_bugged:
702 pushl EX_INTERNAL|0x235
703 call EXCEPTION
704 popl %ebx
705L_exception_exit:
706 mov $-1,%eax
707 jmp fpu_reg_round_special_exit
708#endif /* PARANOID */
1/* SPDX-License-Identifier: GPL-2.0 */
2 .file "reg_round.S"
3/*---------------------------------------------------------------------------+
4 | reg_round.S |
5 | |
6 | Rounding/truncation/etc for FPU basic arithmetic functions. |
7 | |
8 | Copyright (C) 1993,1995,1997 |
9 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
10 | Australia. E-mail billm@suburbia.net |
11 | |
12 | This code has four possible entry points. |
13 | The following must be entered by a jmp instruction: |
14 | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
15 | |
16 | The FPU_round entry point is intended to be used by C code. |
17 | From C, call as: |
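18 | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
 | NOTE: the code below reads the control word from PARAM4 and the sign of |
 | the result from PARAM5, so the three-argument prototype above appears |
 | to be out of date; check the C declaration before calling. |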
19 | |
20 | Return value is the tag of the answer, or-ed with FPU_Exception if |
21 | one was raised, or -1 on internal error. |
22 | |
23 | For correct "up" and "down" rounding, the argument must have the correct |
24 | sign. |
25 | |
26 +---------------------------------------------------------------------------*/
27
28/*---------------------------------------------------------------------------+
29 | Four entry points. |
30 | |
31 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
32 | %eax:%ebx 64 bit significand |
33 | %edx 32 bit extension of the significand |
34 | %edi pointer to an FPU_REG for the result to be stored |
35 | stack calling function must have set up a C stack frame and |
36 | pushed %esi, %edi, and %ebx |
37 | |
38 | Needed just for the fpu_reg_round_sqrt entry point: |
39 | %cx A control word in the same format as the FPU control word. |
40 | Otherwise, PARAM4 must give such a value. |
41 | |
42 | |
43 | The significand and its extension are assumed to be exact in the |
44 | following sense: |
45 | If the significand by itself is the exact result then the significand |
46 | extension (%edx) must contain 0, otherwise the significand extension |
47 | must be non-zero. |
48 | If the significand extension is non-zero then the significand is |
49 | smaller than the magnitude of the correct exact result by an amount |
50 | greater than zero and less than one ls bit of the significand. |
51 | The significand extension is only required to have three possible |
52 | non-zero values: |
53 | less than 0x80000000 <=> the significand is less than 1/2 an ls |
54 | bit smaller than the magnitude of the |
55 | true exact result. |
56 | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
57 | smaller than the magnitude of the true |
58 | exact result. |
59 | greater than 0x80000000 <=> the significand is more than 1/2 an ls |
60 | bit smaller than the magnitude of the |
61 | true exact result. |
62 | |
63 +---------------------------------------------------------------------------*/
64
65/*---------------------------------------------------------------------------+
66 | The code in this module has become quite complex, but it should handle |
67 | all of the FPU flags which are set at this stage of the basic arithmetic |
68 | computations. |
69 | There are a few rare cases where the results are not set identically to |
70 | a real FPU. These require a bit more thought because at this stage the |
71 | results of the code here appear to be more consistent... |
72 | This may be changed in a future version. |
73 +---------------------------------------------------------------------------*/
74
75
76#include "fpu_emu.h"
77#include "exception.h"
78#include "control_w.h"
79
80/* Flags for FPU_bits_lost */
/* 0 (stored at Denorm_done) means the rounding so far has been exact. */
81#define LOST_DOWN $1
82#define LOST_UP $2
83
84/* Flags for FPU_denormal */
/* 0 (stored in fpu_reg_round) means the argument was not treated as a denormal. */
85#define DENORMAL $1
86#define UNMASKED_UNDERFLOW $2
87
88
89#ifndef NON_REENTRANT_FPU
90/* Make the code re-entrant by putting
91 local storage on the stack: */
/* Both bytes are addressed relative to %esp, so they name the right
   location only while %esp is at the level left by the extra
   "pushl %ebx" in fpu_reg_round.  Code that pushes anything must not
   touch them until it has popped again. */
92#define FPU_bits_lost (%esp)
93#define FPU_denormal 1(%esp)
94
95#else
96/* Not re-entrant, so we can gain speed by putting
97 local storage in a static area: */
98.data
99 .align 4,0
100FPU_bits_lost:
101 .byte 0
102FPU_denormal:
103 .byte 0
104#endif /* NON_REENTRANT_FPU */
105
106
107.text
108.globl fpu_reg_round
109.globl fpu_Arith_exit
110
111/* Entry point when called from C */
/*
 * FPU_round: entry point for C callers.
 * Builds a frame, saves %esi, %edi and %ebx, then loads the significand
 * of the FPU_REG at PARAM1 into %eax:%ebx and PARAM2 into %edx (the
 * extension) before falling through into fpu_reg_round.
 */
112SYM_FUNC_START(FPU_round)
113 pushl %ebp
114 movl %esp,%ebp
115 pushl %esi
116 pushl %edi
117 pushl %ebx
118
119 movl PARAM1,%edi
120 movl SIGH(%edi),%eax
121 movl SIGL(%edi),%ebx
122 movl PARAM2,%edx
123
/* Entered by jmp with %eax:%ebx, %edx and %edi already set up.
   %ecx receives the control word from PARAM4. */
124fpu_reg_round: /* Normal entry point */
125 movl PARAM4,%ecx
126
127#ifndef NON_REENTRANT_FPU
 /* Reserve one stack slot for FPU_bits_lost / FPU_denormal; the value
    pushed is irrelevant.  It is discarded at fpu_reg_round_special_exit. */
128 pushl %ebx /* adjust the stack pointer */
129#endif /* NON_REENTRANT_FPU */
130
131#ifdef PARANOID
132/* Cannot use this here yet */
133/* orl %eax,%eax */
134/* jns L_entry_bugged */
135#endif /* PARANOID */
136
 /* Signed 16-bit compare: exponent <= EXP_UNDER takes the denormal path. */
137 cmpw EXP_UNDER,EXP(%edi)
138 jle L_Make_denorm /* The number is a de-normal */
139
140 movb $0,FPU_denormal /* 0 -> not a de-normal */
141
/* The denormal paths rejoin here with %ecx still holding the control word. */
142Denorm_done:
143 movb $0,FPU_bits_lost /* No bits yet lost in rounding */
144
 /* %esi keeps the whole control word for the rounding-mode test later;
    %ecx is reduced to the precision-control field. */
145 movl %ecx,%esi
146 andl CW_PC,%ecx
147 cmpl PR_64_BITS,%ecx
148 je LRound_To_64
149
150 cmpl PR_53_BITS,%ecx
151 je LRound_To_53
152
153 cmpl PR_24_BITS,%ecx
154 je LRound_To_24
155
 /* If nothing matched and PARANOID is not defined, execution falls
    through into the 24 bit rounding code that follows. */
156#ifdef PECULIAR_486
157/* With the precision control bits set to 01 "(reserved)", a real 80486
158 behaves as if the precision control bits were set to 11 "64 bits" */
159 cmpl PR_RESERVED_BITS,%ecx
160 je LRound_To_64
161#ifdef PARANOID
162 jmp L_bugged_denorm_486
163#endif /* PARANOID */
164#else
165#ifdef PARANOID
166 jmp L_bugged_denorm /* There is no bug, just a bad control word */
167#endif /* PARANOID */
168#endif /* PECULIAR_486 */
169
170
171/* Round etc to 24 bit precision */
/* The bits below the 24 bit result are the low 8 bits of %eax, all of
   %ebx and all of %edx.  %esi holds the control word; the sign of the
   result is read from PARAM5. */
172LRound_To_24:
173 movl %esi,%ecx
174 andl CW_RC,%ecx
175 cmpl RC_RND,%ecx
176 je LRound_nearest_24
177
178 cmpl RC_CHOP,%ecx
179 je LCheck_truncate_24
180
181 cmpl RC_UP,%ecx /* Towards +infinity */
182 je LUp_24
183
184 cmpl RC_DOWN,%ecx /* Towards -infinity */
185 je LDown_24
186
 /* Without PARANOID an unmatched rounding mode falls into LUp_24. */
187#ifdef PARANOID
188 jmp L_bugged_round24
189#endif /* PARANOID */
190
191LUp_24:
192 cmpb SIGN_POS,PARAM5
193 jne LCheck_truncate_24 /* If negative then up==truncate */
194
195 jmp LCheck_24_round_up
196
197LDown_24:
198 cmpb SIGN_POS,PARAM5
199 je LCheck_truncate_24 /* If positive then down==truncate */
200
/* Directed rounding away from zero: increment if any discarded bit is set,
   otherwise the value is already exact at 24 bits. */
201LCheck_24_round_up:
202 movl %eax,%ecx
203 andl $0x000000ff,%ecx
204 orl %ebx,%ecx
205 orl %edx,%ecx
206 jnz LDo_24_round_up
207 jmp L_Re_normalise
208
209LRound_nearest_24:
210 /* Do rounding of the 24th bit if needed (nearest or even) */
211 movl %eax,%ecx
212 andl $0x000000ff,%ecx
 /* Both branches below test the flags of this single compare against
    the half-way value 0x80. */
213 cmpl $0x00000080,%ecx
214 jc LCheck_truncate_24 /* less than half, no increment needed */
215
216 jne LGreater_Half_24 /* greater than half, increment needed */
217
218 /* Possibly half, we need to check the ls bits */
219 orl %ebx,%ebx
220 jnz LGreater_Half_24 /* greater than half, increment needed */
221
222 orl %edx,%edx
223 jnz LGreater_Half_24 /* greater than half, increment needed */
224
225 /* Exactly half, increment only if 24th bit is 1 (round to even) */
226 testl $0x00000100,%eax
227 jz LDo_truncate_24
228
229LGreater_Half_24: /* Rounding: increment at the 24th bit */
230LDo_24_round_up:
231 andl $0xffffff00,%eax /* Truncate to 24 bits */
232 xorl %ebx,%ebx
233 movb LOST_UP,FPU_bits_lost
 /* The addl must stay last: its carry flag is what
    LCheck_Round_Overflow tests. */
234 addl $0x00000100,%eax
235 jmp LCheck_Round_Overflow
236
/* Truncation: record a loss only if some discarded bit is non-zero. */
237LCheck_truncate_24:
238 movl %eax,%ecx
239 andl $0x000000ff,%ecx
240 orl %ebx,%ecx
241 orl %edx,%ecx
242 jz L_Re_normalise /* No truncation needed */
243
244LDo_truncate_24:
245 andl $0xffffff00,%eax /* Truncate to 24 bits */
246 xorl %ebx,%ebx
247 movb LOST_DOWN,FPU_bits_lost
248 jmp L_Re_normalise
249
250
251/* Round etc to 53 bit precision */
/* The bits below the 53 bit result are the low 11 bits of %ebx and all
   of %edx.  %esi holds the control word; the sign of the result is read
   from PARAM5. */
252LRound_To_53:
253 movl %esi,%ecx
254 andl CW_RC,%ecx
255 cmpl RC_RND,%ecx
256 je LRound_nearest_53
257
258 cmpl RC_CHOP,%ecx
259 je LCheck_truncate_53
260
261 cmpl RC_UP,%ecx /* Towards +infinity */
262 je LUp_53
263
264 cmpl RC_DOWN,%ecx /* Towards -infinity */
265 je LDown_53
266
 /* Without PARANOID an unmatched rounding mode falls into LUp_53. */
267#ifdef PARANOID
268 jmp L_bugged_round53
269#endif /* PARANOID */
270
271LUp_53:
272 cmpb SIGN_POS,PARAM5
273 jne LCheck_truncate_53 /* If negative then up==truncate */
274
275 jmp LCheck_53_round_up
276
277LDown_53:
278 cmpb SIGN_POS,PARAM5
279 je LCheck_truncate_53 /* If positive then down==truncate */
280
/* Directed rounding away from zero: increment if any discarded bit is set,
   otherwise the value is already exact at 53 bits. */
281LCheck_53_round_up:
282 movl %ebx,%ecx
283 andl $0x000007ff,%ecx
284 orl %edx,%ecx
285 jnz LDo_53_round_up
286 jmp L_Re_normalise
287
288LRound_nearest_53:
289 /* Do rounding of the 53rd bit if needed (nearest or even) */
290 movl %ebx,%ecx
291 andl $0x000007ff,%ecx
 /* Both branches below test the flags of this single compare against
    the half-way value 0x400. */
292 cmpl $0x00000400,%ecx
293 jc LCheck_truncate_53 /* less than half, no increment needed */
294
295 jnz LGreater_Half_53 /* greater than half, increment needed */
296
297 /* Possibly half, we need to check the ls bits */
298 orl %edx,%edx
299 jnz LGreater_Half_53 /* greater than half, increment needed */
300
301 /* Exactly half, increment only if 53rd bit is 1 (round to even) */
302 testl $0x00000800,%ebx
303 jz LTruncate_53
304
305LGreater_Half_53: /* Rounding: increment at the 53rd bit */
306LDo_53_round_up:
307 movb LOST_UP,FPU_bits_lost
308 andl $0xfffff800,%ebx /* Truncate to 53 bits */
309 addl $0x00000800,%ebx
 /* Propagate the carry into the high word; the carry out of this adcl
    is what LCheck_Round_Overflow tests. */
310 adcl $0,%eax
311 jmp LCheck_Round_Overflow
312
/* Truncation: record a loss only if some discarded bit is non-zero. */
313LCheck_truncate_53:
314 movl %ebx,%ecx
315 andl $0x000007ff,%ecx
316 orl %edx,%ecx
317 jz L_Re_normalise
318
319LTruncate_53:
320 movb LOST_DOWN,FPU_bits_lost
321 andl $0xfffff800,%ebx /* Truncate to 53 bits */
322 jmp L_Re_normalise
323
324
325/* Round etc to 64 bit precision */
/* The whole of %eax:%ebx is kept; only the extension in %edx is discarded.
   %esi holds the control word; the sign of the result is read from PARAM5. */
326LRound_To_64:
327 movl %esi,%ecx
328 andl CW_RC,%ecx
329 cmpl RC_RND,%ecx
330 je LRound_nearest_64
331
332 cmpl RC_CHOP,%ecx
333 je LCheck_truncate_64
334
335 cmpl RC_UP,%ecx /* Towards +infinity */
336 je LUp_64
337
338 cmpl RC_DOWN,%ecx /* Towards -infinity */
339 je LDown_64
340
 /* Without PARANOID an unmatched rounding mode falls into LUp_64. */
341#ifdef PARANOID
342 jmp L_bugged_round64
343#endif /* PARANOID */
344
345LUp_64:
346 cmpb SIGN_POS,PARAM5
347 jne LCheck_truncate_64 /* If negative then up==truncate */
348
 /* Positive and rounding up: increment if the extension is non-zero. */
349 orl %edx,%edx
350 jnz LDo_64_round_up
351 jmp L_Re_normalise
352
353LDown_64:
354 cmpb SIGN_POS,PARAM5
355 je LCheck_truncate_64 /* If positive then down==truncate */
356
 /* Negative and rounding down: increment the magnitude if the
    extension is non-zero. */
357 orl %edx,%edx
358 jnz LDo_64_round_up
359 jmp L_Re_normalise
360
/* Round to nearest: compare the extension with exactly one half
   (0x80000000); both branches below use the flags of this one compare. */
361LRound_nearest_64:
362 cmpl $0x80000000,%edx
363 jc LCheck_truncate_64
364
365 jne LDo_64_round_up
366
367 /* Now test for round-to-even */
368 testb $1,%bl
369 jz LCheck_truncate_64
370
371LDo_64_round_up:
372 movb LOST_UP,FPU_bits_lost
373 addl $1,%ebx
374 adcl $0,%eax
375
/* Shared by the 24, 53 and 64 bit increment paths.  Entered with the
   carry flag from the final add of the increment. */
376LCheck_Round_Overflow:
377 jnc L_Re_normalise
378
379 /* Overflow, adjust the result (significand to 1.0) */
 /* The first rcrl rotates the carry back in as the top bit of %eax;
    the exponent is bumped to compensate for the right shift. */
380 rcrl $1,%eax
381 rcrl $1,%ebx
382 incw EXP(%edi)
383 jmp L_Re_normalise
384
385LCheck_truncate_64:
386 orl %edx,%edx
387 jz L_Re_normalise
388
/* Falls through into L_Re_normalise, which follows directly. */
389LTruncate_64:
390 movb LOST_DOWN,FPU_bits_lost
391
/* All rounding paths converge here with the rounded significand in
   %eax:%ebx.  A non-zero FPU_denormal means the denormal/underflow
   handling at Normalise_result is still required. */
392L_Re_normalise:
393 testb $0xff,FPU_denormal
394 jnz Normalise_result
395
396L_Normalised:
397 movl TAG_Valid,%edx
398
/* Entered with the result tag in %edx.  Reports precision loss, if any,
   through the set_precision_flag_* helpers before storing. */
399L_deNormalised:
400 cmpb LOST_UP,FPU_bits_lost
401 je L_precision_lost_up
402
403 cmpb LOST_DOWN,FPU_bits_lost
404 je L_precision_lost_down
405
406L_no_precision_loss:
407 /* store the result */
408
409L_Store_significand:
410 movl %eax,SIGH(%edi)
411 movl %ebx,SIGL(%edi)
412
 /* Signed compare: exponent >= EXP_OVER is an overflow. */
413 cmpw EXP_OVER,EXP(%edi)
414 jge L_overflow
415
 /* The tag becomes the return value. */
416 movl %edx,%eax
417
418 /* Convert the exponent to 80x87 form. */
419 addw EXTENDED_Ebias,EXP(%edi)
420 andw $0x7fff,EXP(%edi)
421
/* Also reached from L_overflow.  Sets bit 15 of the exponent word when
   the sign in PARAM5 is not SIGN_POS. */
422fpu_reg_round_signed_special_exit:
423
424 cmpb SIGN_POS,PARAM5
425 je fpu_reg_round_special_exit
426
427 orw $0x8000,EXP(%edi) /* Negative sign for the result. */
428
/* Also reached from L_exception_exit (PARANOID builds). */
429fpu_reg_round_special_exit:
430
431#ifndef NON_REENTRANT_FPU
 /* Discard the scratch slot reserved in fpu_reg_round; the real %ebx
    is restored by the next pop. */
432 popl %ebx /* adjust the stack pointer */
433#endif /* NON_REENTRANT_FPU */
434
/* Common epilogue: undo the three register pushes and the frame set up
   by the C-style prologue, then return with the result in %eax. */
435fpu_Arith_exit:
436 popl %ebx
437 popl %edi
438 popl %esi
439 leave
440 RET
441
442
443/*
444 * Set the FPU status flags to represent precision loss due to
445 * round-up.
446 */
/* %edx (tag) and %eax (high significand word) are saved around the call,
   presumably because the helper is C code that may clobber them;
   then the store path is resumed. */
447L_precision_lost_up:
448 push %edx
449 push %eax
450 call set_precision_flag_up
451 popl %eax
452 popl %edx
453 jmp L_no_precision_loss
454
455/*
456 * Set the FPU status flags to represent precision loss due to
457 * truncation.
458 */
/* Same save/call/restore pattern as L_precision_lost_up, calling the
   "down" helper instead. */
459L_precision_lost_down:
460 push %edx
461 push %eax
462 call set_precision_flag_down
463 popl %eax
464 popl %edx
465 jmp L_no_precision_loss
466
467
468/*
469 * The number is a denormal (which might get rounded up to a normal)
470 * Shift the number right the required number of bits, which will
471 * have to be undone later...
472 */
/* Entered from fpu_reg_round with the control word in %ecx, the
   significand in %eax:%ebx and the extension in %edx.  Every path
   leaves at Denorm_done with %ecx holding the control word again. */
473L_Make_denorm:
474 /* The action to be taken depends upon whether the underflow
475 exception is masked */
476 testb CW_Underflow,%cl /* Underflow mask. */
477 jz Unmasked_underflow /* Do not make a denormal. */
478
 /* Written before the push below: in the re-entrant build
    FPU_denormal is %esp-relative. */
479 movb DENORMAL,FPU_denormal
480
481 pushl %ecx /* Save */
 /* %cx = (EXP_UNDER+1) - exponent = number of bits to shift right. */
482 movw EXP_UNDER+1,%cx
483 subw EXP(%edi),%cx
484
485 cmpw $64,%cx /* shifts of 64 or more are handled separately */
486 jnc Denorm_shift_more_than_63
487
488 cmpw $32,%cx /* shrd only works for 0..31 bits */
489 jnc Denorm_shift_more_than_32
490
491/*
492 * We got here without jumps by assuming that the most common requirement
493 * is for a small de-normalising shift.
494 * Shift by [1..31] bits
495 */
 /* Raise the exponent to EXP_UNDER+1 to match the right shift. */
496 addw %cx,EXP(%edi)
497 orl %edx,%edx /* extension */
498 setne %ch /* Save whether %edx is non-zero */
499 xorl %edx,%edx
 /* Shift the 96 bit quantity %eax:%ebx:%edx right by %cl; the bits
    leaving %ebx become the new extension. */
500 shrd %cl,%ebx,%edx
501 shrd %cl,%eax,%ebx
502 shr %cl,%eax
 /* Keep the new extension non-zero if the old one was (sticky bit). */
503 orb %ch,%dl
504 popl %ecx
505 jmp Denorm_done
506
507/* Shift by [32..63] bits */
/* Done as a shift by (count - 32) followed by moving each register
   down one word.  The old extension and the bits shifted out of %ebx
   are folded into bit 0 of the new extension as a sticky bit. */
508Denorm_shift_more_than_32:
509 addw %cx,EXP(%edi)
510 subb $32,%cl
511 orl %edx,%edx
512 setne %ch
513 orb %ch,%bl
514 xorl %edx,%edx
515 shrd %cl,%ebx,%edx
516 shrd %cl,%eax,%ebx
517 shr %cl,%eax
518 orl %edx,%edx /* test these 32 bits */
519 setne %cl
520 orb %ch,%bl
521 orb %cl,%bl
 /* Word-wise move: %ebx -> extension, %eax -> low word, high word = 0. */
522 movl %ebx,%edx
523 movl %eax,%ebx
524 xorl %eax,%eax
525 popl %ecx
526 jmp Denorm_done
527
528/* Shift by [64..) bits */
529Denorm_shift_more_than_63:
530 cmpw $64,%cx
531 jne Denorm_shift_more_than_64
532
533/* Exactly 64 bit shift */
 /* The old high word becomes the extension, with a sticky bit from the
    old %ebx and %edx ORed into its bit 0; the significand becomes zero. */
534 addw %cx,EXP(%edi)
535 xorl %ecx,%ecx
536 orl %edx,%edx
537 setne %cl
538 orl %ebx,%ebx
539 setne %ch
540 orb %ch,%cl
541 orb %cl,%al
542 movl %eax,%edx
543 xorl %eax,%eax
544 xorl %ebx,%ebx
545 popl %ecx
546 jmp Denorm_done
547
/* Everything is shifted out: zero significand with a small non-zero
   extension, so the result still counts as inexact. */
548Denorm_shift_more_than_64:
549 movw EXP_UNDER+1,EXP(%edi)
550/* This is easy, %eax must be non-zero, so.. */
551 movl $1,%edx
552 xorl %eax,%eax
553 xorl %ebx,%ebx
554 popl %ecx
555 jmp Denorm_done
556
557
/* Underflow exception unmasked: no shift is done here, only the flag is
   recorded; %ecx was never pushed on this path. */
558Unmasked_underflow:
559 movb UNMASKED_UNDERFLOW,FPU_denormal
560 jmp Denorm_done
561
562
563/* Undo the de-normalisation. */
/* Reached from L_Re_normalise when FPU_denormal is non-zero, with the
   rounded significand in %eax:%ebx. */
564Normalise_result:
565 cmpb UNMASKED_UNDERFLOW,FPU_denormal
566 je Signal_underflow
567
568/* The number must be a denormal if we got here. */
569#ifdef PARANOID
570 /* But check it... just in case. */
571 cmpw EXP_UNDER+1,EXP(%edi)
572 jne L_norm_bugged
573#endif /* PARANOID */
574
575#ifdef PECULIAR_486
576 /*
577 * This implements a special feature of 80486 behaviour.
578 * Underflow will be signaled even if the number is
579 * not a denormal after rounding.
580 * This difference occurs only for masked underflow, and not
581 * in the unmasked case.
582 * Actual 80486 behaviour differs from this in some circumstances.
583 */
584 orl %eax,%eax /* ms bits */
585 js LPseudoDenormal /* Will be masked underflow */
586#else
587 orl %eax,%eax /* ms bits */
588 js L_Normalised /* No longer a denormal */
589#endif /* PECULIAR_486 */
590
 /* Still using the flags of the orl above: a non-zero %eax is a denormal. */
591 jnz LDenormal_adj_exponent
592
593 orl %ebx,%ebx
594 jz L_underflow_to_zero /* The contents are zero */
595
596LDenormal_adj_exponent:
597 decw EXP(%edi)
598
599LPseudoDenormal:
600 testb $0xff,FPU_bits_lost /* bits lost == underflow */
 /* movl leaves the flags alone, so the jz tests the testb above. */
601 movl TAG_Special,%edx
602 jz L_deNormalised
603
604 /* There must be a masked underflow */
 /* %eax is saved below the argument; the first pop discards the
    argument and the second restores %eax. */
605 push %eax
606 pushl EX_Underflow
607 call EXCEPTION
608 popl %eax
609 popl %eax
610 movl TAG_Special,%edx
611 jmp L_deNormalised
612
613
614/*
615 * The operations resulted in a number too small to represent.
616 * Masked response.
617 */
/* Reached from Normalise_result when %eax and %ebx are both zero.
   Reports precision loss and underflow, then stores a zero. */
618L_underflow_to_zero:
 /* %eax is preserved across the helper call. */
619 push %eax
620 call set_precision_flag_down
621 popl %eax
622
 /* Save %eax, push the exception argument; the first pop discards the
    argument and the second restores %eax. */
623 push %eax
624 pushl EX_Underflow
625 call EXCEPTION
626 popl %eax
627 popl %eax
628
629/* Reduce the exponent to EXP_UNDER */
630 movw EXP_UNDER,EXP(%edi)
631 movl TAG_Zero,%edx
 /* Enters the store path after the precision-flag checks. */
632 jmp L_Store_significand
633
634
635/* The operations resulted in a number too large to represent. */
/* Reached from L_Store_significand after the significand has been stored.
   %eax is not reloaded afterwards, so whatever arith_overflow leaves in
   %eax is what the routine returns. */
636L_overflow:
637 addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
 /* Pass the result register pointer as the single argument. */
638 push %edi
639 call arith_overflow
640 pop %edi
641 jmp fpu_reg_round_signed_special_exit
642
643
/* Reached from Normalise_result when FPU_denormal is UNMASKED_UNDERFLOW,
   i.e. the number was rounded without being shifted into a denormal. */
644Signal_underflow:
645 /* The number may have been changed to a non-denormal */
646 /* by the rounding operations. */
647 cmpw EXP_UNDER,EXP(%edi)
648 jle Do_unmasked_underflow
649
650 jmp L_Normalised
651
652Do_unmasked_underflow:
653 /* Increase the exponent by the magic number */
654 addw $(3*(1<<13)),EXP(%edi)
 /* Save %eax, push the exception argument; the first pop discards the
    argument and the second restores %eax. */
655 push %eax
656 pushl EX_Underflow
657 call EXCEPTION
658 popl %eax
659 popl %eax
660 jmp L_Normalised
661
662
/* Internal-error handlers, built only with PARANOID.  Each one pushes
   EX_INTERNAL or-ed with a distinct code, calls EXCEPTION, pops the
   argument into %ebx and leaves through L_exception_exit, which
   returns -1.  %ebx is restored from the stack on the exit path. */
663#ifdef PARANOID
664#ifdef PECULIAR_486
/* Precision-control field matched none of the accepted values. */
665L_bugged_denorm_486:
666 pushl EX_INTERNAL|0x236
667 call EXCEPTION
668 popl %ebx
669 jmp L_exception_exit
670#else
/* Precision-control field matched none of the accepted values. */
671L_bugged_denorm:
672 pushl EX_INTERNAL|0x230
673 call EXCEPTION
674 popl %ebx
675 jmp L_exception_exit
676#endif /* PECULIAR_486 */
677
/* Rounding-control field not recognised in the 24 bit code. */
678L_bugged_round24:
679 pushl EX_INTERNAL|0x231
680 call EXCEPTION
681 popl %ebx
682 jmp L_exception_exit
683
/* Rounding-control field not recognised in the 53 bit code. */
684L_bugged_round53:
685 pushl EX_INTERNAL|0x232
686 call EXCEPTION
687 popl %ebx
688 jmp L_exception_exit
689
/* Rounding-control field not recognised in the 64 bit code. */
690L_bugged_round64:
691 pushl EX_INTERNAL|0x233
692 call EXCEPTION
693 popl %ebx
694 jmp L_exception_exit
695
/* Exponent was not EXP_UNDER+1 on entry to the denormal fix-up. */
696L_norm_bugged:
697 pushl EX_INTERNAL|0x234
698 call EXCEPTION
699 popl %ebx
700 jmp L_exception_exit
701
/* Only referenced from the commented-out entry check in fpu_reg_round. */
702L_entry_bugged:
703 pushl EX_INTERNAL|0x235
704 call EXCEPTION
705 popl %ebx
706L_exception_exit:
707 mov $-1,%eax
708 jmp fpu_reg_round_special_exit
709#endif /* PARANOID */
710
/* Closes the symbol opened by SYM_FUNC_START(FPU_round). */
711SYM_FUNC_END(FPU_round)