fp_util.S - arch/m68k/math-emu/fp_util.S - Linux diff v5.9

   1/*
   2 * fp_util.S
   3 *
   4 * Copyright Roman Zippel, 1997.  All rights reserved.
   5 *
   6 * Redistribution and use in source and binary forms, with or without
   7 * modification, are permitted provided that the following conditions
   8 * are met:
   9 * 1. Redistributions of source code must retain the above copyright
  10 *    notice, and the entire permission notice in its entirety,
  11 *    including the disclaimer of warranties.
  12 * 2. Redistributions in binary form must reproduce the above copyright
  13 *    notice, this list of conditions and the following disclaimer in the
  14 *    documentation and/or other materials provided with the distribution.
  15 * 3. The name of the author may not be used to endorse or promote
  16 *    products derived from this software without specific prior
  17 *    written permission.
  18 *
  19 * ALTERNATIVELY, this product may be distributed under the terms of
  20 * the GNU General Public License, in which case the provisions of the GPL are
  21 * required INSTEAD OF the above restrictions.  (This clause is
  22 * necessary due to a potential bad interaction between the GPL and
  23 * the restrictions contained in a BSD-style copyright.)
  24 *
  25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  28 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  35 * OF THE POSSIBILITY OF SUCH DAMAGE.
  36 */
  37
  38#include "fp_emu.h"
  39
  40/*
  41 * Here are lots of conversion and normalization functions mainly
  42 * used by fp_scan.S
  43 * Note that these functions are optimized for "normal" numbers,
  44 * these are handled first and exit as fast as possible, this is
  45 * especially important for fp_normalize_ext/fp_conv_ext2ext, as
  46 * it's called very often.
  47 * The register usage is optimized for fp_scan.S and which register
  48 * is currently at that time unused, be careful if you want change
  49 * something here. %d0 and %d1 is always usable, sometimes %d2 (or
  50 * only the lower half) most function have to return the %a0
  51 * unmodified, so that the caller can immediately reuse it.
  52 */
  53
  54	.globl	fp_ill, fp_end
  55
  56	| exits from fp_scan:
  57	| illegal instruction
  58fp_ill:
  59	printf	,"fp_illegal\n"
  60	rts
  61	| completed instruction
  62fp_end:
  63	tst.l	(TASK_MM-8,%a2)
  64	jmi	1f
  65	tst.l	(TASK_MM-4,%a2)
  66	jmi	1f
  67	tst.l	(TASK_MM,%a2)
  68	jpl	2f
  691:	printf	,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
  702:	clr.l	%d0
  71	rts
  72
  73	.globl	fp_conv_long2ext, fp_conv_single2ext
  74	.globl	fp_conv_double2ext, fp_conv_ext2ext
  75	.globl	fp_normalize_ext, fp_normalize_double
  76	.globl	fp_normalize_single, fp_normalize_single_fast
  77	.globl	fp_conv_ext2double, fp_conv_ext2single
  78	.globl	fp_conv_ext2long, fp_conv_ext2short
  79	.globl	fp_conv_ext2byte
  80	.globl	fp_finalrounding_single, fp_finalrounding_single_fast
  81	.globl	fp_finalrounding_double
  82	.globl	fp_finalrounding, fp_finaltest, fp_final
  83
  84/*
  85 * First several conversion functions from a source operand
  86 * into the extended format. Note, that only fp_conv_ext2ext
  87 * normalizes the number and is always called after the other
  88 * conversion functions, which only move the information into
  89 * fp_ext structure.
  90 */
  91
  92	| fp_conv_long2ext:
  93	|
  94	| args:	%d0 = source (32-bit long)
  95	|	%a0 = destination (ptr to struct fp_ext)
  96
  97fp_conv_long2ext:
  98	printf	PCONV,"l2e: %p -> %p(",2,%d0,%a0
  99	clr.l	%d1			| sign defaults to zero
 100	tst.l	%d0
 101	jeq	fp_l2e_zero		| is source zero?
 102	jpl	1f			| positive?
 103	moveq	#1,%d1
 104	neg.l	%d0
 1051:	swap	%d1
 106	move.w	#0x3fff+31,%d1
 107	move.l	%d1,(%a0)+		| set sign / exp
 108	move.l	%d0,(%a0)+		| set mantissa
 109	clr.l	(%a0)
 110	subq.l	#8,%a0			| restore %a0
 111	printx	PCONV,%a0@
 112	printf	PCONV,")\n"
 113	rts
 114	| source is zero
 115fp_l2e_zero:
 116	clr.l	(%a0)+
 117	clr.l	(%a0)+
 118	clr.l	(%a0)
 119	subq.l	#8,%a0
 120	printx	PCONV,%a0@
 121	printf	PCONV,")\n"
 122	rts
 123
 124	| fp_conv_single2ext
 125	| args:	%d0 = source (single-precision fp value)
 126	|	%a0 = dest (struct fp_ext *)
 127
 128fp_conv_single2ext:
 129	printf	PCONV,"s2e: %p -> %p(",2,%d0,%a0
 130	move.l	%d0,%d1
 131	lsl.l	#8,%d0			| shift mantissa
 132	lsr.l	#8,%d1			| exponent / sign
 133	lsr.l	#7,%d1
 134	lsr.w	#8,%d1
 135	jeq	fp_s2e_small		| zero / denormal?
 136	cmp.w	#0xff,%d1		| NaN / Inf?
 137	jeq	fp_s2e_large
 138	bset	#31,%d0			| set explizit bit
 139	add.w	#0x3fff-0x7f,%d1	| re-bias the exponent.
 1409:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
 141	move.l	%d0,(%a0)+		| high lword of fp_ext.mant
 142	clr.l	(%a0)			| low lword = 0
 143	subq.l	#8,%a0
 144	printx	PCONV,%a0@
 145	printf	PCONV,")\n"
 146	rts
 147	| zeros and denormalized
 148fp_s2e_small:
 149	| exponent is zero, so explizit bit is already zero too
 150	tst.l	%d0
 151	jeq	9b
 152	move.w	#0x4000-0x7f,%d1
 153	jra	9b
 154	| infinities and NAN
 155fp_s2e_large:
 156	bclr	#31,%d0			| clear explizit bit
 157	move.w	#0x7fff,%d1
 158	jra	9b
 159
 160fp_conv_double2ext:
 161#ifdef FPU_EMU_DEBUG
 162	getuser.l %a1@(0),%d0,fp_err_ua2,%a1
 163	getuser.l %a1@(4),%d1,fp_err_ua2,%a1
 164	printf	PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
 165#endif
 166	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
 167	move.l	%d0,%d1
 168	lsl.l	#8,%d0			| shift high mantissa
 169	lsl.l	#3,%d0
 170	lsr.l	#8,%d1			| exponent / sign
 171	lsr.l	#7,%d1
 172	lsr.w	#5,%d1
 173	jeq	fp_d2e_small		| zero / denormal?
 174	cmp.w	#0x7ff,%d1		| NaN / Inf?
 175	jeq	fp_d2e_large
 176	bset	#31,%d0			| set explizit bit
 177	add.w	#0x3fff-0x3ff,%d1	| re-bias the exponent.
 1789:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
 179	move.l	%d0,(%a0)+
 180	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
 181	move.l	%d0,%d1
 182	lsl.l	#8,%d0
 183	lsl.l	#3,%d0
 184	move.l	%d0,(%a0)
 185	moveq	#21,%d0
 186	lsr.l	%d0,%d1
 187	or.l	%d1,-(%a0)
 188	subq.l	#4,%a0
 189	printx	PCONV,%a0@
 190	printf	PCONV,")\n"
 191	rts
 192	| zeros and denormalized
 193fp_d2e_small:
 194	| exponent is zero, so explizit bit is already zero too
 195	tst.l	%d0
 196	jeq	9b
 197	move.w	#0x4000-0x3ff,%d1
 198	jra	9b
 199	| infinities and NAN
 200fp_d2e_large:
 201	bclr	#31,%d0			| clear explizit bit
 202	move.w	#0x7fff,%d1
 203	jra	9b
 204
 205	| fp_conv_ext2ext:
 206	| originally used to get longdouble from userspace, now it's
 207	| called before arithmetic operations to make sure the number
 208	| is normalized [maybe rename it?].
 209	| args:	%a0 = dest (struct fp_ext *)
 210	| returns 0 in %d0 for a NaN, otherwise 1
 211
 212fp_conv_ext2ext:
 213	printf	PCONV,"e2e: %p(",1,%a0
 214	printx	PCONV,%a0@
 215	printf	PCONV,"), "
 216	move.l	(%a0)+,%d0
 217	cmp.w	#0x7fff,%d0		| Inf / NaN?
 218	jeq	fp_e2e_large
 219	move.l	(%a0),%d0
 220	jpl	fp_e2e_small		| zero / denorm?
 221	| The high bit is set, so normalization is irrelevant.
 222fp_e2e_checkround:
 223	subq.l	#4,%a0
 224#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 225	move.b	(%a0),%d0
 226	jne	fp_e2e_round
 227#endif
 228	printf	PCONV,"%p(",1,%a0
 229	printx	PCONV,%a0@
 230	printf	PCONV,")\n"
 231	moveq	#1,%d0
 232	rts
 233#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 234fp_e2e_round:
 235	fp_set_sr FPSR_EXC_INEX2
 236	clr.b	(%a0)
 237	move.w	(FPD_RND,FPDATA),%d2
 238	jne	fp_e2e_roundother	| %d2 == 0, round to nearest
 239	tst.b	%d0			| test guard bit
 240	jpl	9f			| zero is closer
 241	btst	#0,(11,%a0)		| test lsb bit
 242	jne	fp_e2e_doroundup	| round to infinity
 243	lsl.b	#1,%d0			| check low bits
 244	jeq	9f			| round to zero
 245fp_e2e_doroundup:
 246	addq.l	#1,(8,%a0)
 247	jcc	9f
 248	addq.l	#1,(4,%a0)
 249	jcc	9f
 250	move.w	#0x8000,(4,%a0)
 251	addq.w	#1,(2,%a0)
 2529:	printf	PNORM,"%p(",1,%a0
 253	printx	PNORM,%a0@
 254	printf	PNORM,")\n"
 255	rts
 256fp_e2e_roundother:
 257	subq.w	#2,%d2
 258	jcs	9b			| %d2 < 2, round to zero
 259	jhi	1f			| %d2 > 2, round to +infinity
 260	tst.b	(1,%a0)			| to -inf
 261	jne	fp_e2e_doroundup	| negative, round to infinity
 262	jra	9b			| positive, round to zero
 2631:	tst.b	(1,%a0)			| to +inf
 264	jeq	fp_e2e_doroundup	| positive, round to infinity
 265	jra	9b			| negative, round to zero
 266#endif
 267	| zeros and subnormals:
 268	| try to normalize these anyway.
 269fp_e2e_small:
 270	jne	fp_e2e_small1		| high lword zero?
 271	move.l	(4,%a0),%d0
 272	jne	fp_e2e_small2
 273#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 274	clr.l	%d0
 275	move.b	(-4,%a0),%d0
 276	jne	fp_e2e_small3
 277#endif
 278	| Genuine zero.
 279	clr.w	-(%a0)
 280	subq.l	#2,%a0
 281	printf	PNORM,"%p(",1,%a0
 282	printx	PNORM,%a0@
 283	printf	PNORM,")\n"
 284	moveq	#1,%d0
 285	rts
 286	| definitely subnormal, need to shift all 64 bits
 287fp_e2e_small1:
 288	bfffo	%d0{#0,#32},%d1
 289	move.w	-(%a0),%d2
 290	sub.w	%d1,%d2
 291	jcc	1f
 292	| Pathologically small, denormalize.
 293	add.w	%d2,%d1
 294	clr.w	%d2
 2951:	move.w	%d2,(%a0)+
 296	move.w	%d1,%d2
 297	jeq	fp_e2e_checkround
 298	| fancy 64-bit double-shift begins here
 299	lsl.l	%d2,%d0
 300	move.l	%d0,(%a0)+
 301	move.l	(%a0),%d0
 302	move.l	%d0,%d1
 303	lsl.l	%d2,%d0
 304	move.l	%d0,(%a0)
 305	neg.w	%d2
 306	and.w	#0x1f,%d2
 307	lsr.l	%d2,%d1
 308	or.l	%d1,-(%a0)
 309#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 310fp_e2e_extra1:
 311	clr.l	%d0
 312	move.b	(-4,%a0),%d0
 313	neg.w	%d2
 314	add.w	#24,%d2
 315	jcc	1f
 316	clr.b	(-4,%a0)
 317	lsl.l	%d2,%d0
 318	or.l	%d0,(4,%a0)
 319	jra	fp_e2e_checkround
 3201:	addq.w	#8,%d2
 321	lsl.l	%d2,%d0
 322	move.b	%d0,(-4,%a0)
 323	lsr.l	#8,%d0
 324	or.l	%d0,(4,%a0)
 325#endif
 326	jra	fp_e2e_checkround
 327	| pathologically small subnormal
 328fp_e2e_small2:
 329	bfffo	%d0{#0,#32},%d1
 330	add.w	#32,%d1
 331	move.w	-(%a0),%d2
 332	sub.w	%d1,%d2
 333	jcc	1f
 334	| Beyond pathologically small, denormalize.
 335	add.w	%d2,%d1
 336	clr.w	%d2
 3371:	move.w	%d2,(%a0)+
 338	ext.l	%d1
 339	jeq	fp_e2e_checkround
 340	clr.l	(4,%a0)
 341	sub.w	#32,%d2
 342	jcs	1f
 343	lsl.l	%d1,%d0			| lower lword needs only to be shifted
 344	move.l	%d0,(%a0)		| into the higher lword
 345#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 346	clr.l	%d0
 347	move.b	(-4,%a0),%d0
 348	clr.b	(-4,%a0)
 349	neg.w	%d1
 350	add.w	#32,%d1
 351	bfins	%d0,(%a0){%d1,#8}
 352#endif
 353	jra	fp_e2e_checkround
 3541:	neg.w	%d1			| lower lword is splitted between
 355	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
 356#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
 357	jra	fp_e2e_checkround
 358#else
 359	move.w	%d1,%d2
 360	jra	fp_e2e_extra1
 361	| These are extremely small numbers, that will mostly end up as zero
 362	| anyway, so this is only important for correct rounding.
 363fp_e2e_small3:
 364	bfffo	%d0{#24,#8},%d1
 365	add.w	#40,%d1
 366	move.w	-(%a0),%d2
 367	sub.w	%d1,%d2
 368	jcc	1f
 369	| Pathologically small, denormalize.
 370	add.w	%d2,%d1
 371	clr.w	%d2
 3721:	move.w	%d2,(%a0)+
 373	ext.l	%d1
 374	jeq	fp_e2e_checkround
 375	cmp.w	#8,%d1
 376	jcs	2f
 3771:	clr.b	(-4,%a0)
 378	sub.w	#64,%d1
 379	jcs	1f
 380	add.w	#24,%d1
 381	lsl.l	%d1,%d0
 382	move.l	%d0,(%a0)
 383	jra	fp_e2e_checkround
 3841:	neg.w	%d1
 385	bfins	%d0,(%a0){%d1,#8}
 386	jra	fp_e2e_checkround
 3872:	lsl.l	%d1,%d0
 388	move.b	%d0,(-4,%a0)
 389	lsr.l	#8,%d0
 390	move.b	%d0,(7,%a0)
 391	jra	fp_e2e_checkround
 392#endif
 3931:	move.l	%d0,%d1			| lower lword is splitted between
 394	lsl.l	%d2,%d0			| higher and lower lword
 395	move.l	%d0,(%a0)
 396	move.l	%d1,%d0
 397	neg.w	%d2
 398	add.w	#32,%d2
 399	lsr.l	%d2,%d0
 400	move.l	%d0,-(%a0)
 401	jra	fp_e2e_checkround
 402	| Infinities and NaNs
 403fp_e2e_large:
 404	move.l	(%a0)+,%d0
 405	jne	3f
 4061:	tst.l	(%a0)
 407	jne	4f
 408	moveq	#1,%d0
 4092:	subq.l	#8,%a0
 410	printf	PCONV,"%p(",1,%a0
 411	printx	PCONV,%a0@
 412	printf	PCONV,")\n"
 413	rts
 414	| we have maybe a NaN, shift off the highest bit
 4153:	lsl.l	#1,%d0
 416	jeq	1b
 417	| we have a NaN, clear the return value
 4184:	clrl	%d0
 419	jra	2b
 420
 421
 422/*
 423 * Normalization functions.  Call these on the output of general
 424 * FP operators, and before any conversion into the destination
 425 * formats. fp_normalize_ext has always to be called first, the
 426 * following conversion functions expect an already normalized
 427 * number.
 428 */
 429
 430	| fp_normalize_ext:
 431	| normalize an extended in extended (unpacked) format, basically
 432	| it does the same as fp_conv_ext2ext, additionally it also does
 433	| the necessary postprocessing checks.
 434	| args:	%a0 (struct fp_ext *)
 435	| NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2
 436
 437fp_normalize_ext:
 438	printf	PNORM,"ne: %p(",1,%a0
 439	printx	PNORM,%a0@
 440	printf	PNORM,"), "
 441	move.l	(%a0)+,%d0
 442	cmp.w	#0x7fff,%d0		| Inf / NaN?
 443	jeq	fp_ne_large
 444	move.l	(%a0),%d0
 445	jpl	fp_ne_small		| zero / denorm?
 446	| The high bit is set, so normalization is irrelevant.
 447fp_ne_checkround:
 448	subq.l	#4,%a0
 449#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 450	move.b	(%a0),%d0
 451	jne	fp_ne_round
 452#endif
 453	printf	PNORM,"%p(",1,%a0
 454	printx	PNORM,%a0@
 455	printf	PNORM,")\n"
 456	rts
 457#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 458fp_ne_round:
 459	fp_set_sr FPSR_EXC_INEX2
 460	clr.b	(%a0)
 461	move.w	(FPD_RND,FPDATA),%d2
 462	jne	fp_ne_roundother	| %d2 == 0, round to nearest
 463	tst.b	%d0			| test guard bit
 464	jpl	9f			| zero is closer
 465	btst	#0,(11,%a0)		| test lsb bit
 466	jne	fp_ne_doroundup		| round to infinity
 467	lsl.b	#1,%d0			| check low bits
 468	jeq	9f			| round to zero
 469fp_ne_doroundup:
 470	addq.l	#1,(8,%a0)
 471	jcc	9f
 472	addq.l	#1,(4,%a0)
 473	jcc	9f
 474	addq.w	#1,(2,%a0)
 475	move.w	#0x8000,(4,%a0)
 4769:	printf	PNORM,"%p(",1,%a0
 477	printx	PNORM,%a0@
 478	printf	PNORM,")\n"
 479	rts
 480fp_ne_roundother:
 481	subq.w	#2,%d2
 482	jcs	9b			| %d2 < 2, round to zero
 483	jhi	1f			| %d2 > 2, round to +infinity
 484	tst.b	(1,%a0)			| to -inf
 485	jne	fp_ne_doroundup		| negative, round to infinity
 486	jra	9b			| positive, round to zero
 4871:	tst.b	(1,%a0)			| to +inf
 488	jeq	fp_ne_doroundup		| positive, round to infinity
 489	jra	9b			| negative, round to zero
 490#endif
 491	| Zeros and subnormal numbers
 492	| These are probably merely subnormal, rather than "denormalized"
 493	|  numbers, so we will try to make them normal again.
 494fp_ne_small:
 495	jne	fp_ne_small1		| high lword zero?
 496	move.l	(4,%a0),%d0
 497	jne	fp_ne_small2
 498#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 499	clr.l	%d0
 500	move.b	(-4,%a0),%d0
 501	jne	fp_ne_small3
 502#endif
 503	| Genuine zero.
 504	clr.w	-(%a0)
 505	subq.l	#2,%a0
 506	printf	PNORM,"%p(",1,%a0
 507	printx	PNORM,%a0@
 508	printf	PNORM,")\n"
 509	rts
 510	| Subnormal.
 511fp_ne_small1:
 512	bfffo	%d0{#0,#32},%d1
 513	move.w	-(%a0),%d2
 514	sub.w	%d1,%d2
 515	jcc	1f
 516	| Pathologically small, denormalize.
 517	add.w	%d2,%d1
 518	clr.w	%d2
 519	fp_set_sr FPSR_EXC_UNFL
 5201:	move.w	%d2,(%a0)+
 521	move.w	%d1,%d2
 522	jeq	fp_ne_checkround
 523	| This is exactly the same 64-bit double shift as seen above.
 524	lsl.l	%d2,%d0
 525	move.l	%d0,(%a0)+
 526	move.l	(%a0),%d0
 527	move.l	%d0,%d1
 528	lsl.l	%d2,%d0
 529	move.l	%d0,(%a0)
 530	neg.w	%d2
 531	and.w	#0x1f,%d2
 532	lsr.l	%d2,%d1
 533	or.l	%d1,-(%a0)
 534#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 535fp_ne_extra1:
 536	clr.l	%d0
 537	move.b	(-4,%a0),%d0
 538	neg.w	%d2
 539	add.w	#24,%d2
 540	jcc	1f
 541	clr.b	(-4,%a0)
 542	lsl.l	%d2,%d0
 543	or.l	%d0,(4,%a0)
 544	jra	fp_ne_checkround
 5451:	addq.w	#8,%d2
 546	lsl.l	%d2,%d0
 547	move.b	%d0,(-4,%a0)
 548	lsr.l	#8,%d0
 549	or.l	%d0,(4,%a0)
 550#endif
 551	jra	fp_ne_checkround
 552	| May or may not be subnormal, if so, only 32 bits to shift.
 553fp_ne_small2:
 554	bfffo	%d0{#0,#32},%d1
 555	add.w	#32,%d1
 556	move.w	-(%a0),%d2
 557	sub.w	%d1,%d2
 558	jcc	1f
 559	| Beyond pathologically small, denormalize.
 560	add.w	%d2,%d1
 561	clr.w	%d2
 562	fp_set_sr FPSR_EXC_UNFL
 5631:	move.w	%d2,(%a0)+
 564	ext.l	%d1
 565	jeq	fp_ne_checkround
 566	clr.l	(4,%a0)
 567	sub.w	#32,%d1
 568	jcs	1f
 569	lsl.l	%d1,%d0			| lower lword needs only to be shifted
 570	move.l	%d0,(%a0)		| into the higher lword
 571#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 572	clr.l	%d0
 573	move.b	(-4,%a0),%d0
 574	clr.b	(-4,%a0)
 575	neg.w	%d1
 576	add.w	#32,%d1
 577	bfins	%d0,(%a0){%d1,#8}
 578#endif
 579	jra	fp_ne_checkround
 5801:	neg.w	%d1			| lower lword is splitted between
 581	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
 582#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
 583	jra	fp_ne_checkround
 584#else
 585	move.w	%d1,%d2
 586	jra	fp_ne_extra1
 587	| These are extremely small numbers, that will mostly end up as zero
 588	| anyway, so this is only important for correct rounding.
 589fp_ne_small3:
 590	bfffo	%d0{#24,#8},%d1
 591	add.w	#40,%d1
 592	move.w	-(%a0),%d2
 593	sub.w	%d1,%d2
 594	jcc	1f
 595	| Pathologically small, denormalize.
 596	add.w	%d2,%d1
 597	clr.w	%d2
 5981:	move.w	%d2,(%a0)+
 599	ext.l	%d1
 600	jeq	fp_ne_checkround
 601	cmp.w	#8,%d1
 602	jcs	2f
 6031:	clr.b	(-4,%a0)
 604	sub.w	#64,%d1
 605	jcs	1f
 606	add.w	#24,%d1
 607	lsl.l	%d1,%d0
 608	move.l	%d0,(%a0)
 609	jra	fp_ne_checkround
 6101:	neg.w	%d1
 611	bfins	%d0,(%a0){%d1,#8}
 612	jra	fp_ne_checkround
 6132:	lsl.l	%d1,%d0
 614	move.b	%d0,(-4,%a0)
 615	lsr.l	#8,%d0
 616	move.b	%d0,(7,%a0)
 617	jra	fp_ne_checkround
 618#endif
 619	| Infinities and NaNs, again, same as above.
 620fp_ne_large:
 621	move.l	(%a0)+,%d0
 622	jne	3f
 6231:	tst.l	(%a0)
 624	jne	4f
 6252:	subq.l	#8,%a0
 626	printf	PNORM,"%p(",1,%a0
 627	printx	PNORM,%a0@
 628	printf	PNORM,")\n"
 629	rts
 630	| we have maybe a NaN, shift off the highest bit
 6313:	move.l	%d0,%d1
 632	lsl.l	#1,%d1
 633	jne	4f
 634	clr.l	(-4,%a0)
 635	jra	1b
 636	| we have a NaN, test if it is signaling
 6374:	bset	#30,%d0
 638	jne	2b
 639	fp_set_sr FPSR_EXC_SNAN
 640	move.l	%d0,(-4,%a0)
 641	jra	2b
 642
 643	| these next two do rounding as per the IEEE standard.
 644	| values for the rounding modes appear to be:
 645	| 0:	Round to nearest
 646	| 1:	Round to zero
 647	| 2:	Round to -Infinity
 648	| 3:	Round to +Infinity
 649	| both functions expect that fp_normalize was already
 650	| called (and extended argument is already normalized
 651	| as far as possible), these are used if there is different
 652	| rounding precision is selected and before converting
 653	| into single/double
 654
 655	| fp_normalize_double:
 656	| normalize an extended with double (52-bit) precision
 657	| args:	 %a0 (struct fp_ext *)
 658
 659fp_normalize_double:
 660	printf	PNORM,"nd: %p(",1,%a0
 661	printx	PNORM,%a0@
 662	printf	PNORM,"), "
 663	move.l	(%a0)+,%d2
 664	tst.w	%d2
 665	jeq	fp_nd_zero		| zero / denormalized
 666	cmp.w	#0x7fff,%d2
 667	jeq	fp_nd_huge		| NaN / infinitive.
 668	sub.w	#0x4000-0x3ff,%d2	| will the exponent fit?
 669	jcs	fp_nd_small		| too small.
 670	cmp.w	#0x7fe,%d2
 671	jcc	fp_nd_large		| too big.
 672	addq.l	#4,%a0
 673	move.l	(%a0),%d0		| low lword of mantissa
 674	| now, round off the low 11 bits.
 675fp_nd_round:
 676	moveq	#21,%d1
 677	lsl.l	%d1,%d0			| keep 11 low bits.
 678	jne	fp_nd_checkround	| Are they non-zero?
 679	| nothing to do here
 6809:	subq.l	#8,%a0
 681	printf	PNORM,"%p(",1,%a0
 682	printx	PNORM,%a0@
 683	printf	PNORM,")\n"
 684	rts
 685	| Be careful with the X bit! It contains the lsb
 686	| from the shift above, it is needed for round to nearest.
 687fp_nd_checkround:
 688	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
 689	and.w	#0xf800,(2,%a0)		| clear bits 0-10
 690	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
 691	jne	2f			| %d2 == 0, round to nearest
 692	tst.l	%d0			| test guard bit
 693	jpl	9b			| zero is closer
 694	| here we test the X bit by adding it to %d2
 695	clr.w	%d2			| first set z bit, addx only clears it
 696	addx.w	%d2,%d2			| test lsb bit
 697	| IEEE754-specified "round to even" behaviour.  If the guard
 698	| bit is set, then the number is odd, so rounding works like
 699	| in grade-school arithmetic (i.e. 1.5 rounds to 2.0)
 700	| Otherwise, an equal distance rounds towards zero, so as not
 701	| to produce an odd number.  This is strange, but it is what
 702	| the standard says.
 703	jne	fp_nd_doroundup		| round to infinity
 704	lsl.l	#1,%d0			| check low bits
 705	jeq	9b			| round to zero
 706fp_nd_doroundup:
 707	| round (the mantissa, that is) towards infinity
 708	add.l	#0x800,(%a0)
 709	jcc	9b			| no overflow, good.
 710	addq.l	#1,-(%a0)		| extend to high lword
 711	jcc	1f			| no overflow, good.
 712	| Yow! we have managed to overflow the mantissa.  Since this
 713	| only happens when %d1 was 0xfffff800, it is now zero, so
 714	| reset the high bit, and increment the exponent.
 715	move.w	#0x8000,(%a0)
 716	addq.w	#1,-(%a0)
 717	cmp.w	#0x43ff,(%a0)+		| exponent now overflown?
 718	jeq	fp_nd_large		| yes, so make it infinity.
 7191:	subq.l	#4,%a0
 720	printf	PNORM,"%p(",1,%a0
 721	printx	PNORM,%a0@
 722	printf	PNORM,")\n"
 723	rts
 7242:	subq.w	#2,%d2
 725	jcs	9b			| %d2 < 2, round to zero
 726	jhi	3f			| %d2 > 2, round to +infinity
 727	| Round to +Inf or -Inf.  High word of %d2 contains the
 728	| sign of the number, by the way.
 729	swap	%d2			| to -inf
 730	tst.b	%d2
 731	jne	fp_nd_doroundup		| negative, round to infinity
 732	jra	9b			| positive, round to zero
 7333:	swap	%d2			| to +inf
 734	tst.b	%d2
 735	jeq	fp_nd_doroundup		| positive, round to infinity
 736	jra	9b			| negative, round to zero
 737	| Exponent underflow.  Try to make a denormal, and set it to
 738	| the smallest possible fraction if this fails.
 739fp_nd_small:
 740	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
 741	move.w	#0x3c01,(-2,%a0)	| 2**-1022
 742	neg.w	%d2			| degree of underflow
 743	cmp.w	#32,%d2			| single or double shift?
 744	jcc	1f
 745	| Again, another 64-bit double shift.
 746	move.l	(%a0),%d0
 747	move.l	%d0,%d1
 748	lsr.l	%d2,%d0
 749	move.l	%d0,(%a0)+
 750	move.l	(%a0),%d0
 751	lsr.l	%d2,%d0
 752	neg.w	%d2
 753	add.w	#32,%d2
 754	lsl.l	%d2,%d1
 755	or.l	%d1,%d0
 756	move.l	(%a0),%d1
 757	move.l	%d0,(%a0)
 758	| Check to see if we shifted off any significant bits
 759	lsl.l	%d2,%d1
 760	jeq	fp_nd_round		| Nope, round.
 761	bset	#0,%d0			| Yes, so set the "sticky bit".
 762	jra	fp_nd_round		| Now, round.
 763	| Another 64-bit single shift and store
 7641:	sub.w	#32,%d2
 765	cmp.w	#32,%d2			| Do we really need to shift?
 766	jcc	2f			| No, the number is too small.
 767	move.l	(%a0),%d0
 768	clr.l	(%a0)+
 769	move.l	%d0,%d1
 770	lsr.l	%d2,%d0
 771	neg.w	%d2
 772	add.w	#32,%d2
 773	| Again, check to see if we shifted off any significant bits.
 774	tst.l	(%a0)
 775	jeq	1f
 776	bset	#0,%d0			| Sticky bit.
 7771:	move.l	%d0,(%a0)
 778	lsl.l	%d2,%d1
 779	jeq	fp_nd_round
 780	bset	#0,%d0
 781	jra	fp_nd_round
 782	| Sorry, the number is just too small.
 7832:	clr.l	(%a0)+
 784	clr.l	(%a0)
 785	moveq	#1,%d0			| Smallest possible fraction,
 786	jra	fp_nd_round		| round as desired.
 787	| zero and denormalized
 788fp_nd_zero:
 789	tst.l	(%a0)+
 790	jne	1f
 791	tst.l	(%a0)
 792	jne	1f
 793	subq.l	#8,%a0
 794	printf	PNORM,"%p(",1,%a0
 795	printx	PNORM,%a0@
 796	printf	PNORM,")\n"
 797	rts				| zero.  nothing to do.
 798	| These are not merely subnormal numbers, but true denormals,
 799	| i.e. pathologically small (exponent is 2**-16383) numbers.
 800	| It is clearly impossible for even a normal extended number
 801	| with that exponent to fit into double precision, so just
 802	| write these ones off as "too darn small".
 8031:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
 804	clr.l	(%a0)
 805	clr.l	-(%a0)
 806	move.w	#0x3c01,-(%a0)		| i.e. 2**-1022
 807	addq.l	#6,%a0
 808	moveq	#1,%d0
 809	jra	fp_nd_round		| round.
 810	| Exponent overflow.  Just call it infinity.
 811fp_nd_large:
 812	move.w	#0x7ff,%d0
 813	and.w	(6,%a0),%d0
 814	jeq	1f
 815	fp_set_sr FPSR_EXC_INEX2
 8161:	fp_set_sr FPSR_EXC_OVFL
 817	move.w	(FPD_RND,FPDATA),%d2
 818	jne	3f			| %d2 = 0 round to nearest
 8191:	move.w	#0x7fff,(-2,%a0)
 820	clr.l	(%a0)+
 821	clr.l	(%a0)
 8222:	subq.l	#8,%a0
 823	printf	PNORM,"%p(",1,%a0
 824	printx	PNORM,%a0@
 825	printf	PNORM,")\n"
 826	rts
 8273:	subq.w	#2,%d2
 828	jcs	5f			| %d2 < 2, round to zero
 829	jhi	4f			| %d2 > 2, round to +infinity
 830	tst.b	(-3,%a0)		| to -inf
 831	jne	1b
 832	jra	5f
 8334:	tst.b	(-3,%a0)		| to +inf
 834	jeq	1b
 8355:	move.w	#0x43fe,(-2,%a0)
 836	moveq	#-1,%d0
 837	move.l	%d0,(%a0)+
 838	move.w	#0xf800,%d0
 839	move.l	%d0,(%a0)
 840	jra	2b
 841	| Infinities or NaNs
 842fp_nd_huge:
 843	subq.l	#4,%a0
 844	printf	PNORM,"%p(",1,%a0
 845	printx	PNORM,%a0@
 846	printf	PNORM,")\n"
 847	rts
 848
 849	| fp_normalize_single:
 850	| normalize an extended with single (23-bit) precision
 851	| args:	 %a0 (struct fp_ext *)
 852
 853fp_normalize_single:
 854	printf	PNORM,"ns: %p(",1,%a0
 855	printx	PNORM,%a0@
 856	printf	PNORM,") "
 857	addq.l	#2,%a0
 858	move.w	(%a0)+,%d2
 859	jeq	fp_ns_zero		| zero / denormalized
 860	cmp.w	#0x7fff,%d2
 861	jeq	fp_ns_huge		| NaN / infinitive.
 862	sub.w	#0x4000-0x7f,%d2	| will the exponent fit?
 863	jcs	fp_ns_small		| too small.
 864	cmp.w	#0xfe,%d2
 865	jcc	fp_ns_large		| too big.
 866	move.l	(%a0)+,%d0		| get high lword of mantissa
 867fp_ns_round:
 868	tst.l	(%a0)			| check the low lword
 869	jeq	1f
 870	| Set a sticky bit if it is non-zero.  This should only
 871	| affect the rounding in what would otherwise be equal-
 872	| distance situations, which is what we want it to do.
 873	bset	#0,%d0
 8741:	clr.l	(%a0)			| zap it from memory.
 875	| now, round off the low 8 bits of the hi lword.
 876	tst.b	%d0			| 8 low bits.
 877	jne	fp_ns_checkround	| Are they non-zero?
 878	| nothing to do here
 879	subq.l	#8,%a0
 880	printf	PNORM,"%p(",1,%a0
 881	printx	PNORM,%a0@
 882	printf	PNORM,")\n"
 883	rts
 884fp_ns_checkround:
 885	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
 886	clr.b	-(%a0)			| clear low byte of high lword
 887	subq.l	#3,%a0
 888	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
 889	jne	2f			| %d2 == 0, round to nearest
 890	tst.b	%d0			| test guard bit
 891	jpl	9f			| zero is closer
 892	btst	#8,%d0			| test lsb bit
 893	| round to even behaviour, see above.
 894	jne	fp_ns_doroundup		| round to infinity
 895	lsl.b	#1,%d0			| check low bits
 896	jeq	9f			| round to zero
 897fp_ns_doroundup:
 898	| round (the mantissa, that is) towards infinity
 899	add.l	#0x100,(%a0)
 900	jcc	9f			| no overflow, good.
 901	| Overflow.  This means that the %d1 was 0xffffff00, so it
 902	| is now zero.  We will set the mantissa to reflect this, and
 903	| increment the exponent (checking for overflow there too)
 904	move.w	#0x8000,(%a0)
 905	addq.w	#1,-(%a0)
 906	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
 907	jeq	fp_ns_large		| yes, so make it infinity.
 9089:	subq.l	#4,%a0
 909	printf	PNORM,"%p(",1,%a0
 910	printx	PNORM,%a0@
 911	printf	PNORM,")\n"
 912	rts
 913	| check nondefault rounding modes
 9142:	subq.w	#2,%d2
 915	jcs	9b			| %d2 < 2, round to zero
 916	jhi	3f			| %d2 > 2, round to +infinity
 917	tst.b	(-3,%a0)		| to -inf
 918	jne	fp_ns_doroundup		| negative, round to infinity
 919	jra	9b			| positive, round to zero
 9203:	tst.b	(-3,%a0)		| to +inf
 921	jeq	fp_ns_doroundup		| positive, round to infinity
 922	jra	9b			| negative, round to zero
 923	| Exponent underflow.  Try to make a denormal, and set it to
 924	| the smallest possible fraction if this fails.
 925fp_ns_small:
 926	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
 927	move.w	#0x3f81,(-2,%a0)	| 2**-126
 928	neg.w	%d2			| degree of underflow
 929	cmp.w	#32,%d2			| single or double shift?
 930	jcc	2f
 931	| a 32-bit shift.
 932	move.l	(%a0),%d0
 933	move.l	%d0,%d1
 934	lsr.l	%d2,%d0
 935	move.l	%d0,(%a0)+
 936	| Check to see if we shifted off any significant bits.
 937	neg.w	%d2
 938	add.w	#32,%d2
 939	lsl.l	%d2,%d1
 940	jeq	1f
 941	bset	#0,%d0			| Sticky bit.
 942	| Check the lower lword
 9431:	tst.l	(%a0)
 944	jeq	fp_ns_round
 945	clr	(%a0)
 946	bset	#0,%d0			| Sticky bit.
 947	jra	fp_ns_round
 948	| Sorry, the number is just too small.
 9492:	clr.l	(%a0)+
 950	clr.l	(%a0)
 951	moveq	#1,%d0			| Smallest possible fraction,
 952	jra	fp_ns_round		| round as desired.
 953	| Exponent overflow.  Just call it infinity.
 954fp_ns_large:
 955	tst.b	(3,%a0)
 956	jeq	1f
 957	fp_set_sr FPSR_EXC_INEX2
 9581:	fp_set_sr FPSR_EXC_OVFL
 959	move.w	(FPD_RND,FPDATA),%d2
 960	jne	3f			| %d2 = 0 round to nearest
 9611:	move.w	#0x7fff,(-2,%a0)
 962	clr.l	(%a0)+
 963	clr.l	(%a0)
 9642:	subq.l	#8,%a0
 965	printf	PNORM,"%p(",1,%a0
 966	printx	PNORM,%a0@
 967	printf	PNORM,")\n"
 968	rts
 9693:	subq.w	#2,%d2
 970	jcs	5f			| %d2 < 2, round to zero
 971	jhi	4f			| %d2 > 2, round to +infinity
 972	tst.b	(-3,%a0)		| to -inf
 973	jne	1b
 974	jra	5f
 9754:	tst.b	(-3,%a0)		| to +inf
 976	jeq	1b
 9775:	move.w	#0x407e,(-2,%a0)
 978	move.l	#0xffffff00,(%a0)+
 979	clr.l	(%a0)
 980	jra	2b
 981	| zero and denormalized
 982fp_ns_zero:
 983	tst.l	(%a0)+
 984	jne	1f
 985	tst.l	(%a0)
 986	jne	1f
 987	subq.l	#8,%a0
 988	printf	PNORM,"%p(",1,%a0
 989	printx	PNORM,%a0@
 990	printf	PNORM,")\n"
 991	rts				| zero.  nothing to do.
 992	| These are not merely subnormal numbers, but true denormals,
 993	| i.e. pathologically small (exponent is 2**-16383) numbers.
 994	| It is clearly impossible for even a normal extended number
 995	| with that exponent to fit into single precision, so just
 996	| write these ones off as "too darn small".
 9971:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
 998	clr.l	(%a0)
 999	clr.l	-(%a0)
1000	move.w	#0x3f81,-(%a0)		| i.e. 2**-126
1001	addq.l	#6,%a0
1002	moveq	#1,%d0
1003	jra	fp_ns_round		| round.
1004	| Infinities or NaNs
1005fp_ns_huge:
1006	subq.l	#4,%a0
1007	printf	PNORM,"%p(",1,%a0
1008	printx	PNORM,%a0@
1009	printf	PNORM,")\n"
1010	rts
1011
1012	| fp_normalize_single_fast:
1013	| normalize an extended with single (23-bit) precision
1014	| this is only used by fsgldiv/fsgdlmul, where the
1015	| operand is not completly normalized.
1016	| args:	 %a0 (struct fp_ext *)
1017
1018fp_normalize_single_fast:
1019	printf	PNORM,"nsf: %p(",1,%a0
1020	printx	PNORM,%a0@
1021	printf	PNORM,") "
1022	addq.l	#2,%a0
1023	move.w	(%a0)+,%d2
1024	cmp.w	#0x7fff,%d2
1025	jeq	fp_nsf_huge		| NaN / infinitive.
1026	move.l	(%a0)+,%d0		| get high lword of mantissa
1027fp_nsf_round:
1028	tst.l	(%a0)			| check the low lword
1029	jeq	1f
1030	| Set a sticky bit if it is non-zero.  This should only
1031	| affect the rounding in what would otherwise be equal-
1032	| distance situations, which is what we want it to do.
1033	bset	#0,%d0
10341:	clr.l	(%a0)			| zap it from memory.
1035	| now, round off the low 8 bits of the hi lword.
1036	tst.b	%d0			| 8 low bits.
1037	jne	fp_nsf_checkround	| Are they non-zero?
1038	| nothing to do here
1039	subq.l	#8,%a0
1040	printf	PNORM,"%p(",1,%a0
1041	printx	PNORM,%a0@
1042	printf	PNORM,")\n"
1043	rts
1044fp_nsf_checkround:
1045	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
1046	clr.b	-(%a0)			| clear low byte of high lword
1047	subq.l	#3,%a0
1048	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1049	jne	2f			| %d2 == 0, round to nearest
1050	tst.b	%d0			| test guard bit
1051	jpl	9f			| zero is closer
1052	btst	#8,%d0			| test lsb bit
1053	| round to even behaviour, see above.
1054	jne	fp_nsf_doroundup		| round to infinity
1055	lsl.b	#1,%d0			| check low bits
1056	jeq	9f			| round to zero
1057fp_nsf_doroundup:
1058	| round (the mantissa, that is) towards infinity
1059	add.l	#0x100,(%a0)
1060	jcc	9f			| no overflow, good.
1061	| Overflow.  This means that the %d1 was 0xffffff00, so it
1062	| is now zero.  We will set the mantissa to reflect this, and
1063	| increment the exponent (checking for overflow there too)
1064	move.w	#0x8000,(%a0)
1065	addq.w	#1,-(%a0)
1066	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
1067	jeq	fp_nsf_large		| yes, so make it infinity.
10689:	subq.l	#4,%a0
1069	printf	PNORM,"%p(",1,%a0
1070	printx	PNORM,%a0@
1071	printf	PNORM,")\n"
1072	rts
1073	| check nondefault rounding modes
10742:	subq.w	#2,%d2
1075	jcs	9b			| %d2 < 2, round to zero
1076	jhi	3f			| %d2 > 2, round to +infinity
1077	tst.b	(-3,%a0)		| to -inf
1078	jne	fp_nsf_doroundup	| negative, round to infinity
1079	jra	9b			| positive, round to zero
10803:	tst.b	(-3,%a0)		| to +inf
1081	jeq	fp_nsf_doroundup		| positive, round to infinity
1082	jra	9b			| negative, round to zero
1083	| Exponent overflow.  Just call it infinity.
1084fp_nsf_large:
1085	tst.b	(3,%a0)
1086	jeq	1f
1087	fp_set_sr FPSR_EXC_INEX2
10881:	fp_set_sr FPSR_EXC_OVFL
1089	move.w	(FPD_RND,FPDATA),%d2
1090	jne	3f			| %d2 = 0 round to nearest
10911:	move.w	#0x7fff,(-2,%a0)
1092	clr.l	(%a0)+
1093	clr.l	(%a0)
10942:	subq.l	#8,%a0
1095	printf	PNORM,"%p(",1,%a0
1096	printx	PNORM,%a0@
1097	printf	PNORM,")\n"
1098	rts
10993:	subq.w	#2,%d2
1100	jcs	5f			| %d2 < 2, round to zero
1101	jhi	4f			| %d2 > 2, round to +infinity
1102	tst.b	(-3,%a0)		| to -inf
1103	jne	1b
1104	jra	5f
11054:	tst.b	(-3,%a0)		| to +inf
1106	jeq	1b
11075:	move.w	#0x407e,(-2,%a0)
1108	move.l	#0xffffff00,(%a0)+
1109	clr.l	(%a0)
1110	jra	2b
1111	| Infinities or NaNs
1112fp_nsf_huge:
1113	subq.l	#4,%a0
1114	printf	PNORM,"%p(",1,%a0
1115	printx	PNORM,%a0@
1116	printf	PNORM,")\n"
1117	rts
1118
1119	| conv_ext2int (macro):
1120	| Generates a subroutine that converts an extended value to an
1121	| integer of a given size, again, with the appropriate type of
1122	| rounding.
1123
1124	| Macro arguments:
1125	| s:	size, as given in an assembly instruction.
1126	| b:	number of bits in that size.
1127
1128	| Subroutine arguments:
1129	| %a0:	source (struct fp_ext *)
1130
1131	| Returns the integer in %d0 (like it should)
1132
1133.macro conv_ext2int s,b
1134	.set	inf,(1<<(\b-1))-1	| i.e. MAXINT
1135	printf	PCONV,"e2i%d: %p(",2,#\b,%a0
1136	printx	PCONV,%a0@
1137	printf	PCONV,") "
1138	addq.l	#2,%a0
1139	move.w	(%a0)+,%d2		| exponent
1140	jeq	fp_e2i_zero\b		| zero / denorm (== 0, here)
1141	cmp.w	#0x7fff,%d2
1142	jeq	fp_e2i_huge\b		| Inf / NaN
1143	sub.w	#0x3ffe,%d2
1144	jcs	fp_e2i_small\b
1145	cmp.w	#\b,%d2
1146	jhi	fp_e2i_large\b
1147	move.l	(%a0),%d0
1148	move.l	%d0,%d1
1149	lsl.l	%d2,%d1
1150	jne	fp_e2i_round\b
1151	tst.l	(4,%a0)
1152	jne	fp_e2i_round\b
1153	neg.w	%d2
1154	add.w	#32,%d2
1155	lsr.l	%d2,%d0
11569:	tst.w	(-4,%a0)
1157	jne	1f
1158	tst.\s	%d0
1159	jmi	fp_e2i_large\b
1160	printf	PCONV,"-> %p\n",1,%d0
1161	rts
11621:	neg.\s	%d0
1163	jeq	1f
1164	jpl	fp_e2i_large\b
11651:	printf	PCONV,"-> %p\n",1,%d0
1166	rts
1167fp_e2i_round\b:
1168	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
1169	neg.w	%d2
1170	add.w	#32,%d2
1171	.if	\b>16
1172	jeq	5f
1173	.endif
1174	lsr.l	%d2,%d0
1175	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1176	jne	2f			| %d2 == 0, round to nearest
1177	tst.l	%d1			| test guard bit
1178	jpl	9b			| zero is closer
1179	btst	%d2,%d0			| test lsb bit (%d2 still 0)
1180	jne	fp_e2i_doroundup\b
1181	lsl.l	#1,%d1			| check low bits
1182	jne	fp_e2i_doroundup\b
1183	tst.l	(4,%a0)
1184	jeq	9b
1185fp_e2i_doroundup\b:
1186	addq.l	#1,%d0
1187	jra	9b
1188	| check nondefault rounding modes
11892:	subq.w	#2,%d2
1190	jcs	9b			| %d2 < 2, round to zero
1191	jhi	3f			| %d2 > 2, round to +infinity
1192	tst.w	(-4,%a0)		| to -inf
1193	jne	fp_e2i_doroundup\b	| negative, round to infinity
1194	jra	9b			| positive, round to zero
11953:	tst.w	(-4,%a0)		| to +inf
1196	jeq	fp_e2i_doroundup\b	| positive, round to infinity
1197	jra	9b	| negative, round to zero
1198	| we are only want -2**127 get correctly rounded here,
1199	| since the guard bit is in the lower lword.
1200	| everything else ends up anyway as overflow.
1201	.if	\b>16
12025:	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1203	jne	2b			| %d2 == 0, round to nearest
1204	move.l	(4,%a0),%d1		| test guard bit
1205	jpl	9b			| zero is closer
1206	lsl.l	#1,%d1			| check low bits
1207	jne	fp_e2i_doroundup\b
1208	jra	9b
1209	.endif
1210fp_e2i_zero\b:
1211	clr.l	%d0
1212	tst.l	(%a0)+
1213	jne	1f
1214	tst.l	(%a0)
1215	jeq	3f
12161:	subq.l	#4,%a0
1217	fp_clr_sr FPSR_EXC_UNFL		| fp_normalize_ext has set this bit
1218fp_e2i_small\b:
1219	fp_set_sr FPSR_EXC_INEX2
1220	clr.l	%d0
1221	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1222	subq.w	#2,%d2
1223	jcs	3f			| %d2 < 2, round to nearest/zero
1224	jhi	2f			| %d2 > 2, round to +infinity
1225	tst.w	(-4,%a0)		| to -inf
1226	jeq	3f
1227	subq.\s	#1,%d0
1228	jra	3f
12292:	tst.w	(-4,%a0)		| to +inf
1230	jne	3f
1231	addq.\s	#1,%d0
12323:	printf	PCONV,"-> %p\n",1,%d0
1233	rts
1234fp_e2i_large\b:
1235	fp_set_sr FPSR_EXC_OPERR
1236	move.\s	#inf,%d0
1237	tst.w	(-4,%a0)
1238	jeq	1f
1239	addq.\s	#1,%d0
12401:	printf	PCONV,"-> %p\n",1,%d0
1241	rts
1242fp_e2i_huge\b:
1243	move.\s	(%a0),%d0
1244	tst.l	(%a0)
1245	jne	1f
1246	tst.l	(%a0)
1247	jeq	fp_e2i_large\b
1248	| fp_normalize_ext has set this bit already
1249	| and made the number nonsignaling
12501:	fp_tst_sr FPSR_EXC_SNAN
1251	jne	1f
1252	fp_set_sr FPSR_EXC_OPERR
12531:	printf	PCONV,"-> %p\n",1,%d0
1254	rts
1255.endm
1256
1257fp_conv_ext2long:
1258	conv_ext2int l,32
1259
1260fp_conv_ext2short:
1261	conv_ext2int w,16
1262
1263fp_conv_ext2byte:
1264	conv_ext2int b,8
1265
1266fp_conv_ext2double:
1267	jsr	fp_normalize_double
1268	printf	PCONV,"e2d: %p(",1,%a0
1269	printx	PCONV,%a0@
1270	printf	PCONV,"), "
1271	move.l	(%a0)+,%d2
1272	cmp.w	#0x7fff,%d2
1273	jne	1f
1274	move.w	#0x7ff,%d2
1275	move.l	(%a0)+,%d0
1276	jra	2f
12771:	sub.w	#0x3fff-0x3ff,%d2
1278	move.l	(%a0)+,%d0
1279	jmi	2f
1280	clr.w	%d2
12812:	lsl.w	#5,%d2
1282	lsl.l	#7,%d2
1283	lsl.l	#8,%d2
1284	move.l	%d0,%d1
1285	lsl.l	#1,%d0
1286	lsr.l	#4,%d0
1287	lsr.l	#8,%d0
1288	or.l	%d2,%d0
1289	putuser.l %d0,(%a1)+,fp_err_ua2,%a1
1290	moveq	#21,%d0
1291	lsl.l	%d0,%d1
1292	move.l	(%a0),%d0
1293	lsr.l	#4,%d0
1294	lsr.l	#7,%d0
1295	or.l	%d1,%d0
1296	putuser.l %d0,(%a1),fp_err_ua2,%a1
1297#ifdef FPU_EMU_DEBUG
1298	getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
1299	getuser.l %a1@(0),%d1,fp_err_ua2,%a1
1300	printf	PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
1301#endif
1302	rts
1303
1304fp_conv_ext2single:
1305	jsr	fp_normalize_single
1306	printf	PCONV,"e2s: %p(",1,%a0
1307	printx	PCONV,%a0@
1308	printf	PCONV,"), "
1309	move.l	(%a0)+,%d1
1310	cmp.w	#0x7fff,%d1
1311	jne	1f
1312	move.w	#0xff,%d1
1313	move.l	(%a0)+,%d0
1314	jra	2f
13151:	sub.w	#0x3fff-0x7f,%d1
1316	move.l	(%a0)+,%d0
1317	jmi	2f
1318	clr.w	%d1
13192:	lsl.w	#8,%d1
1320	lsl.l	#7,%d1
1321	lsl.l	#8,%d1
1322	bclr	#31,%d0
1323	lsr.l	#8,%d0
1324	or.l	%d1,%d0
1325	printf	PCONV,"%08x\n",1,%d0
1326	rts
1327
1328	| special return addresses for instr that
1329	| encode the rounding precision in the opcode
1330	| (e.g. fsmove,fdmove)
1331
1332fp_finalrounding_single:
1333	addq.l	#8,%sp
1334	jsr	fp_normalize_ext
1335	jsr	fp_normalize_single
1336	jra	fp_finaltest
1337
1338fp_finalrounding_single_fast:
1339	addq.l	#8,%sp
1340	jsr	fp_normalize_ext
1341	jsr	fp_normalize_single_fast
1342	jra	fp_finaltest
1343
1344fp_finalrounding_double:
1345	addq.l	#8,%sp
1346	jsr	fp_normalize_ext
1347	jsr	fp_normalize_double
1348	jra	fp_finaltest
1349
1350	| fp_finaltest:
1351	| set the emulated status register based on the outcome of an
1352	| emulated instruction.
1353
1354fp_finalrounding:
1355	addq.l	#8,%sp
1356|	printf	,"f: %p\n",1,%a0
1357	jsr	fp_normalize_ext
1358	move.w	(FPD_PREC,FPDATA),%d0
1359	subq.w	#1,%d0
1360	jcs	fp_finaltest
1361	jne	1f
1362	jsr	fp_normalize_single
1363	jra	2f
13641:	jsr	fp_normalize_double
13652:|	printf	,"f: %p\n",1,%a0
1366fp_finaltest:
1367	| First, we do some of the obvious tests for the exception
1368	| status byte and condition code bytes of fp_sr here, so that
1369	| they do not have to be handled individually by every
1370	| emulated instruction.
1371	clr.l	%d0
1372	addq.l	#1,%a0
1373	tst.b	(%a0)+			| sign
1374	jeq	1f
1375	bset	#FPSR_CC_NEG-24,%d0	| N bit
13761:	cmp.w	#0x7fff,(%a0)+		| exponent
1377	jeq	2f
1378	| test for zero
1379	moveq	#FPSR_CC_Z-24,%d1
1380	tst.l	(%a0)+
1381	jne	9f
1382	tst.l	(%a0)
1383	jne	9f
1384	jra	8f
1385	| infinitiv and NAN
13862:	moveq	#FPSR_CC_NAN-24,%d1
1387	move.l	(%a0)+,%d2
1388	lsl.l	#1,%d2			| ignore high bit
1389	jne	8f
1390	tst.l	(%a0)
1391	jne	8f
1392	moveq	#FPSR_CC_INF-24,%d1
13938:	bset	%d1,%d0
13949:	move.b	%d0,(FPD_FPSR+0,FPDATA)	| set condition test result
1395	| move instructions enter here
1396	| Here, we test things in the exception status byte, and set
1397	| other things in the accrued exception byte accordingly.
1398	| Emulated instructions can set various things in the former,
1399	| as defined in fp_emu.h.
1400fp_final:
1401	move.l	(FPD_FPSR,FPDATA),%d0
1402#if 0
1403	btst	#FPSR_EXC_SNAN,%d0	| EXC_SNAN
1404	jne	1f
1405	btst	#FPSR_EXC_OPERR,%d0	| EXC_OPERR
1406	jeq	2f
14071:	bset	#FPSR_AEXC_IOP,%d0	| set IOP bit
14082:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
1409	jeq	1f
1410	bset	#FPSR_AEXC_OVFL,%d0	| set OVFL bit
14111:	btst	#FPSR_EXC_UNFL,%d0	| EXC_UNFL
1412	jeq	1f
1413	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
1414	jeq	1f
1415	bset	#FPSR_AEXC_UNFL,%d0	| set UNFL bit
14161:	btst	#FPSR_EXC_DZ,%d0	| EXC_INEX1
1417	jeq	1f
1418	bset	#FPSR_AEXC_DZ,%d0	| set DZ bit
14191:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
1420	jne	1f
1421	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
1422	jne	1f
1423	btst	#FPSR_EXC_INEX1,%d0	| EXC_INEX1
1424	jeq	2f
14251:	bset	#FPSR_AEXC_INEX,%d0	| set INEX bit
14262:	move.l	%d0,(FPD_FPSR,FPDATA)
1427#else
1428	| same as above, greatly optimized, but untested (yet)
1429	move.l	%d0,%d2
1430	lsr.l	#5,%d0
1431	move.l	%d0,%d1
1432	lsr.l	#4,%d1
1433	or.l	%d0,%d1
1434	and.b	#0x08,%d1
1435	move.l	%d2,%d0
1436	lsr.l	#6,%d0
1437	or.l	%d1,%d0
1438	move.l	%d2,%d1
1439	lsr.l	#4,%d1
1440	or.b	#0xdf,%d1
1441	and.b	%d1,%d0
1442	move.l	%d2,%d1
1443	lsr.l	#7,%d1
1444	and.b	#0x80,%d1
1445	or.b	%d1,%d0
1446	and.b	#0xf8,%d0
1447	or.b	%d0,%d2
1448	move.l	%d2,(FPD_FPSR,FPDATA)
1449#endif
1450	move.b	(FPD_FPSR+2,FPDATA),%d0
1451	and.b	(FPD_FPCR+2,FPDATA),%d0
1452	jeq	1f
1453	printf	,"send signal!!!\n"
14541:	jra	fp_end

   1/*
   2 * fp_util.S
   3 *
   4 * Copyright Roman Zippel, 1997.  All rights reserved.
   5 *
   6 * Redistribution and use in source and binary forms, with or without
   7 * modification, are permitted provided that the following conditions
   8 * are met:
   9 * 1. Redistributions of source code must retain the above copyright
  10 *    notice, and the entire permission notice in its entirety,
  11 *    including the disclaimer of warranties.
  12 * 2. Redistributions in binary form must reproduce the above copyright
  13 *    notice, this list of conditions and the following disclaimer in the
  14 *    documentation and/or other materials provided with the distribution.
  15 * 3. The name of the author may not be used to endorse or promote
  16 *    products derived from this software without specific prior
  17 *    written permission.
  18 *
  19 * ALTERNATIVELY, this product may be distributed under the terms of
  20 * the GNU General Public License, in which case the provisions of the GPL are
  21 * required INSTEAD OF the above restrictions.  (This clause is
  22 * necessary due to a potential bad interaction between the GPL and
  23 * the restrictions contained in a BSD-style copyright.)
  24 *
  25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  28 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  35 * OF THE POSSIBILITY OF SUCH DAMAGE.
  36 */
  37
  38#include "fp_emu.h"
  39
  40/*
  41 * Here are lots of conversion and normalization functions mainly
  42 * used by fp_scan.S
  43 * Note that these functions are optimized for "normal" numbers,
  44 * these are handled first and exit as fast as possible, this is
  45 * especially important for fp_normalize_ext/fp_conv_ext2ext, as
  46 * it's called very often.
  47 * The register usage is optimized for fp_scan.S and which register
  48 * is currently at that time unused, be careful if you want change
  49 * something here. %d0 and %d1 is always usable, sometimes %d2 (or
  50 * only the lower half) most function have to return the %a0
  51 * unmodified, so that the caller can immediately reuse it.
  52 */
  53
  54	.globl	fp_ill, fp_end
  55
  56	| exits from fp_scan:
  57	| illegal instruction
  58fp_ill:
  59	printf	,"fp_illegal\n"
  60	rts
  61	| completed instruction
  62fp_end:
  63	tst.l	(TASK_MM-8,%a2)
  64	jmi	1f
  65	tst.l	(TASK_MM-4,%a2)
  66	jmi	1f
  67	tst.l	(TASK_MM,%a2)
  68	jpl	2f
  691:	printf	,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
  702:	clr.l	%d0
  71	rts
  72
  73	.globl	fp_conv_long2ext, fp_conv_single2ext
  74	.globl	fp_conv_double2ext, fp_conv_ext2ext
  75	.globl	fp_normalize_ext, fp_normalize_double
  76	.globl	fp_normalize_single, fp_normalize_single_fast
  77	.globl	fp_conv_ext2double, fp_conv_ext2single
  78	.globl	fp_conv_ext2long, fp_conv_ext2short
  79	.globl	fp_conv_ext2byte
  80	.globl	fp_finalrounding_single, fp_finalrounding_single_fast
  81	.globl	fp_finalrounding_double
  82	.globl	fp_finalrounding, fp_finaltest, fp_final
  83
  84/*
  85 * First several conversion functions from a source operand
  86 * into the extended format. Note, that only fp_conv_ext2ext
  87 * normalizes the number and is always called after the other
  88 * conversion functions, which only move the information into
  89 * fp_ext structure.
  90 */
  91
  92	| fp_conv_long2ext:
  93	|
  94	| args:	%d0 = source (32-bit long)
  95	|	%a0 = destination (ptr to struct fp_ext)
  96
  97fp_conv_long2ext:
  98	printf	PCONV,"l2e: %p -> %p(",2,%d0,%a0
  99	clr.l	%d1			| sign defaults to zero
 100	tst.l	%d0
 101	jeq	fp_l2e_zero		| is source zero?
 102	jpl	1f			| positive?
 103	moveq	#1,%d1
 104	neg.l	%d0
 1051:	swap	%d1
 106	move.w	#0x3fff+31,%d1
 107	move.l	%d1,(%a0)+		| set sign / exp
 108	move.l	%d0,(%a0)+		| set mantissa
 109	clr.l	(%a0)
 110	subq.l	#8,%a0			| restore %a0
 111	printx	PCONV,%a0@
 112	printf	PCONV,")\n"
 113	rts
 114	| source is zero
 115fp_l2e_zero:
 116	clr.l	(%a0)+
 117	clr.l	(%a0)+
 118	clr.l	(%a0)
 119	subq.l	#8,%a0
 120	printx	PCONV,%a0@
 121	printf	PCONV,")\n"
 122	rts
 123
 124	| fp_conv_single2ext
 125	| args:	%d0 = source (single-precision fp value)
 126	|	%a0 = dest (struct fp_ext *)
 127
 128fp_conv_single2ext:
 129	printf	PCONV,"s2e: %p -> %p(",2,%d0,%a0
 130	move.l	%d0,%d1
 131	lsl.l	#8,%d0			| shift mantissa
 132	lsr.l	#8,%d1			| exponent / sign
 133	lsr.l	#7,%d1
 134	lsr.w	#8,%d1
 135	jeq	fp_s2e_small		| zero / denormal?
 136	cmp.w	#0xff,%d1		| NaN / Inf?
 137	jeq	fp_s2e_large
 138	bset	#31,%d0			| set explizit bit
 139	add.w	#0x3fff-0x7f,%d1	| re-bias the exponent.
 1409:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
 141	move.l	%d0,(%a0)+		| high lword of fp_ext.mant
 142	clr.l	(%a0)			| low lword = 0
 143	subq.l	#8,%a0
 144	printx	PCONV,%a0@
 145	printf	PCONV,")\n"
 146	rts
 147	| zeros and denormalized
 148fp_s2e_small:
 149	| exponent is zero, so explizit bit is already zero too
 150	tst.l	%d0
 151	jeq	9b
 152	move.w	#0x4000-0x7f,%d1
 153	jra	9b
 154	| infinities and NAN
 155fp_s2e_large:
 156	bclr	#31,%d0			| clear explizit bit
 157	move.w	#0x7fff,%d1
 158	jra	9b
 159
 160fp_conv_double2ext:
 161#ifdef FPU_EMU_DEBUG
 162	getuser.l %a1@(0),%d0,fp_err_ua2,%a1
 163	getuser.l %a1@(4),%d1,fp_err_ua2,%a1
 164	printf	PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
 165#endif
 166	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
 167	move.l	%d0,%d1
 168	lsl.l	#8,%d0			| shift high mantissa
 169	lsl.l	#3,%d0
 170	lsr.l	#8,%d1			| exponent / sign
 171	lsr.l	#7,%d1
 172	lsr.w	#5,%d1
 173	jeq	fp_d2e_small		| zero / denormal?
 174	cmp.w	#0x7ff,%d1		| NaN / Inf?
 175	jeq	fp_d2e_large
 176	bset	#31,%d0			| set explizit bit
 177	add.w	#0x3fff-0x3ff,%d1	| re-bias the exponent.
 1789:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
 179	move.l	%d0,(%a0)+
 180	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
 181	move.l	%d0,%d1
 182	lsl.l	#8,%d0
 183	lsl.l	#3,%d0
 184	move.l	%d0,(%a0)
 185	moveq	#21,%d0
 186	lsr.l	%d0,%d1
 187	or.l	%d1,-(%a0)
 188	subq.l	#4,%a0
 189	printx	PCONV,%a0@
 190	printf	PCONV,")\n"
 191	rts
 192	| zeros and denormalized
 193fp_d2e_small:
 194	| exponent is zero, so explizit bit is already zero too
 195	tst.l	%d0
 196	jeq	9b
 197	move.w	#0x4000-0x3ff,%d1
 198	jra	9b
 199	| infinities and NAN
 200fp_d2e_large:
 201	bclr	#31,%d0			| clear explizit bit
 202	move.w	#0x7fff,%d1
 203	jra	9b
 204
 205	| fp_conv_ext2ext:
 206	| originally used to get longdouble from userspace, now it's
 207	| called before arithmetic operations to make sure the number
 208	| is normalized [maybe rename it?].
 209	| args:	%a0 = dest (struct fp_ext *)
 210	| returns 0 in %d0 for a NaN, otherwise 1
 211
 212fp_conv_ext2ext:
 213	printf	PCONV,"e2e: %p(",1,%a0
 214	printx	PCONV,%a0@
 215	printf	PCONV,"), "
 216	move.l	(%a0)+,%d0
 217	cmp.w	#0x7fff,%d0		| Inf / NaN?
 218	jeq	fp_e2e_large
 219	move.l	(%a0),%d0
 220	jpl	fp_e2e_small		| zero / denorm?
 221	| The high bit is set, so normalization is irrelevant.
 222fp_e2e_checkround:
 223	subq.l	#4,%a0
 224#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 225	move.b	(%a0),%d0
 226	jne	fp_e2e_round
 227#endif
 228	printf	PCONV,"%p(",1,%a0
 229	printx	PCONV,%a0@
 230	printf	PCONV,")\n"
 231	moveq	#1,%d0
 232	rts
 233#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 234fp_e2e_round:
 235	fp_set_sr FPSR_EXC_INEX2
 236	clr.b	(%a0)
 237	move.w	(FPD_RND,FPDATA),%d2
 238	jne	fp_e2e_roundother	| %d2 == 0, round to nearest
 239	tst.b	%d0			| test guard bit
 240	jpl	9f			| zero is closer
 241	btst	#0,(11,%a0)		| test lsb bit
 242	jne	fp_e2e_doroundup	| round to infinity
 243	lsl.b	#1,%d0			| check low bits
 244	jeq	9f			| round to zero
 245fp_e2e_doroundup:
 246	addq.l	#1,(8,%a0)
 247	jcc	9f
 248	addq.l	#1,(4,%a0)
 249	jcc	9f
 250	move.w	#0x8000,(4,%a0)
 251	addq.w	#1,(2,%a0)
 2529:	printf	PNORM,"%p(",1,%a0
 253	printx	PNORM,%a0@
 254	printf	PNORM,")\n"
 255	rts
 256fp_e2e_roundother:
 257	subq.w	#2,%d2
 258	jcs	9b			| %d2 < 2, round to zero
 259	jhi	1f			| %d2 > 2, round to +infinity
 260	tst.b	(1,%a0)			| to -inf
 261	jne	fp_e2e_doroundup	| negative, round to infinity
 262	jra	9b			| positive, round to zero
 2631:	tst.b	(1,%a0)			| to +inf
 264	jeq	fp_e2e_doroundup	| positive, round to infinity
 265	jra	9b			| negative, round to zero
 266#endif
 267	| zeros and subnormals:
 268	| try to normalize these anyway.
 269fp_e2e_small:
 270	jne	fp_e2e_small1		| high lword zero?
 271	move.l	(4,%a0),%d0
 272	jne	fp_e2e_small2
 273#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 274	clr.l	%d0
 275	move.b	(-4,%a0),%d0
 276	jne	fp_e2e_small3
 277#endif
 278	| Genuine zero.
 279	clr.w	-(%a0)
 280	subq.l	#2,%a0
 281	printf	PNORM,"%p(",1,%a0
 282	printx	PNORM,%a0@
 283	printf	PNORM,")\n"
 284	moveq	#1,%d0
 285	rts
 286	| definitely subnormal, need to shift all 64 bits
 287fp_e2e_small1:
 288	bfffo	%d0{#0,#32},%d1
 289	move.w	-(%a0),%d2
 290	sub.w	%d1,%d2
 291	jcc	1f
 292	| Pathologically small, denormalize.
 293	add.w	%d2,%d1
 294	clr.w	%d2
 2951:	move.w	%d2,(%a0)+
 296	move.w	%d1,%d2
 297	jeq	fp_e2e_checkround
 298	| fancy 64-bit double-shift begins here
 299	lsl.l	%d2,%d0
 300	move.l	%d0,(%a0)+
 301	move.l	(%a0),%d0
 302	move.l	%d0,%d1
 303	lsl.l	%d2,%d0
 304	move.l	%d0,(%a0)
 305	neg.w	%d2
 306	and.w	#0x1f,%d2
 307	lsr.l	%d2,%d1
 308	or.l	%d1,-(%a0)
 309#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 310fp_e2e_extra1:
 311	clr.l	%d0
 312	move.b	(-4,%a0),%d0
 313	neg.w	%d2
 314	add.w	#24,%d2
 315	jcc	1f
 316	clr.b	(-4,%a0)
 317	lsl.l	%d2,%d0
 318	or.l	%d0,(4,%a0)
 319	jra	fp_e2e_checkround
 3201:	addq.w	#8,%d2
 321	lsl.l	%d2,%d0
 322	move.b	%d0,(-4,%a0)
 323	lsr.l	#8,%d0
 324	or.l	%d0,(4,%a0)
 325#endif
 326	jra	fp_e2e_checkround
 327	| pathologically small subnormal
 328fp_e2e_small2:
 329	bfffo	%d0{#0,#32},%d1
 330	add.w	#32,%d1
 331	move.w	-(%a0),%d2
 332	sub.w	%d1,%d2
 333	jcc	1f
 334	| Beyond pathologically small, denormalize.
 335	add.w	%d2,%d1
 336	clr.w	%d2
 3371:	move.w	%d2,(%a0)+
 338	ext.l	%d1
 339	jeq	fp_e2e_checkround
 340	clr.l	(4,%a0)
 341	sub.w	#32,%d2
 342	jcs	1f
 343	lsl.l	%d1,%d0			| lower lword needs only to be shifted
 344	move.l	%d0,(%a0)		| into the higher lword
 345#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 346	clr.l	%d0
 347	move.b	(-4,%a0),%d0
 348	clr.b	(-4,%a0)
 349	neg.w	%d1
 350	add.w	#32,%d1
 351	bfins	%d0,(%a0){%d1,#8}
 352#endif
 353	jra	fp_e2e_checkround
 3541:	neg.w	%d1			| lower lword is splitted between
 355	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
 356#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
 357	jra	fp_e2e_checkround
 358#else
 359	move.w	%d1,%d2
 360	jra	fp_e2e_extra1
 361	| These are extremely small numbers, that will mostly end up as zero
 362	| anyway, so this is only important for correct rounding.
 363fp_e2e_small3:
 364	bfffo	%d0{#24,#8},%d1
 365	add.w	#40,%d1
 366	move.w	-(%a0),%d2
 367	sub.w	%d1,%d2
 368	jcc	1f
 369	| Pathologically small, denormalize.
 370	add.w	%d2,%d1
 371	clr.w	%d2
 3721:	move.w	%d2,(%a0)+
 373	ext.l	%d1
 374	jeq	fp_e2e_checkround
 375	cmp.w	#8,%d1
 376	jcs	2f
 3771:	clr.b	(-4,%a0)
 378	sub.w	#64,%d1
 379	jcs	1f
 380	add.w	#24,%d1
 381	lsl.l	%d1,%d0
 382	move.l	%d0,(%a0)
 383	jra	fp_e2e_checkround
 3841:	neg.w	%d1
 385	bfins	%d0,(%a0){%d1,#8}
 386	jra	fp_e2e_checkround
 3872:	lsl.l	%d1,%d0
 388	move.b	%d0,(-4,%a0)
 389	lsr.l	#8,%d0
 390	move.b	%d0,(7,%a0)
 391	jra	fp_e2e_checkround
 392#endif
 3931:	move.l	%d0,%d1			| lower lword is splitted between
 394	lsl.l	%d2,%d0			| higher and lower lword
 395	move.l	%d0,(%a0)
 396	move.l	%d1,%d0
 397	neg.w	%d2
 398	add.w	#32,%d2
 399	lsr.l	%d2,%d0
 400	move.l	%d0,-(%a0)
 401	jra	fp_e2e_checkround
 402	| Infinities and NaNs
 403fp_e2e_large:
 404	move.l	(%a0)+,%d0
 405	jne	3f
 4061:	tst.l	(%a0)
 407	jne	4f
 408	moveq	#1,%d0
 4092:	subq.l	#8,%a0
 410	printf	PCONV,"%p(",1,%a0
 411	printx	PCONV,%a0@
 412	printf	PCONV,")\n"
 413	rts
 414	| we have maybe a NaN, shift off the highest bit
 4153:	lsl.l	#1,%d0
 416	jeq	1b
 417	| we have a NaN, clear the return value
 4184:	clrl	%d0
 419	jra	2b
 420
 421
 422/*
 423 * Normalization functions.  Call these on the output of general
 424 * FP operators, and before any conversion into the destination
 425 * formats. fp_normalize_ext has always to be called first, the
 426 * following conversion functions expect an already normalized
 427 * number.
 428 */
 429
 430	| fp_normalize_ext:
 431	| normalize an extended in extended (unpacked) format, basically
 432	| it does the same as fp_conv_ext2ext, additionally it also does
 433	| the necessary postprocessing checks.
 434	| args:	%a0 (struct fp_ext *)
 435	| NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2
 436
 437fp_normalize_ext:
 438	printf	PNORM,"ne: %p(",1,%a0
 439	printx	PNORM,%a0@
 440	printf	PNORM,"), "
 441	move.l	(%a0)+,%d0
 442	cmp.w	#0x7fff,%d0		| Inf / NaN?
 443	jeq	fp_ne_large
 444	move.l	(%a0),%d0
 445	jpl	fp_ne_small		| zero / denorm?
 446	| The high bit is set, so normalization is irrelevant.
 447fp_ne_checkround:
 448	subq.l	#4,%a0
 449#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 450	move.b	(%a0),%d0
 451	jne	fp_ne_round
 452#endif
 453	printf	PNORM,"%p(",1,%a0
 454	printx	PNORM,%a0@
 455	printf	PNORM,")\n"
 456	rts
 457#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 458fp_ne_round:
 459	fp_set_sr FPSR_EXC_INEX2
 460	clr.b	(%a0)
 461	move.w	(FPD_RND,FPDATA),%d2
 462	jne	fp_ne_roundother	| %d2 == 0, round to nearest
 463	tst.b	%d0			| test guard bit
 464	jpl	9f			| zero is closer
 465	btst	#0,(11,%a0)		| test lsb bit
 466	jne	fp_ne_doroundup		| round to infinity
 467	lsl.b	#1,%d0			| check low bits
 468	jeq	9f			| round to zero
 469fp_ne_doroundup:
 470	addq.l	#1,(8,%a0)
 471	jcc	9f
 472	addq.l	#1,(4,%a0)
 473	jcc	9f
 474	addq.w	#1,(2,%a0)
 475	move.w	#0x8000,(4,%a0)
 4769:	printf	PNORM,"%p(",1,%a0
 477	printx	PNORM,%a0@
 478	printf	PNORM,")\n"
 479	rts
 480fp_ne_roundother:
 481	subq.w	#2,%d2
 482	jcs	9b			| %d2 < 2, round to zero
 483	jhi	1f			| %d2 > 2, round to +infinity
 484	tst.b	(1,%a0)			| to -inf
 485	jne	fp_ne_doroundup		| negative, round to infinity
 486	jra	9b			| positive, round to zero
 4871:	tst.b	(1,%a0)			| to +inf
 488	jeq	fp_ne_doroundup		| positive, round to infinity
 489	jra	9b			| negative, round to zero
 490#endif
 491	| Zeros and subnormal numbers
 492	| These are probably merely subnormal, rather than "denormalized"
 493	|  numbers, so we will try to make them normal again.
 494fp_ne_small:
 495	jne	fp_ne_small1		| high lword zero?
 496	move.l	(4,%a0),%d0
 497	jne	fp_ne_small2
 498#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 499	clr.l	%d0
 500	move.b	(-4,%a0),%d0
 501	jne	fp_ne_small3
 502#endif
 503	| Genuine zero.
 504	clr.w	-(%a0)
 505	subq.l	#2,%a0
 506	printf	PNORM,"%p(",1,%a0
 507	printx	PNORM,%a0@
 508	printf	PNORM,")\n"
 509	rts
 510	| Subnormal.
 511fp_ne_small1:
 512	bfffo	%d0{#0,#32},%d1
 513	move.w	-(%a0),%d2
 514	sub.w	%d1,%d2
 515	jcc	1f
 516	| Pathologically small, denormalize.
 517	add.w	%d2,%d1
 518	clr.w	%d2
 519	fp_set_sr FPSR_EXC_UNFL
 5201:	move.w	%d2,(%a0)+
 521	move.w	%d1,%d2
 522	jeq	fp_ne_checkround
 523	| This is exactly the same 64-bit double shift as seen above.
 524	lsl.l	%d2,%d0
 525	move.l	%d0,(%a0)+
 526	move.l	(%a0),%d0
 527	move.l	%d0,%d1
 528	lsl.l	%d2,%d0
 529	move.l	%d0,(%a0)
 530	neg.w	%d2
 531	and.w	#0x1f,%d2
 532	lsr.l	%d2,%d1
 533	or.l	%d1,-(%a0)
 534#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 535fp_ne_extra1:
 536	clr.l	%d0
 537	move.b	(-4,%a0),%d0
 538	neg.w	%d2
 539	add.w	#24,%d2
 540	jcc	1f
 541	clr.b	(-4,%a0)
 542	lsl.l	%d2,%d0
 543	or.l	%d0,(4,%a0)
 544	jra	fp_ne_checkround
 5451:	addq.w	#8,%d2
 546	lsl.l	%d2,%d0
 547	move.b	%d0,(-4,%a0)
 548	lsr.l	#8,%d0
 549	or.l	%d0,(4,%a0)
 550#endif
 551	jra	fp_ne_checkround
 552	| May or may not be subnormal, if so, only 32 bits to shift.
 553fp_ne_small2:
 554	bfffo	%d0{#0,#32},%d1
 555	add.w	#32,%d1
 556	move.w	-(%a0),%d2
 557	sub.w	%d1,%d2
 558	jcc	1f
 559	| Beyond pathologically small, denormalize.
 560	add.w	%d2,%d1
 561	clr.w	%d2
 562	fp_set_sr FPSR_EXC_UNFL
 5631:	move.w	%d2,(%a0)+
 564	ext.l	%d1
 565	jeq	fp_ne_checkround
 566	clr.l	(4,%a0)
 567	sub.w	#32,%d1
 568	jcs	1f
 569	lsl.l	%d1,%d0			| lower lword needs only to be shifted
 570	move.l	%d0,(%a0)		| into the higher lword
 571#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 572	clr.l	%d0
 573	move.b	(-4,%a0),%d0
 574	clr.b	(-4,%a0)
 575	neg.w	%d1
 576	add.w	#32,%d1
 577	bfins	%d0,(%a0){%d1,#8}
 578#endif
 579	jra	fp_ne_checkround
 5801:	neg.w	%d1			| lower lword is splitted between
 581	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
 582#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
 583	jra	fp_ne_checkround
 584#else
 585	move.w	%d1,%d2
 586	jra	fp_ne_extra1
 587	| These are extremely small numbers, that will mostly end up as zero
 588	| anyway, so this is only important for correct rounding.
 589fp_ne_small3:
 590	bfffo	%d0{#24,#8},%d1
 591	add.w	#40,%d1
 592	move.w	-(%a0),%d2
 593	sub.w	%d1,%d2
 594	jcc	1f
 595	| Pathologically small, denormalize.
 596	add.w	%d2,%d1
 597	clr.w	%d2
 5981:	move.w	%d2,(%a0)+
 599	ext.l	%d1
 600	jeq	fp_ne_checkround
 601	cmp.w	#8,%d1
 602	jcs	2f
 6031:	clr.b	(-4,%a0)
 604	sub.w	#64,%d1
 605	jcs	1f
 606	add.w	#24,%d1
 607	lsl.l	%d1,%d0
 608	move.l	%d0,(%a0)
 609	jra	fp_ne_checkround
 6101:	neg.w	%d1
 611	bfins	%d0,(%a0){%d1,#8}
 612	jra	fp_ne_checkround
 6132:	lsl.l	%d1,%d0
 614	move.b	%d0,(-4,%a0)
 615	lsr.l	#8,%d0
 616	move.b	%d0,(7,%a0)
 617	jra	fp_ne_checkround
 618#endif
 619	| Infinities and NaNs, again, same as above.
 620fp_ne_large:
 621	move.l	(%a0)+,%d0
 622	jne	3f
 6231:	tst.l	(%a0)
 624	jne	4f
 6252:	subq.l	#8,%a0
 626	printf	PNORM,"%p(",1,%a0
 627	printx	PNORM,%a0@
 628	printf	PNORM,")\n"
 629	rts
 630	| we have maybe a NaN, shift off the highest bit
 6313:	move.l	%d0,%d1
 632	lsl.l	#1,%d1
 633	jne	4f
 634	clr.l	(-4,%a0)
 635	jra	1b
 636	| we have a NaN, test if it is signaling
 6374:	bset	#30,%d0
 638	jne	2b
 639	fp_set_sr FPSR_EXC_SNAN
 640	move.l	%d0,(-4,%a0)
 641	jra	2b
 642
 643	| these next two do rounding as per the IEEE standard.
 644	| values for the rounding modes appear to be:
 645	| 0:	Round to nearest
 646	| 1:	Round to zero
 647	| 2:	Round to -Infinity
 648	| 3:	Round to +Infinity
 649	| both functions expect that fp_normalize was already
 650	| called (and extended argument is already normalized
 651	| as far as possible), these are used if there is different
 652	| rounding precision is selected and before converting
 653	| into single/double
 654
 655	| fp_normalize_double:
 656	| normalize an extended with double (52-bit) precision
 657	| args:	 %a0 (struct fp_ext *)
 658
 659fp_normalize_double:
 660	printf	PNORM,"nd: %p(",1,%a0
 661	printx	PNORM,%a0@
 662	printf	PNORM,"), "
 663	move.l	(%a0)+,%d2
 664	tst.w	%d2
 665	jeq	fp_nd_zero		| zero / denormalized
 666	cmp.w	#0x7fff,%d2
 667	jeq	fp_nd_huge		| NaN / infinitive.
 668	sub.w	#0x4000-0x3ff,%d2	| will the exponent fit?
 669	jcs	fp_nd_small		| too small.
 670	cmp.w	#0x7fe,%d2
 671	jcc	fp_nd_large		| too big.
 672	addq.l	#4,%a0
 673	move.l	(%a0),%d0		| low lword of mantissa
 674	| now, round off the low 11 bits.
 675fp_nd_round:
 676	moveq	#21,%d1
 677	lsl.l	%d1,%d0			| keep 11 low bits.
 678	jne	fp_nd_checkround	| Are they non-zero?
 679	| nothing to do here
 6809:	subq.l	#8,%a0
 681	printf	PNORM,"%p(",1,%a0
 682	printx	PNORM,%a0@
 683	printf	PNORM,")\n"
 684	rts
 685	| Be careful with the X bit! It contains the lsb
 686	| from the shift above, it is needed for round to nearest.
 687fp_nd_checkround:
 688	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
 689	and.w	#0xf800,(2,%a0)		| clear bits 0-10
 690	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
 691	jne	2f			| %d2 == 0, round to nearest
 692	tst.l	%d0			| test guard bit
 693	jpl	9b			| zero is closer
 694	| here we test the X bit by adding it to %d2
 695	clr.w	%d2			| first set z bit, addx only clears it
 696	addx.w	%d2,%d2			| test lsb bit
 697	| IEEE754-specified "round to even" behaviour.  If the guard
 698	| bit is set, then the number is odd, so rounding works like
 699	| in grade-school arithmetic (i.e. 1.5 rounds to 2.0)
 700	| Otherwise, an equal distance rounds towards zero, so as not
 701	| to produce an odd number.  This is strange, but it is what
 702	| the standard says.
 703	jne	fp_nd_doroundup		| round to infinity
 704	lsl.l	#1,%d0			| check low bits
 705	jeq	9b			| round to zero
 706fp_nd_doroundup:
 707	| round (the mantissa, that is) towards infinity
 708	add.l	#0x800,(%a0)
 709	jcc	9b			| no overflow, good.
 710	addq.l	#1,-(%a0)		| extend to high lword
 711	jcc	1f			| no overflow, good.
 712	| Yow! we have managed to overflow the mantissa.  Since this
 713	| only happens when %d1 was 0xfffff800, it is now zero, so
 714	| reset the high bit, and increment the exponent.
 715	move.w	#0x8000,(%a0)
 716	addq.w	#1,-(%a0)
 717	cmp.w	#0x43ff,(%a0)+		| exponent now overflown?
 718	jeq	fp_nd_large		| yes, so make it infinity.
 7191:	subq.l	#4,%a0
 720	printf	PNORM,"%p(",1,%a0
 721	printx	PNORM,%a0@
 722	printf	PNORM,")\n"
 723	rts
 7242:	subq.w	#2,%d2
 725	jcs	9b			| %d2 < 2, round to zero
 726	jhi	3f			| %d2 > 2, round to +infinity
 727	| Round to +Inf or -Inf.  High word of %d2 contains the
 728	| sign of the number, by the way.
 729	swap	%d2			| to -inf
 730	tst.b	%d2
 731	jne	fp_nd_doroundup		| negative, round to infinity
 732	jra	9b			| positive, round to zero
 7333:	swap	%d2			| to +inf
 734	tst.b	%d2
 735	jeq	fp_nd_doroundup		| positive, round to infinity
 736	jra	9b			| negative, round to zero
 737	| Exponent underflow.  Try to make a denormal, and set it to
 738	| the smallest possible fraction if this fails.
 739fp_nd_small:
 740	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
 741	move.w	#0x3c01,(-2,%a0)	| 2**-1022
 742	neg.w	%d2			| degree of underflow
 743	cmp.w	#32,%d2			| single or double shift?
 744	jcc	1f
 745	| Again, another 64-bit double shift.
 746	move.l	(%a0),%d0
 747	move.l	%d0,%d1
 748	lsr.l	%d2,%d0
 749	move.l	%d0,(%a0)+
 750	move.l	(%a0),%d0
 751	lsr.l	%d2,%d0
 752	neg.w	%d2
 753	add.w	#32,%d2
 754	lsl.l	%d2,%d1
 755	or.l	%d1,%d0
 756	move.l	(%a0),%d1
 757	move.l	%d0,(%a0)
 758	| Check to see if we shifted off any significant bits
 759	lsl.l	%d2,%d1
 760	jeq	fp_nd_round		| Nope, round.
 761	bset	#0,%d0			| Yes, so set the "sticky bit".
 762	jra	fp_nd_round		| Now, round.
 763	| Another 64-bit single shift and store
 7641:	sub.w	#32,%d2
 765	cmp.w	#32,%d2			| Do we really need to shift?
 766	jcc	2f			| No, the number is too small.
 767	move.l	(%a0),%d0
 768	clr.l	(%a0)+
 769	move.l	%d0,%d1
 770	lsr.l	%d2,%d0
 771	neg.w	%d2
 772	add.w	#32,%d2
 773	| Again, check to see if we shifted off any significant bits.
 774	tst.l	(%a0)
 775	jeq	1f
 776	bset	#0,%d0			| Sticky bit.
 7771:	move.l	%d0,(%a0)
 778	lsl.l	%d2,%d1
 779	jeq	fp_nd_round
 780	bset	#0,%d0
 781	jra	fp_nd_round
 782	| Sorry, the number is just too small.
 7832:	clr.l	(%a0)+
 784	clr.l	(%a0)
 785	moveq	#1,%d0			| Smallest possible fraction,
 786	jra	fp_nd_round		| round as desired.
 787	| zero and denormalized
 788fp_nd_zero:
 789	tst.l	(%a0)+
 790	jne	1f
 791	tst.l	(%a0)
 792	jne	1f
 793	subq.l	#8,%a0
 794	printf	PNORM,"%p(",1,%a0
 795	printx	PNORM,%a0@
 796	printf	PNORM,")\n"
 797	rts				| zero.  nothing to do.
 798	| These are not merely subnormal numbers, but true denormals,
 799	| i.e. pathologically small (exponent is 2**-16383) numbers.
 800	| It is clearly impossible for even a normal extended number
 801	| with that exponent to fit into double precision, so just
 802	| write these ones off as "too darn small".
 8031:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
 804	clr.l	(%a0)
 805	clr.l	-(%a0)
 806	move.w	#0x3c01,-(%a0)		| i.e. 2**-1022
 807	addq.l	#6,%a0
 808	moveq	#1,%d0
 809	jra	fp_nd_round		| round.
 810	| Exponent overflow.  Just call it infinity.
 811fp_nd_large:
 812	move.w	#0x7ff,%d0
 813	and.w	(6,%a0),%d0
 814	jeq	1f
 815	fp_set_sr FPSR_EXC_INEX2
 8161:	fp_set_sr FPSR_EXC_OVFL
 817	move.w	(FPD_RND,FPDATA),%d2
 818	jne	3f			| %d2 = 0 round to nearest
 8191:	move.w	#0x7fff,(-2,%a0)
 820	clr.l	(%a0)+
 821	clr.l	(%a0)
 8222:	subq.l	#8,%a0
 823	printf	PNORM,"%p(",1,%a0
 824	printx	PNORM,%a0@
 825	printf	PNORM,")\n"
 826	rts
 8273:	subq.w	#2,%d2
 828	jcs	5f			| %d2 < 2, round to zero
 829	jhi	4f			| %d2 > 2, round to +infinity
 830	tst.b	(-3,%a0)		| to -inf
 831	jne	1b
 832	jra	5f
 8334:	tst.b	(-3,%a0)		| to +inf
 834	jeq	1b
 8355:	move.w	#0x43fe,(-2,%a0)
 836	moveq	#-1,%d0
 837	move.l	%d0,(%a0)+
 838	move.w	#0xf800,%d0
 839	move.l	%d0,(%a0)
 840	jra	2b
 841	| Infinities or NaNs
 842fp_nd_huge:
 843	subq.l	#4,%a0
 844	printf	PNORM,"%p(",1,%a0
 845	printx	PNORM,%a0@
 846	printf	PNORM,")\n"
 847	rts
 848
 849	| fp_normalize_single:
 850	| normalize an extended with single (23-bit) precision
 851	| args:	 %a0 (struct fp_ext *)
 852
 853fp_normalize_single:
 854	printf	PNORM,"ns: %p(",1,%a0
 855	printx	PNORM,%a0@
 856	printf	PNORM,") "
 857	addq.l	#2,%a0
 858	move.w	(%a0)+,%d2
 859	jeq	fp_ns_zero		| zero / denormalized
 860	cmp.w	#0x7fff,%d2
 861	jeq	fp_ns_huge		| NaN / infinitive.
 862	sub.w	#0x4000-0x7f,%d2	| will the exponent fit?
 863	jcs	fp_ns_small		| too small.
 864	cmp.w	#0xfe,%d2
 865	jcc	fp_ns_large		| too big.
 866	move.l	(%a0)+,%d0		| get high lword of mantissa
 867fp_ns_round:
 868	tst.l	(%a0)			| check the low lword
 869	jeq	1f
 870	| Set a sticky bit if it is non-zero.  This should only
 871	| affect the rounding in what would otherwise be equal-
 872	| distance situations, which is what we want it to do.
 873	bset	#0,%d0
 8741:	clr.l	(%a0)			| zap it from memory.
 875	| now, round off the low 8 bits of the hi lword.
 876	tst.b	%d0			| 8 low bits.
 877	jne	fp_ns_checkround	| Are they non-zero?
 878	| nothing to do here
 879	subq.l	#8,%a0
 880	printf	PNORM,"%p(",1,%a0
 881	printx	PNORM,%a0@
 882	printf	PNORM,")\n"
 883	rts
 884fp_ns_checkround:
 885	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
 886	clr.b	-(%a0)			| clear low byte of high lword
 887	subq.l	#3,%a0
 888	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
 889	jne	2f			| %d2 == 0, round to nearest
 890	tst.b	%d0			| test guard bit
 891	jpl	9f			| zero is closer
 892	btst	#8,%d0			| test lsb bit
 893	| round to even behaviour, see above.
 894	jne	fp_ns_doroundup		| round to infinity
 895	lsl.b	#1,%d0			| check low bits
 896	jeq	9f			| round to zero
 897fp_ns_doroundup:
 898	| round (the mantissa, that is) towards infinity
 899	add.l	#0x100,(%a0)
 900	jcc	9f			| no overflow, good.
 901	| Overflow.  This means that the %d1 was 0xffffff00, so it
 902	| is now zero.  We will set the mantissa to reflect this, and
 903	| increment the exponent (checking for overflow there too)
 904	move.w	#0x8000,(%a0)
 905	addq.w	#1,-(%a0)
 906	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
 907	jeq	fp_ns_large		| yes, so make it infinity.
 9089:	subq.l	#4,%a0
 909	printf	PNORM,"%p(",1,%a0
 910	printx	PNORM,%a0@
 911	printf	PNORM,")\n"
 912	rts
 913	| check nondefault rounding modes
 9142:	subq.w	#2,%d2
 915	jcs	9b			| %d2 < 2, round to zero
 916	jhi	3f			| %d2 > 2, round to +infinity
 917	tst.b	(-3,%a0)		| to -inf
 918	jne	fp_ns_doroundup		| negative, round to infinity
 919	jra	9b			| positive, round to zero
 9203:	tst.b	(-3,%a0)		| to +inf
 921	jeq	fp_ns_doroundup		| positive, round to infinity
 922	jra	9b			| negative, round to zero
 923	| Exponent underflow.  Try to make a denormal, and set it to
 924	| the smallest possible fraction if this fails.
 925fp_ns_small:
 926	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
 927	move.w	#0x3f81,(-2,%a0)	| 2**-126
 928	neg.w	%d2			| degree of underflow
 929	cmp.w	#32,%d2			| single or double shift?
 930	jcc	2f
 931	| a 32-bit shift.
 932	move.l	(%a0),%d0
 933	move.l	%d0,%d1
 934	lsr.l	%d2,%d0
 935	move.l	%d0,(%a0)+
 936	| Check to see if we shifted off any significant bits.
 937	neg.w	%d2
 938	add.w	#32,%d2
 939	lsl.l	%d2,%d1
 940	jeq	1f
 941	bset	#0,%d0			| Sticky bit.
 942	| Check the lower lword
 9431:	tst.l	(%a0)
 944	jeq	fp_ns_round
 945	clr	(%a0)
 946	bset	#0,%d0			| Sticky bit.
 947	jra	fp_ns_round
 948	| Sorry, the number is just too small.
 9492:	clr.l	(%a0)+
 950	clr.l	(%a0)
 951	moveq	#1,%d0			| Smallest possible fraction,
 952	jra	fp_ns_round		| round as desired.
 953	| Exponent overflow.  Just call it infinity.
 954fp_ns_large:
 955	tst.b	(3,%a0)
 956	jeq	1f
 957	fp_set_sr FPSR_EXC_INEX2
 9581:	fp_set_sr FPSR_EXC_OVFL
 959	move.w	(FPD_RND,FPDATA),%d2
 960	jne	3f			| %d2 = 0 round to nearest
 9611:	move.w	#0x7fff,(-2,%a0)
 962	clr.l	(%a0)+
 963	clr.l	(%a0)
 9642:	subq.l	#8,%a0
 965	printf	PNORM,"%p(",1,%a0
 966	printx	PNORM,%a0@
 967	printf	PNORM,")\n"
 968	rts
 9693:	subq.w	#2,%d2
 970	jcs	5f			| %d2 < 2, round to zero
 971	jhi	4f			| %d2 > 2, round to +infinity
 972	tst.b	(-3,%a0)		| to -inf
 973	jne	1b
 974	jra	5f
 9754:	tst.b	(-3,%a0)		| to +inf
 976	jeq	1b
 9775:	move.w	#0x407e,(-2,%a0)
 978	move.l	#0xffffff00,(%a0)+
 979	clr.l	(%a0)
 980	jra	2b
 981	| zero and denormalized
 982fp_ns_zero:
 983	tst.l	(%a0)+
 984	jne	1f
 985	tst.l	(%a0)
 986	jne	1f
 987	subq.l	#8,%a0
 988	printf	PNORM,"%p(",1,%a0
 989	printx	PNORM,%a0@
 990	printf	PNORM,")\n"
 991	rts				| zero.  nothing to do.
 992	| These are not merely subnormal numbers, but true denormals,
 993	| i.e. pathologically small (exponent is 2**-16383) numbers.
 994	| It is clearly impossible for even a normal extended number
 995	| with that exponent to fit into single precision, so just
 996	| write these ones off as "too darn small".
 9971:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
 998	clr.l	(%a0)
 999	clr.l	-(%a0)
1000	move.w	#0x3f81,-(%a0)		| i.e. 2**-126
1001	addq.l	#6,%a0
1002	moveq	#1,%d0
1003	jra	fp_ns_round		| round.
1004	| Infinities or NaNs
1005fp_ns_huge:
1006	subq.l	#4,%a0
1007	printf	PNORM,"%p(",1,%a0
1008	printx	PNORM,%a0@
1009	printf	PNORM,")\n"
1010	rts
1011
1012	| fp_normalize_single_fast:
1013	| normalize an extended with single (23-bit) precision
1014	| this is only used by fsgldiv/fsgdlmul, where the
1015	| operand is not completly normalized.
1016	| args:	 %a0 (struct fp_ext *)
1017
1018fp_normalize_single_fast:
1019	printf	PNORM,"nsf: %p(",1,%a0
1020	printx	PNORM,%a0@
1021	printf	PNORM,") "
1022	addq.l	#2,%a0
1023	move.w	(%a0)+,%d2
1024	cmp.w	#0x7fff,%d2
1025	jeq	fp_nsf_huge		| NaN / infinitive.
1026	move.l	(%a0)+,%d0		| get high lword of mantissa
1027fp_nsf_round:
1028	tst.l	(%a0)			| check the low lword
1029	jeq	1f
1030	| Set a sticky bit if it is non-zero.  This should only
1031	| affect the rounding in what would otherwise be equal-
1032	| distance situations, which is what we want it to do.
1033	bset	#0,%d0
10341:	clr.l	(%a0)			| zap it from memory.
1035	| now, round off the low 8 bits of the hi lword.
1036	tst.b	%d0			| 8 low bits.
1037	jne	fp_nsf_checkround	| Are they non-zero?
1038	| nothing to do here
1039	subq.l	#8,%a0
1040	printf	PNORM,"%p(",1,%a0
1041	printx	PNORM,%a0@
1042	printf	PNORM,")\n"
1043	rts
1044fp_nsf_checkround:
1045	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
1046	clr.b	-(%a0)			| clear low byte of high lword
1047	subq.l	#3,%a0
1048	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1049	jne	2f			| %d2 == 0, round to nearest
1050	tst.b	%d0			| test guard bit
1051	jpl	9f			| zero is closer
1052	btst	#8,%d0			| test lsb bit
1053	| round to even behaviour, see above.
1054	jne	fp_nsf_doroundup		| round to infinity
1055	lsl.b	#1,%d0			| check low bits
1056	jeq	9f			| round to zero
1057fp_nsf_doroundup:
1058	| round (the mantissa, that is) towards infinity
1059	add.l	#0x100,(%a0)
1060	jcc	9f			| no overflow, good.
1061	| Overflow.  This means that the %d1 was 0xffffff00, so it
1062	| is now zero.  We will set the mantissa to reflect this, and
1063	| increment the exponent (checking for overflow there too)
1064	move.w	#0x8000,(%a0)
1065	addq.w	#1,-(%a0)
1066	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
1067	jeq	fp_nsf_large		| yes, so make it infinity.
10689:	subq.l	#4,%a0
1069	printf	PNORM,"%p(",1,%a0
1070	printx	PNORM,%a0@
1071	printf	PNORM,")\n"
1072	rts
1073	| check nondefault rounding modes
10742:	subq.w	#2,%d2
1075	jcs	9b			| %d2 < 2, round to zero
1076	jhi	3f			| %d2 > 2, round to +infinity
1077	tst.b	(-3,%a0)		| to -inf
1078	jne	fp_nsf_doroundup	| negative, round to infinity
1079	jra	9b			| positive, round to zero
10803:	tst.b	(-3,%a0)		| to +inf
1081	jeq	fp_nsf_doroundup		| positive, round to infinity
1082	jra	9b			| negative, round to zero
1083	| Exponent overflow.  Just call it infinity.
1084fp_nsf_large:
1085	tst.b	(3,%a0)
1086	jeq	1f
1087	fp_set_sr FPSR_EXC_INEX2
10881:	fp_set_sr FPSR_EXC_OVFL
1089	move.w	(FPD_RND,FPDATA),%d2
1090	jne	3f			| %d2 = 0 round to nearest
10911:	move.w	#0x7fff,(-2,%a0)
1092	clr.l	(%a0)+
1093	clr.l	(%a0)
10942:	subq.l	#8,%a0
1095	printf	PNORM,"%p(",1,%a0
1096	printx	PNORM,%a0@
1097	printf	PNORM,")\n"
1098	rts
10993:	subq.w	#2,%d2
1100	jcs	5f			| %d2 < 2, round to zero
1101	jhi	4f			| %d2 > 2, round to +infinity
1102	tst.b	(-3,%a0)		| to -inf
1103	jne	1b
1104	jra	5f
11054:	tst.b	(-3,%a0)		| to +inf
1106	jeq	1b
11075:	move.w	#0x407e,(-2,%a0)
1108	move.l	#0xffffff00,(%a0)+
1109	clr.l	(%a0)
1110	jra	2b
1111	| Infinities or NaNs
1112fp_nsf_huge:
1113	subq.l	#4,%a0
1114	printf	PNORM,"%p(",1,%a0
1115	printx	PNORM,%a0@
1116	printf	PNORM,")\n"
1117	rts
1118
1119	| conv_ext2int (macro):
1120	| Generates a subroutine that converts an extended value to an
1121	| integer of a given size, again, with the appropriate type of
1122	| rounding.
1123
1124	| Macro arguments:
1125	| s:	size, as given in an assembly instruction.
1126	| b:	number of bits in that size.
1127
1128	| Subroutine arguments:
1129	| %a0:	source (struct fp_ext *)
1130
1131	| Returns the integer in %d0 (like it should)
1132
1133.macro conv_ext2int s,b
1134	.set	inf,(1<<(\b-1))-1	| i.e. MAXINT
1135	printf	PCONV,"e2i%d: %p(",2,#\b,%a0
1136	printx	PCONV,%a0@
1137	printf	PCONV,") "
1138	addq.l	#2,%a0
1139	move.w	(%a0)+,%d2		| exponent
1140	jeq	fp_e2i_zero\b		| zero / denorm (== 0, here)
1141	cmp.w	#0x7fff,%d2
1142	jeq	fp_e2i_huge\b		| Inf / NaN
1143	sub.w	#0x3ffe,%d2
1144	jcs	fp_e2i_small\b
1145	cmp.w	#\b,%d2
1146	jhi	fp_e2i_large\b
1147	move.l	(%a0),%d0
1148	move.l	%d0,%d1
1149	lsl.l	%d2,%d1
1150	jne	fp_e2i_round\b
1151	tst.l	(4,%a0)
1152	jne	fp_e2i_round\b
1153	neg.w	%d2
1154	add.w	#32,%d2
1155	lsr.l	%d2,%d0
11569:	tst.w	(-4,%a0)
1157	jne	1f
1158	tst.\s	%d0
1159	jmi	fp_e2i_large\b
1160	printf	PCONV,"-> %p\n",1,%d0
1161	rts
11621:	neg.\s	%d0
1163	jeq	1f
1164	jpl	fp_e2i_large\b
11651:	printf	PCONV,"-> %p\n",1,%d0
1166	rts
1167fp_e2i_round\b:
1168	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
1169	neg.w	%d2
1170	add.w	#32,%d2
1171	.if	\b>16
1172	jeq	5f
1173	.endif
1174	lsr.l	%d2,%d0
1175	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1176	jne	2f			| %d2 == 0, round to nearest
1177	tst.l	%d1			| test guard bit
1178	jpl	9b			| zero is closer
1179	btst	%d2,%d0			| test lsb bit (%d2 still 0)
1180	jne	fp_e2i_doroundup\b
1181	lsl.l	#1,%d1			| check low bits
1182	jne	fp_e2i_doroundup\b
1183	tst.l	(4,%a0)
1184	jeq	9b
1185fp_e2i_doroundup\b:
1186	addq.l	#1,%d0
1187	jra	9b
1188	| check nondefault rounding modes
11892:	subq.w	#2,%d2
1190	jcs	9b			| %d2 < 2, round to zero
1191	jhi	3f			| %d2 > 2, round to +infinity
1192	tst.w	(-4,%a0)		| to -inf
1193	jne	fp_e2i_doroundup\b	| negative, round to infinity
1194	jra	9b			| positive, round to zero
11953:	tst.w	(-4,%a0)		| to +inf
1196	jeq	fp_e2i_doroundup\b	| positive, round to infinity
1197	jra	9b	| negative, round to zero
1198	| we are only want -2**127 get correctly rounded here,
1199	| since the guard bit is in the lower lword.
1200	| everything else ends up anyway as overflow.
1201	.if	\b>16
12025:	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1203	jne	2b			| %d2 == 0, round to nearest
1204	move.l	(4,%a0),%d1		| test guard bit
1205	jpl	9b			| zero is closer
1206	lsl.l	#1,%d1			| check low bits
1207	jne	fp_e2i_doroundup\b
1208	jra	9b
1209	.endif
1210fp_e2i_zero\b:
1211	clr.l	%d0
1212	tst.l	(%a0)+
1213	jne	1f
1214	tst.l	(%a0)
1215	jeq	3f
12161:	subq.l	#4,%a0
1217	fp_clr_sr FPSR_EXC_UNFL		| fp_normalize_ext has set this bit
1218fp_e2i_small\b:
1219	fp_set_sr FPSR_EXC_INEX2
1220	clr.l	%d0
1221	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
1222	subq.w	#2,%d2
1223	jcs	3f			| %d2 < 2, round to nearest/zero
1224	jhi	2f			| %d2 > 2, round to +infinity
1225	tst.w	(-4,%a0)		| to -inf
1226	jeq	3f
1227	subq.\s	#1,%d0
1228	jra	3f
12292:	tst.w	(-4,%a0)		| to +inf
1230	jne	3f
1231	addq.\s	#1,%d0
12323:	printf	PCONV,"-> %p\n",1,%d0
1233	rts
1234fp_e2i_large\b:
1235	fp_set_sr FPSR_EXC_OPERR
1236	move.\s	#inf,%d0
1237	tst.w	(-4,%a0)
1238	jeq	1f
1239	addq.\s	#1,%d0
12401:	printf	PCONV,"-> %p\n",1,%d0
1241	rts
1242fp_e2i_huge\b:
1243	move.\s	(%a0),%d0
1244	tst.l	(%a0)
1245	jne	1f
1246	tst.l	(%a0)
1247	jeq	fp_e2i_large\b
1248	| fp_normalize_ext has set this bit already
1249	| and made the number nonsignaling
12501:	fp_tst_sr FPSR_EXC_SNAN
1251	jne	1f
1252	fp_set_sr FPSR_EXC_OPERR
12531:	printf	PCONV,"-> %p\n",1,%d0
1254	rts
1255.endm
1256
1257fp_conv_ext2long:
1258	conv_ext2int l,32
1259
1260fp_conv_ext2short:
1261	conv_ext2int w,16
1262
1263fp_conv_ext2byte:
1264	conv_ext2int b,8
1265
1266fp_conv_ext2double:
1267	jsr	fp_normalize_double
1268	printf	PCONV,"e2d: %p(",1,%a0
1269	printx	PCONV,%a0@
1270	printf	PCONV,"), "
1271	move.l	(%a0)+,%d2
1272	cmp.w	#0x7fff,%d2
1273	jne	1f
1274	move.w	#0x7ff,%d2
1275	move.l	(%a0)+,%d0
1276	jra	2f
12771:	sub.w	#0x3fff-0x3ff,%d2
1278	move.l	(%a0)+,%d0
1279	jmi	2f
1280	clr.w	%d2
12812:	lsl.w	#5,%d2
1282	lsl.l	#7,%d2
1283	lsl.l	#8,%d2
1284	move.l	%d0,%d1
1285	lsl.l	#1,%d0
1286	lsr.l	#4,%d0
1287	lsr.l	#8,%d0
1288	or.l	%d2,%d0
1289	putuser.l %d0,(%a1)+,fp_err_ua2,%a1
1290	moveq	#21,%d0
1291	lsl.l	%d0,%d1
1292	move.l	(%a0),%d0
1293	lsr.l	#4,%d0
1294	lsr.l	#7,%d0
1295	or.l	%d1,%d0
1296	putuser.l %d0,(%a1),fp_err_ua2,%a1
1297#ifdef FPU_EMU_DEBUG
1298	getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
1299	getuser.l %a1@(0),%d1,fp_err_ua2,%a1
1300	printf	PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
1301#endif
1302	rts
1303
1304fp_conv_ext2single:
1305	jsr	fp_normalize_single
1306	printf	PCONV,"e2s: %p(",1,%a0
1307	printx	PCONV,%a0@
1308	printf	PCONV,"), "
1309	move.l	(%a0)+,%d1
1310	cmp.w	#0x7fff,%d1
1311	jne	1f
1312	move.w	#0xff,%d1
1313	move.l	(%a0)+,%d0
1314	jra	2f
13151:	sub.w	#0x3fff-0x7f,%d1
1316	move.l	(%a0)+,%d0
1317	jmi	2f
1318	clr.w	%d1
13192:	lsl.w	#8,%d1
1320	lsl.l	#7,%d1
1321	lsl.l	#8,%d1
1322	bclr	#31,%d0
1323	lsr.l	#8,%d0
1324	or.l	%d1,%d0
1325	printf	PCONV,"%08x\n",1,%d0
1326	rts
1327
1328	| special return addresses for instr that
1329	| encode the rounding precision in the opcode
1330	| (e.g. fsmove,fdmove)
1331
1332fp_finalrounding_single:
1333	addq.l	#8,%sp
1334	jsr	fp_normalize_ext
1335	jsr	fp_normalize_single
1336	jra	fp_finaltest
1337
1338fp_finalrounding_single_fast:
1339	addq.l	#8,%sp
1340	jsr	fp_normalize_ext
1341	jsr	fp_normalize_single_fast
1342	jra	fp_finaltest
1343
1344fp_finalrounding_double:
1345	addq.l	#8,%sp
1346	jsr	fp_normalize_ext
1347	jsr	fp_normalize_double
1348	jra	fp_finaltest
1349
1350	| fp_finaltest:
1351	| set the emulated status register based on the outcome of an
1352	| emulated instruction.
1353
1354fp_finalrounding:
1355	addq.l	#8,%sp
1356|	printf	,"f: %p\n",1,%a0
1357	jsr	fp_normalize_ext
1358	move.w	(FPD_PREC,FPDATA),%d0
1359	subq.w	#1,%d0
1360	jcs	fp_finaltest
1361	jne	1f
1362	jsr	fp_normalize_single
1363	jra	2f
13641:	jsr	fp_normalize_double
13652:|	printf	,"f: %p\n",1,%a0
1366fp_finaltest:
1367	| First, we do some of the obvious tests for the exception
1368	| status byte and condition code bytes of fp_sr here, so that
1369	| they do not have to be handled individually by every
1370	| emulated instruction.
1371	clr.l	%d0
1372	addq.l	#1,%a0
1373	tst.b	(%a0)+			| sign
1374	jeq	1f
1375	bset	#FPSR_CC_NEG-24,%d0	| N bit
13761:	cmp.w	#0x7fff,(%a0)+		| exponent
1377	jeq	2f
1378	| test for zero
1379	moveq	#FPSR_CC_Z-24,%d1
1380	tst.l	(%a0)+
1381	jne	9f
1382	tst.l	(%a0)
1383	jne	9f
1384	jra	8f
1385	| infinitiv and NAN
13862:	moveq	#FPSR_CC_NAN-24,%d1
1387	move.l	(%a0)+,%d2
1388	lsl.l	#1,%d2			| ignore high bit
1389	jne	8f
1390	tst.l	(%a0)
1391	jne	8f
1392	moveq	#FPSR_CC_INF-24,%d1
13938:	bset	%d1,%d0
13949:	move.b	%d0,(FPD_FPSR+0,FPDATA)	| set condition test result
1395	| move instructions enter here
1396	| Here, we test things in the exception status byte, and set
1397	| other things in the accrued exception byte accordingly.
1398	| Emulated instructions can set various things in the former,
1399	| as defined in fp_emu.h.
1400fp_final:
1401	move.l	(FPD_FPSR,FPDATA),%d0
1402#if 0
1403	btst	#FPSR_EXC_SNAN,%d0	| EXC_SNAN
1404	jne	1f
1405	btst	#FPSR_EXC_OPERR,%d0	| EXC_OPERR
1406	jeq	2f
14071:	bset	#FPSR_AEXC_IOP,%d0	| set IOP bit
14082:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
1409	jeq	1f
1410	bset	#FPSR_AEXC_OVFL,%d0	| set OVFL bit
14111:	btst	#FPSR_EXC_UNFL,%d0	| EXC_UNFL
1412	jeq	1f
1413	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
1414	jeq	1f
1415	bset	#FPSR_AEXC_UNFL,%d0	| set UNFL bit
14161:	btst	#FPSR_EXC_DZ,%d0	| EXC_INEX1
1417	jeq	1f
1418	bset	#FPSR_AEXC_DZ,%d0	| set DZ bit
14191:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
1420	jne	1f
1421	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
1422	jne	1f
1423	btst	#FPSR_EXC_INEX1,%d0	| EXC_INEX1
1424	jeq	2f
14251:	bset	#FPSR_AEXC_INEX,%d0	| set INEX bit
14262:	move.l	%d0,(FPD_FPSR,FPDATA)
1427#else
1428	| same as above, greatly optimized, but untested (yet)
1429	move.l	%d0,%d2
1430	lsr.l	#5,%d0
1431	move.l	%d0,%d1
1432	lsr.l	#4,%d1
1433	or.l	%d0,%d1
1434	and.b	#0x08,%d1
1435	move.l	%d2,%d0
1436	lsr.l	#6,%d0
1437	or.l	%d1,%d0
1438	move.l	%d2,%d1
1439	lsr.l	#4,%d1
1440	or.b	#0xdf,%d1
1441	and.b	%d1,%d0
1442	move.l	%d2,%d1
1443	lsr.l	#7,%d1
1444	and.b	#0x80,%d1
1445	or.b	%d1,%d0
1446	and.b	#0xf8,%d0
1447	or.b	%d0,%d2
1448	move.l	%d2,(FPD_FPSR,FPDATA)
1449#endif
1450	move.b	(FPD_FPSR+2,FPDATA),%d0
1451	and.b	(FPD_FPCR+2,FPDATA),%d0
1452	jeq	1f
1453	printf	,"send signal!!!\n"
14541:	jra	fp_end