vfpdouble.c - arch/arm/vfp/vfpdouble.c - Linux diff v6.9.4

   1/*
   2 *  linux/arch/arm/vfp/vfpdouble.c
   3 *
   4 * This code is derived in part from John R. Housers softfloat library, which
   5 * carries the following notice:
   6 *
   7 * ===========================================================================
   8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
   9 * Arithmetic Package, Release 2.
  10 *
  11 * Written by John R. Hauser.  This work was made possible in part by the
  12 * International Computer Science Institute, located at Suite 600, 1947 Center
  13 * Street, Berkeley, California 94704.  Funding was partially provided by the
  14 * National Science Foundation under grant MIP-9311980.  The original version
  15 * of this code was written as part of a project to build a fixed-point vector
  16 * processor in collaboration with the University of California at Berkeley,
  17 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  19 * arithmetic/softfloat.html'.
  20 *
  21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  23 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  26 *
  27 * Derivative works are acceptable, even for commercial purposes, so long as
  28 * (1) they include prominent notice that the work is derivative, and (2) they
  29 * include prominent notice akin to these three paragraphs for those parts of
  30 * this code that are retained.
  31 * ===========================================================================
  32 */
  33#include <linux/kernel.h>
  34#include <linux/bitops.h>
  35
  36#include <asm/div64.h>
  37#include <asm/vfp.h>
  38
  39#include "vfpinstr.h"
  40#include "vfp.h"
  41
  42static struct vfp_double vfp_double_default_qnan = {
  43	.exponent	= 2047,
  44	.sign		= 0,
  45	.significand	= VFP_DOUBLE_SIGNIFICAND_QNAN,
  46};
  47
  48static void vfp_double_dump(const char *str, struct vfp_double *d)
  49{
  50	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
  51		 str, d->sign != 0, d->exponent, d->significand);
  52}
  53
  54static void vfp_double_normalise_denormal(struct vfp_double *vd)
  55{
  56	int bits = 31 - fls(vd->significand >> 32);
  57	if (bits == 31)
  58		bits = 63 - fls(vd->significand);
  59
  60	vfp_double_dump("normalise_denormal: in", vd);
  61
  62	if (bits) {
  63		vd->exponent -= bits - 1;
  64		vd->significand <<= bits;
  65	}
  66
  67	vfp_double_dump("normalise_denormal: out", vd);
  68}
  69
  70u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
  71{
  72	u64 significand, incr;
  73	int exponent, shift, underflow;
  74	u32 rmode;
  75
  76	vfp_double_dump("pack: in", vd);
  77
  78	/*
  79	 * Infinities and NaNs are a special case.
  80	 */
  81	if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
  82		goto pack;
  83
  84	/*
  85	 * Special-case zero.
  86	 */
  87	if (vd->significand == 0) {
  88		vd->exponent = 0;
  89		goto pack;
  90	}
  91
  92	exponent = vd->exponent;
  93	significand = vd->significand;
  94
  95	shift = 32 - fls(significand >> 32);
  96	if (shift == 32)
  97		shift = 64 - fls(significand);
  98	if (shift) {
  99		exponent -= shift;
 100		significand <<= shift;
 101	}
 102
 103#ifdef DEBUG
 104	vd->exponent = exponent;
 105	vd->significand = significand;
 106	vfp_double_dump("pack: normalised", vd);
 107#endif
 108
 109	/*
 110	 * Tiny number?
 111	 */
 112	underflow = exponent < 0;
 113	if (underflow) {
 114		significand = vfp_shiftright64jamming(significand, -exponent);
 115		exponent = 0;
 116#ifdef DEBUG
 117		vd->exponent = exponent;
 118		vd->significand = significand;
 119		vfp_double_dump("pack: tiny number", vd);
 120#endif
 121		if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
 122			underflow = 0;
 123	}
 124
 125	/*
 126	 * Select rounding increment.
 127	 */
 128	incr = 0;
 129	rmode = fpscr & FPSCR_RMODE_MASK;
 130
 131	if (rmode == FPSCR_ROUND_NEAREST) {
 132		incr = 1ULL << VFP_DOUBLE_LOW_BITS;
 133		if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
 134			incr -= 1;
 135	} else if (rmode == FPSCR_ROUND_TOZERO) {
 136		incr = 0;
 137	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
 138		incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;
 139
 140	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);
 141
 142	/*
 143	 * Is our rounding going to overflow?
 144	 */
 145	if ((significand + incr) < significand) {
 146		exponent += 1;
 147		significand = (significand >> 1) | (significand & 1);
 148		incr >>= 1;
 149#ifdef DEBUG
 150		vd->exponent = exponent;
 151		vd->significand = significand;
 152		vfp_double_dump("pack: overflow", vd);
 153#endif
 154	}
 155
 156	/*
 157	 * If any of the low bits (which will be shifted out of the
 158	 * number) are non-zero, the result is inexact.
 159	 */
 160	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
 161		exceptions |= FPSCR_IXC;
 162
 163	/*
 164	 * Do our rounding.
 165	 */
 166	significand += incr;
 167
 168	/*
 169	 * Infinity?
 170	 */
 171	if (exponent >= 2046) {
 172		exceptions |= FPSCR_OFC | FPSCR_IXC;
 173		if (incr == 0) {
 174			vd->exponent = 2045;
 175			vd->significand = 0x7fffffffffffffffULL;
 176		} else {
 177			vd->exponent = 2047;		/* infinity */
 178			vd->significand = 0;
 179		}
 180	} else {
 181		if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
 182			exponent = 0;
 183		if (exponent || significand > 0x8000000000000000ULL)
 184			underflow = 0;
 185		if (underflow)
 186			exceptions |= FPSCR_UFC;
 187		vd->exponent = exponent;
 188		vd->significand = significand >> 1;
 189	}
 190
 191 pack:
 192	vfp_double_dump("pack: final", vd);
 193	{
 194		s64 d = vfp_double_pack(vd);
 195		pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
 196			 dd, d, exceptions);
 197		vfp_put_double(d, dd);
 198	}
 199	return exceptions;
 200}
 201
 202/*
 203 * Propagate the NaN, setting exceptions if it is signalling.
 204 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 205 */
 206static u32
 207vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
 208		  struct vfp_double *vdm, u32 fpscr)
 209{
 210	struct vfp_double *nan;
 211	int tn, tm = 0;
 212
 213	tn = vfp_double_type(vdn);
 214
 215	if (vdm)
 216		tm = vfp_double_type(vdm);
 217
 218	if (fpscr & FPSCR_DEFAULT_NAN)
 219		/*
 220		 * Default NaN mode - always returns a quiet NaN
 221		 */
 222		nan = &vfp_double_default_qnan;
 223	else {
 224		/*
 225		 * Contemporary mode - select the first signalling
 226		 * NAN, or if neither are signalling, the first
 227		 * quiet NAN.
 228		 */
 229		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
 230			nan = vdn;
 231		else
 232			nan = vdm;
 233		/*
 234		 * Make the NaN quiet.
 235		 */
 236		nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
 237	}
 238
 239	*vdd = *nan;
 240
 241	/*
 242	 * If one was a signalling NAN, raise invalid operation.
 243	 */
 244	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
 245}
 246
 247/*
 248 * Extended operations
 249 */
 250static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
 251{
 252	vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), dd);
 253	return 0;
 254}
 255
 256static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
 257{
 258	vfp_put_double(vfp_get_double(dm), dd);
 259	return 0;
 260}
 261
 262static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
 263{
 264	vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), dd);
 265	return 0;
 266}
 267
 268static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
 269{
 270	struct vfp_double vdm, vdd;
 271	int ret, tm;
 272
 273	vfp_double_unpack(&vdm, vfp_get_double(dm));
 274	tm = vfp_double_type(&vdm);
 275	if (tm & (VFP_NAN|VFP_INFINITY)) {
 276		struct vfp_double *vdp = &vdd;
 277
 278		if (tm & VFP_NAN)
 279			ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
 280		else if (vdm.sign == 0) {
 281 sqrt_copy:
 282			vdp = &vdm;
 283			ret = 0;
 284		} else {
 285 sqrt_invalid:
 286			vdp = &vfp_double_default_qnan;
 287			ret = FPSCR_IOC;
 288		}
 289		vfp_put_double(vfp_double_pack(vdp), dd);
 290		return ret;
 291	}
 292
 293	/*
 294	 * sqrt(+/- 0) == +/- 0
 295	 */
 296	if (tm & VFP_ZERO)
 297		goto sqrt_copy;
 298
 299	/*
 300	 * Normalise a denormalised number
 301	 */
 302	if (tm & VFP_DENORMAL)
 303		vfp_double_normalise_denormal(&vdm);
 304
 305	/*
 306	 * sqrt(<0) = invalid
 307	 */
 308	if (vdm.sign)
 309		goto sqrt_invalid;
 310
 311	vfp_double_dump("sqrt", &vdm);
 312
 313	/*
 314	 * Estimate the square root.
 315	 */
 316	vdd.sign = 0;
 317	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
 318	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;
 319
 320	vfp_double_dump("sqrt estimate1", &vdd);
 321
 322	vdm.significand >>= 1 + (vdm.exponent & 1);
 323	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);
 324
 325	vfp_double_dump("sqrt estimate2", &vdd);
 326
 327	/*
 328	 * And now adjust.
 329	 */
 330	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
 331		if (vdd.significand < 2) {
 332			vdd.significand = ~0ULL;
 333		} else {
 334			u64 termh, terml, remh, reml;
 335			vdm.significand <<= 2;
 336			mul64to128(&termh, &terml, vdd.significand, vdd.significand);
 337			sub128(&remh, &reml, vdm.significand, 0, termh, terml);
 338			while ((s64)remh < 0) {
 339				vdd.significand -= 1;
 340				shift64left(&termh, &terml, vdd.significand);
 341				terml |= 1;
 342				add128(&remh, &reml, remh, reml, termh, terml);
 343			}
 344			vdd.significand |= (remh | reml) != 0;
 345		}
 346	}
 347	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);
 348
 349	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
 350}
 351
 352/*
 353 * Equal	:= ZC
 354 * Less than	:= N
 355 * Greater than	:= C
 356 * Unordered	:= CV
 357 */
 358static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
 359{
 360	s64 d, m;
 361	u32 ret = 0;
 362
 363	m = vfp_get_double(dm);
 364	if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
 365		ret |= FPSCR_C | FPSCR_V;
 366		if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
 367			/*
 368			 * Signalling NaN, or signalling on quiet NaN
 369			 */
 370			ret |= FPSCR_IOC;
 371	}
 372
 373	d = vfp_get_double(dd);
 374	if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
 375		ret |= FPSCR_C | FPSCR_V;
 376		if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
 377			/*
 378			 * Signalling NaN, or signalling on quiet NaN
 379			 */
 380			ret |= FPSCR_IOC;
 381	}
 382
 383	if (ret == 0) {
 384		if (d == m || vfp_double_packed_abs(d | m) == 0) {
 385			/*
 386			 * equal
 387			 */
 388			ret |= FPSCR_Z | FPSCR_C;
 389		} else if (vfp_double_packed_sign(d ^ m)) {
 390			/*
 391			 * different signs
 392			 */
 393			if (vfp_double_packed_sign(d))
 394				/*
 395				 * d is negative, so d < m
 396				 */
 397				ret |= FPSCR_N;
 398			else
 399				/*
 400				 * d is positive, so d > m
 401				 */
 402				ret |= FPSCR_C;
 403		} else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
 404			/*
 405			 * d < m
 406			 */
 407			ret |= FPSCR_N;
 408		} else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
 409			/*
 410			 * d > m
 411			 */
 412			ret |= FPSCR_C;
 413		}
 414	}
 415
 416	return ret;
 417}
 418
 419static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
 420{
 421	return vfp_compare(dd, 0, dm, fpscr);
 422}
 423
 424static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
 425{
 426	return vfp_compare(dd, 1, dm, fpscr);
 427}
 428
 429static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
 430{
 431	return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
 432}
 433
 434static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
 435{
 436	return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
 437}
 438
 439static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
 440{
 441	struct vfp_double vdm;
 442	struct vfp_single vsd;
 443	int tm;
 444	u32 exceptions = 0;
 445
 446	vfp_double_unpack(&vdm, vfp_get_double(dm));
 447
 448	tm = vfp_double_type(&vdm);
 449
 450	/*
 451	 * If we have a signalling NaN, signal invalid operation.
 452	 */
 453	if (tm == VFP_SNAN)
 454		exceptions = FPSCR_IOC;
 455
 456	if (tm & VFP_DENORMAL)
 457		vfp_double_normalise_denormal(&vdm);
 458
 459	vsd.sign = vdm.sign;
 460	vsd.significand = vfp_hi64to32jamming(vdm.significand);
 461
 462	/*
 463	 * If we have an infinity or a NaN, the exponent must be 255
 464	 */
 465	if (tm & (VFP_INFINITY|VFP_NAN)) {
 466		vsd.exponent = 255;
 467		if (tm == VFP_QNAN)
 468			vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
 469		goto pack_nan;
 470	} else if (tm & VFP_ZERO)
 471		vsd.exponent = 0;
 472	else
 473		vsd.exponent = vdm.exponent - (1023 - 127);
 474
 475	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");
 476
 477 pack_nan:
 478	vfp_put_float(vfp_single_pack(&vsd), sd);
 479	return exceptions;
 480}
 481
 482static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
 483{
 484	struct vfp_double vdm;
 485	u32 m = vfp_get_float(dm);
 486
 487	vdm.sign = 0;
 488	vdm.exponent = 1023 + 63 - 1;
 489	vdm.significand = (u64)m;
 490
 491	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
 492}
 493
 494static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
 495{
 496	struct vfp_double vdm;
 497	u32 m = vfp_get_float(dm);
 498
 499	vdm.sign = (m & 0x80000000) >> 16;
 500	vdm.exponent = 1023 + 63 - 1;
 501	vdm.significand = vdm.sign ? -m : m;
 502
 503	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
 504}
 505
 506static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
 507{
 508	struct vfp_double vdm;
 509	u32 d, exceptions = 0;
 510	int rmode = fpscr & FPSCR_RMODE_MASK;
 511	int tm;
 512
 513	vfp_double_unpack(&vdm, vfp_get_double(dm));
 514
 515	/*
 516	 * Do we have a denormalised number?
 517	 */
 518	tm = vfp_double_type(&vdm);
 519	if (tm & VFP_DENORMAL)
 520		exceptions |= FPSCR_IDC;
 521
 522	if (tm & VFP_NAN)
 523		vdm.sign = 0;
 524
 525	if (vdm.exponent >= 1023 + 32) {
 526		d = vdm.sign ? 0 : 0xffffffff;
 527		exceptions = FPSCR_IOC;
 528	} else if (vdm.exponent >= 1023 - 1) {
 529		int shift = 1023 + 63 - vdm.exponent;
 530		u64 rem, incr = 0;
 531
 532		/*
 533		 * 2^0 <= m < 2^32-2^8
 534		 */
 535		d = (vdm.significand << 1) >> shift;
 536		rem = vdm.significand << (65 - shift);
 537
 538		if (rmode == FPSCR_ROUND_NEAREST) {
 539			incr = 0x8000000000000000ULL;
 540			if ((d & 1) == 0)
 541				incr -= 1;
 542		} else if (rmode == FPSCR_ROUND_TOZERO) {
 543			incr = 0;
 544		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
 545			incr = ~0ULL;
 546		}
 547
 548		if ((rem + incr) < rem) {
 549			if (d < 0xffffffff)
 550				d += 1;
 551			else
 552				exceptions |= FPSCR_IOC;
 553		}
 554
 555		if (d && vdm.sign) {
 556			d = 0;
 557			exceptions |= FPSCR_IOC;
 558		} else if (rem)
 559			exceptions |= FPSCR_IXC;
 560	} else {
 561		d = 0;
 562		if (vdm.exponent | vdm.significand) {
 563			exceptions |= FPSCR_IXC;
 564			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
 565				d = 1;
 566			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
 567				d = 0;
 568				exceptions |= FPSCR_IOC;
 569			}
 570		}
 571	}
 572
 573	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 574
 575	vfp_put_float(d, sd);
 576
 577	return exceptions;
 578}
 579
 580static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
 581{
 582	return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
 583}
 584
 585static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
 586{
 587	struct vfp_double vdm;
 588	u32 d, exceptions = 0;
 589	int rmode = fpscr & FPSCR_RMODE_MASK;
 590	int tm;
 591
 592	vfp_double_unpack(&vdm, vfp_get_double(dm));
 593	vfp_double_dump("VDM", &vdm);
 594
 595	/*
 596	 * Do we have denormalised number?
 597	 */
 598	tm = vfp_double_type(&vdm);
 599	if (tm & VFP_DENORMAL)
 600		exceptions |= FPSCR_IDC;
 601
 602	if (tm & VFP_NAN) {
 603		d = 0;
 604		exceptions |= FPSCR_IOC;
 605	} else if (vdm.exponent >= 1023 + 32) {
 606		d = 0x7fffffff;
 607		if (vdm.sign)
 608			d = ~d;
 609		exceptions |= FPSCR_IOC;
 610	} else if (vdm.exponent >= 1023 - 1) {
 611		int shift = 1023 + 63 - vdm.exponent;	/* 58 */
 612		u64 rem, incr = 0;
 613
 614		d = (vdm.significand << 1) >> shift;
 615		rem = vdm.significand << (65 - shift);
 616
 617		if (rmode == FPSCR_ROUND_NEAREST) {
 618			incr = 0x8000000000000000ULL;
 619			if ((d & 1) == 0)
 620				incr -= 1;
 621		} else if (rmode == FPSCR_ROUND_TOZERO) {
 622			incr = 0;
 623		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
 624			incr = ~0ULL;
 625		}
 626
 627		if ((rem + incr) < rem && d < 0xffffffff)
 628			d += 1;
 629		if (d > 0x7fffffff + (vdm.sign != 0)) {
 630			d = 0x7fffffff + (vdm.sign != 0);
 631			exceptions |= FPSCR_IOC;
 632		} else if (rem)
 633			exceptions |= FPSCR_IXC;
 634
 635		if (vdm.sign)
 636			d = -d;
 637	} else {
 638		d = 0;
 639		if (vdm.exponent | vdm.significand) {
 640			exceptions |= FPSCR_IXC;
 641			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
 642				d = 1;
 643			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
 644				d = -1;
 645		}
 646	}
 647
 648	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 649
 650	vfp_put_float((s32)d, sd);
 651
 652	return exceptions;
 653}
 654
 655static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
 656{
 657	return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
 658}
 659
 660
 661static struct op fops_ext[32] = {
 662	[FEXT_TO_IDX(FEXT_FCPY)]	= { vfp_double_fcpy,   0 },
 663	[FEXT_TO_IDX(FEXT_FABS)]	= { vfp_double_fabs,   0 },
 664	[FEXT_TO_IDX(FEXT_FNEG)]	= { vfp_double_fneg,   0 },
 665	[FEXT_TO_IDX(FEXT_FSQRT)]	= { vfp_double_fsqrt,  0 },
 666	[FEXT_TO_IDX(FEXT_FCMP)]	= { vfp_double_fcmp,   OP_SCALAR },
 667	[FEXT_TO_IDX(FEXT_FCMPE)]	= { vfp_double_fcmpe,  OP_SCALAR },
 668	[FEXT_TO_IDX(FEXT_FCMPZ)]	= { vfp_double_fcmpz,  OP_SCALAR },
 669	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= { vfp_double_fcmpez, OP_SCALAR },
 670	[FEXT_TO_IDX(FEXT_FCVT)]	= { vfp_double_fcvts,  OP_SCALAR|OP_SD },
 671	[FEXT_TO_IDX(FEXT_FUITO)]	= { vfp_double_fuito,  OP_SCALAR|OP_SM },
 672	[FEXT_TO_IDX(FEXT_FSITO)]	= { vfp_double_fsito,  OP_SCALAR|OP_SM },
 673	[FEXT_TO_IDX(FEXT_FTOUI)]	= { vfp_double_ftoui,  OP_SCALAR|OP_SD },
 674	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= { vfp_double_ftouiz, OP_SCALAR|OP_SD },
 675	[FEXT_TO_IDX(FEXT_FTOSI)]	= { vfp_double_ftosi,  OP_SCALAR|OP_SD },
 676	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= { vfp_double_ftosiz, OP_SCALAR|OP_SD },
 677};
 678
 679
 680
 681
 682static u32
 683vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
 684			  struct vfp_double *vdm, u32 fpscr)
 685{
 686	struct vfp_double *vdp;
 687	u32 exceptions = 0;
 688	int tn, tm;
 689
 690	tn = vfp_double_type(vdn);
 691	tm = vfp_double_type(vdm);
 692
 693	if (tn & tm & VFP_INFINITY) {
 694		/*
 695		 * Two infinities.  Are they different signs?
 696		 */
 697		if (vdn->sign ^ vdm->sign) {
 698			/*
 699			 * different signs -> invalid
 700			 */
 701			exceptions = FPSCR_IOC;
 702			vdp = &vfp_double_default_qnan;
 703		} else {
 704			/*
 705			 * same signs -> valid
 706			 */
 707			vdp = vdn;
 708		}
 709	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
 710		/*
 711		 * One infinity and one number -> infinity
 712		 */
 713		vdp = vdn;
 714	} else {
 715		/*
 716		 * 'n' is a NaN of some type
 717		 */
 718		return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
 719	}
 720	*vdd = *vdp;
 721	return exceptions;
 722}
 723
 724static u32
 725vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
 726	       struct vfp_double *vdm, u32 fpscr)
 727{
 728	u32 exp_diff;
 729	u64 m_sig;
 730
 731	if (vdn->significand & (1ULL << 63) ||
 732	    vdm->significand & (1ULL << 63)) {
 733		pr_info("VFP: bad FP values in %s\n", __func__);
 734		vfp_double_dump("VDN", vdn);
 735		vfp_double_dump("VDM", vdm);
 736	}
 737
 738	/*
 739	 * Ensure that 'n' is the largest magnitude number.  Note that
 740	 * if 'n' and 'm' have equal exponents, we do not swap them.
 741	 * This ensures that NaN propagation works correctly.
 742	 */
 743	if (vdn->exponent < vdm->exponent) {
 744		struct vfp_double *t = vdn;
 745		vdn = vdm;
 746		vdm = t;
 747	}
 748
 749	/*
 750	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
 751	 * infinity or a NaN here.
 752	 */
 753	if (vdn->exponent == 2047)
 754		return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);
 755
 756	/*
 757	 * We have two proper numbers, where 'vdn' is the larger magnitude.
 758	 *
 759	 * Copy 'n' to 'd' before doing the arithmetic.
 760	 */
 761	*vdd = *vdn;
 762
 763	/*
 764	 * Align 'm' with the result.
 765	 */
 766	exp_diff = vdn->exponent - vdm->exponent;
 767	m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);
 768
 769	/*
 770	 * If the signs are different, we are really subtracting.
 771	 */
 772	if (vdn->sign ^ vdm->sign) {
 773		m_sig = vdn->significand - m_sig;
 774		if ((s64)m_sig < 0) {
 775			vdd->sign = vfp_sign_negate(vdd->sign);
 776			m_sig = -m_sig;
 777		} else if (m_sig == 0) {
 778			vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
 779				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
 780		}
 781	} else {
 782		m_sig += vdn->significand;
 783	}
 784	vdd->significand = m_sig;
 785
 786	return 0;
 787}
 788
 789static u32
 790vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
 791		    struct vfp_double *vdm, u32 fpscr)
 792{
 793	vfp_double_dump("VDN", vdn);
 794	vfp_double_dump("VDM", vdm);
 795
 796	/*
 797	 * Ensure that 'n' is the largest magnitude number.  Note that
 798	 * if 'n' and 'm' have equal exponents, we do not swap them.
 799	 * This ensures that NaN propagation works correctly.
 800	 */
 801	if (vdn->exponent < vdm->exponent) {
 802		struct vfp_double *t = vdn;
 803		vdn = vdm;
 804		vdm = t;
 805		pr_debug("VFP: swapping M <-> N\n");
 806	}
 807
 808	vdd->sign = vdn->sign ^ vdm->sign;
 809
 810	/*
 811	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
 812	 */
 813	if (vdn->exponent == 2047) {
 814		if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
 815			return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
 816		if ((vdm->exponent | vdm->significand) == 0) {
 817			*vdd = vfp_double_default_qnan;
 818			return FPSCR_IOC;
 819		}
 820		vdd->exponent = vdn->exponent;
 821		vdd->significand = 0;
 822		return 0;
 823	}
 824
 825	/*
 826	 * If 'm' is zero, the result is always zero.  In this case,
 827	 * 'n' may be zero or a number, but it doesn't matter which.
 828	 */
 829	if ((vdm->exponent | vdm->significand) == 0) {
 830		vdd->exponent = 0;
 831		vdd->significand = 0;
 832		return 0;
 833	}
 834
 835	/*
 836	 * We add 2 to the destination exponent for the same reason
 837	 * as the addition case - though this time we have +1 from
 838	 * each input operand.
 839	 */
 840	vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
 841	vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);
 842
 843	vfp_double_dump("VDD", vdd);
 844	return 0;
 845}
 846
 847#define NEG_MULTIPLY	(1 << 0)
 848#define NEG_SUBTRACT	(1 << 1)
 849
 850static u32
 851vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
 852{
 853	struct vfp_double vdd, vdp, vdn, vdm;
 854	u32 exceptions;
 855
 856	vfp_double_unpack(&vdn, vfp_get_double(dn));
 857	if (vdn.exponent == 0 && vdn.significand)
 858		vfp_double_normalise_denormal(&vdn);
 859
 860	vfp_double_unpack(&vdm, vfp_get_double(dm));
 861	if (vdm.exponent == 0 && vdm.significand)
 862		vfp_double_normalise_denormal(&vdm);
 863
 864	exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
 865	if (negate & NEG_MULTIPLY)
 866		vdp.sign = vfp_sign_negate(vdp.sign);
 867
 868	vfp_double_unpack(&vdn, vfp_get_double(dd));
 869	if (vdn.exponent == 0 && vdn.significand)
 870		vfp_double_normalise_denormal(&vdn);
 871	if (negate & NEG_SUBTRACT)
 872		vdn.sign = vfp_sign_negate(vdn.sign);
 873
 874	exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);
 875
 876	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
 877}
 878
 879/*
 880 * Standard operations
 881 */
 882
 883/*
 884 * sd = sd + (sn * sm)
 885 */
 886static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
 887{
 888	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
 889}
 890
 891/*
 892 * sd = sd - (sn * sm)
 893 */
 894static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
 895{
 896	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
 897}
 898
 899/*
 900 * sd = -sd + (sn * sm)
 901 */
 902static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
 903{
 904	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
 905}
 906
 907/*
 908 * sd = -sd - (sn * sm)
 909 */
 910static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
 911{
 912	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
 913}
 914
 915/*
 916 * sd = sn * sm
 917 */
 918static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
 919{
 920	struct vfp_double vdd, vdn, vdm;
 921	u32 exceptions;
 922
 923	vfp_double_unpack(&vdn, vfp_get_double(dn));
 924	if (vdn.exponent == 0 && vdn.significand)
 925		vfp_double_normalise_denormal(&vdn);
 926
 927	vfp_double_unpack(&vdm, vfp_get_double(dm));
 928	if (vdm.exponent == 0 && vdm.significand)
 929		vfp_double_normalise_denormal(&vdm);
 930
 931	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
 932	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
 933}
 934
 935/*
 936 * sd = -(sn * sm)
 937 */
 938static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
 939{
 940	struct vfp_double vdd, vdn, vdm;
 941	u32 exceptions;
 942
 943	vfp_double_unpack(&vdn, vfp_get_double(dn));
 944	if (vdn.exponent == 0 && vdn.significand)
 945		vfp_double_normalise_denormal(&vdn);
 946
 947	vfp_double_unpack(&vdm, vfp_get_double(dm));
 948	if (vdm.exponent == 0 && vdm.significand)
 949		vfp_double_normalise_denormal(&vdm);
 950
 951	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
 952	vdd.sign = vfp_sign_negate(vdd.sign);
 953
 954	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
 955}
 956
 957/*
 958 * sd = sn + sm
 959 */
 960static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
 961{
 962	struct vfp_double vdd, vdn, vdm;
 963	u32 exceptions;
 964
 965	vfp_double_unpack(&vdn, vfp_get_double(dn));
 966	if (vdn.exponent == 0 && vdn.significand)
 967		vfp_double_normalise_denormal(&vdn);
 968
 969	vfp_double_unpack(&vdm, vfp_get_double(dm));
 970	if (vdm.exponent == 0 && vdm.significand)
 971		vfp_double_normalise_denormal(&vdm);
 972
 973	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
 974
 975	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
 976}
 977
 978/*
 979 * sd = sn - sm
 980 */
 981static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
 982{
 983	struct vfp_double vdd, vdn, vdm;
 984	u32 exceptions;
 985
 986	vfp_double_unpack(&vdn, vfp_get_double(dn));
 987	if (vdn.exponent == 0 && vdn.significand)
 988		vfp_double_normalise_denormal(&vdn);
 989
 990	vfp_double_unpack(&vdm, vfp_get_double(dm));
 991	if (vdm.exponent == 0 && vdm.significand)
 992		vfp_double_normalise_denormal(&vdm);
 993
 994	/*
 995	 * Subtraction is like addition, but with a negated operand.
 996	 */
 997	vdm.sign = vfp_sign_negate(vdm.sign);
 998
 999	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
1000
1001	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
1002}
1003
1004/*
1005 * sd = sn / sm
1006 */
1007static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
1008{
1009	struct vfp_double vdd, vdn, vdm;
1010	u32 exceptions = 0;
1011	int tm, tn;
1012
1013	vfp_double_unpack(&vdn, vfp_get_double(dn));
1014	vfp_double_unpack(&vdm, vfp_get_double(dm));
1015
1016	vdd.sign = vdn.sign ^ vdm.sign;
1017
1018	tn = vfp_double_type(&vdn);
1019	tm = vfp_double_type(&vdm);
1020
1021	/*
1022	 * Is n a NAN?
1023	 */
1024	if (tn & VFP_NAN)
1025		goto vdn_nan;
1026
1027	/*
1028	 * Is m a NAN?
1029	 */
1030	if (tm & VFP_NAN)
1031		goto vdm_nan;
1032
1033	/*
1034	 * If n and m are infinity, the result is invalid
1035	 * If n and m are zero, the result is invalid
1036	 */
1037	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1038		goto invalid;
1039
1040	/*
1041	 * If n is infinity, the result is infinity
1042	 */
1043	if (tn & VFP_INFINITY)
1044		goto infinity;
1045
1046	/*
1047	 * If m is zero, raise div0 exceptions
1048	 */
1049	if (tm & VFP_ZERO)
1050		goto divzero;
1051
1052	/*
1053	 * If m is infinity, or n is zero, the result is zero
1054	 */
1055	if (tm & VFP_INFINITY || tn & VFP_ZERO)
1056		goto zero;
1057
1058	if (tn & VFP_DENORMAL)
1059		vfp_double_normalise_denormal(&vdn);
1060	if (tm & VFP_DENORMAL)
1061		vfp_double_normalise_denormal(&vdm);
1062
1063	/*
1064	 * Ok, we have two numbers, we can perform division.
1065	 */
1066	vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
1067	vdm.significand <<= 1;
1068	if (vdm.significand <= (2 * vdn.significand)) {
1069		vdn.significand >>= 1;
1070		vdd.exponent++;
1071	}
1072	vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
1073	if ((vdd.significand & 0x1ff) <= 2) {
1074		u64 termh, terml, remh, reml;
1075		mul64to128(&termh, &terml, vdm.significand, vdd.significand);
1076		sub128(&remh, &reml, vdn.significand, 0, termh, terml);
1077		while ((s64)remh < 0) {
1078			vdd.significand -= 1;
1079			add128(&remh, &reml, remh, reml, 0, vdm.significand);
1080		}
1081		vdd.significand |= (reml != 0);
1082	}
1083	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");
1084
1085 vdn_nan:
1086	exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
1087 pack:
1088	vfp_put_double(vfp_double_pack(&vdd), dd);
1089	return exceptions;
1090
1091 vdm_nan:
1092	exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
1093	goto pack;
1094
1095 zero:
1096	vdd.exponent = 0;
1097	vdd.significand = 0;
1098	goto pack;
1099
1100 divzero:
1101	exceptions = FPSCR_DZC;
1102 infinity:
1103	vdd.exponent = 2047;
1104	vdd.significand = 0;
1105	goto pack;
1106
1107 invalid:
1108	vfp_put_double(vfp_double_pack(&vfp_double_default_qnan), dd);
1109	return FPSCR_IOC;
1110}
1111
1112static struct op fops[16] = {
1113	[FOP_TO_IDX(FOP_FMAC)]	= { vfp_double_fmac,  0 },
1114	[FOP_TO_IDX(FOP_FNMAC)]	= { vfp_double_fnmac, 0 },
1115	[FOP_TO_IDX(FOP_FMSC)]	= { vfp_double_fmsc,  0 },
1116	[FOP_TO_IDX(FOP_FNMSC)]	= { vfp_double_fnmsc, 0 },
1117	[FOP_TO_IDX(FOP_FMUL)]	= { vfp_double_fmul,  0 },
1118	[FOP_TO_IDX(FOP_FNMUL)]	= { vfp_double_fnmul, 0 },
1119	[FOP_TO_IDX(FOP_FADD)]	= { vfp_double_fadd,  0 },
1120	[FOP_TO_IDX(FOP_FSUB)]	= { vfp_double_fsub,  0 },
1121	[FOP_TO_IDX(FOP_FDIV)]	= { vfp_double_fdiv,  0 },
1122};
1123
1124#define FREG_BANK(x)	((x) & 0x0c)
1125#define FREG_IDX(x)	((x) & 3)
1126
1127u32 vfp_double_cpdo(u32 inst, u32 fpscr)
1128{
1129	u32 op = inst & FOP_MASK;
1130	u32 exceptions = 0;
1131	unsigned int dest;
1132	unsigned int dn = vfp_get_dn(inst);
1133	unsigned int dm;
1134	unsigned int vecitr, veclen, vecstride;
1135	struct op *fop;
1136
1137	vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));
1138
1139	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
1140
1141	/*
1142	 * fcvtds takes an sN register number as destination, not dN.
1143	 * It also always operates on scalars.
1144	 */
1145	if (fop->flags & OP_SD)
1146		dest = vfp_get_sd(inst);
1147	else
1148		dest = vfp_get_dd(inst);
1149
1150	/*
1151	 * f[us]ito takes a sN operand, not a dN operand.
1152	 */
1153	if (fop->flags & OP_SM)
1154		dm = vfp_get_sm(inst);
1155	else
1156		dm = vfp_get_dm(inst);
1157
1158	/*
1159	 * If destination bank is zero, vector length is always '1'.
1160	 * ARM DDI0100F C5.1.3, C5.3.2.
1161	 */
1162	if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
1163		veclen = 0;
1164	else
1165		veclen = fpscr & FPSCR_LENGTH_MASK;
1166
1167	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1168		 (veclen >> FPSCR_LENGTH_BIT) + 1);
1169
1170	if (!fop->fn)
1171		goto invalid;
1172
1173	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1174		u32 except;
1175		char type;
1176
1177		type = fop->flags & OP_SD ? 's' : 'd';
1178		if (op == FOP_EXT)
1179			pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
1180				 vecitr >> FPSCR_LENGTH_BIT,
1181				 type, dest, dn, dm);
1182		else
1183			pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
1184				 vecitr >> FPSCR_LENGTH_BIT,
1185				 type, dest, dn, FOP_TO_IDX(op), dm);
1186
1187		except = fop->fn(dest, dn, dm, fpscr);
1188		pr_debug("VFP: itr%d: exceptions=%08x\n",
1189			 vecitr >> FPSCR_LENGTH_BIT, except);
1190
1191		exceptions |= except;
1192
1193		/*
1194		 * CHECK: It appears to be undefined whether we stop when
1195		 * we encounter an exception.  We continue.
1196		 */
1197		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3);
1198		dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
1199		if (FREG_BANK(dm) != 0)
1200			dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
1201	}
1202	return exceptions;
1203
1204 invalid:
1205	return ~0;
1206}

   1/*
   2 *  linux/arch/arm/vfp/vfpdouble.c
   3 *
   4 * This code is derived in part from John R. Housers softfloat library, which
   5 * carries the following notice:
   6 *
   7 * ===========================================================================
   8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
   9 * Arithmetic Package, Release 2.
  10 *
  11 * Written by John R. Hauser.  This work was made possible in part by the
  12 * International Computer Science Institute, located at Suite 600, 1947 Center
  13 * Street, Berkeley, California 94704.  Funding was partially provided by the
  14 * National Science Foundation under grant MIP-9311980.  The original version
  15 * of this code was written as part of a project to build a fixed-point vector
  16 * processor in collaboration with the University of California at Berkeley,
  17 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  19 * arithmetic/softfloat.html'.
  20 *
  21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  23 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  26 *
  27 * Derivative works are acceptable, even for commercial purposes, so long as
  28 * (1) they include prominent notice that the work is derivative, and (2) they
  29 * include prominent notice akin to these three paragraphs for those parts of
  30 * this code that are retained.
  31 * ===========================================================================
  32 */
  33#include <linux/kernel.h>
  34#include <linux/bitops.h>
  35
  36#include <asm/div64.h>
  37#include <asm/vfp.h>
  38
  39#include "vfpinstr.h"
  40#include "vfp.h"
  41
  42static struct vfp_double vfp_double_default_qnan = {
  43	.exponent	= 2047,
  44	.sign		= 0,
  45	.significand	= VFP_DOUBLE_SIGNIFICAND_QNAN,
  46};
  47
  48static void vfp_double_dump(const char *str, struct vfp_double *d)
  49{
  50	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
  51		 str, d->sign != 0, d->exponent, d->significand);
  52}
  53
  54static void vfp_double_normalise_denormal(struct vfp_double *vd)
  55{
  56	int bits = 31 - fls(vd->significand >> 32);
  57	if (bits == 31)
  58		bits = 63 - fls(vd->significand);
  59
  60	vfp_double_dump("normalise_denormal: in", vd);
  61
  62	if (bits) {
  63		vd->exponent -= bits - 1;
  64		vd->significand <<= bits;
  65	}
  66
  67	vfp_double_dump("normalise_denormal: out", vd);
  68}
  69
  70u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
  71{
  72	u64 significand, incr;
  73	int exponent, shift, underflow;
  74	u32 rmode;
  75
  76	vfp_double_dump("pack: in", vd);
  77
  78	/*
  79	 * Infinities and NaNs are a special case.
  80	 */
  81	if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
  82		goto pack;
  83
  84	/*
  85	 * Special-case zero.
  86	 */
  87	if (vd->significand == 0) {
  88		vd->exponent = 0;
  89		goto pack;
  90	}
  91
  92	exponent = vd->exponent;
  93	significand = vd->significand;
  94
  95	shift = 32 - fls(significand >> 32);
  96	if (shift == 32)
  97		shift = 64 - fls(significand);
  98	if (shift) {
  99		exponent -= shift;
 100		significand <<= shift;
 101	}
 102
 103#ifdef DEBUG
 104	vd->exponent = exponent;
 105	vd->significand = significand;
 106	vfp_double_dump("pack: normalised", vd);
 107#endif
 108
 109	/*
 110	 * Tiny number?
 111	 */
 112	underflow = exponent < 0;
 113	if (underflow) {
 114		significand = vfp_shiftright64jamming(significand, -exponent);
 115		exponent = 0;
 116#ifdef DEBUG
 117		vd->exponent = exponent;
 118		vd->significand = significand;
 119		vfp_double_dump("pack: tiny number", vd);
 120#endif
 121		if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
 122			underflow = 0;
 123	}
 124
 125	/*
 126	 * Select rounding increment.
 127	 */
 128	incr = 0;
 129	rmode = fpscr & FPSCR_RMODE_MASK;
 130
 131	if (rmode == FPSCR_ROUND_NEAREST) {
 132		incr = 1ULL << VFP_DOUBLE_LOW_BITS;
 133		if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
 134			incr -= 1;
 135	} else if (rmode == FPSCR_ROUND_TOZERO) {
 136		incr = 0;
 137	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
 138		incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;
 139
 140	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);
 141
 142	/*
 143	 * Is our rounding going to overflow?
 144	 */
 145	if ((significand + incr) < significand) {
 146		exponent += 1;
 147		significand = (significand >> 1) | (significand & 1);
 148		incr >>= 1;
 149#ifdef DEBUG
 150		vd->exponent = exponent;
 151		vd->significand = significand;
 152		vfp_double_dump("pack: overflow", vd);
 153#endif
 154	}
 155
 156	/*
 157	 * If any of the low bits (which will be shifted out of the
 158	 * number) are non-zero, the result is inexact.
 159	 */
 160	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
 161		exceptions |= FPSCR_IXC;
 162
 163	/*
 164	 * Do our rounding.
 165	 */
 166	significand += incr;
 167
 168	/*
 169	 * Infinity?
 170	 */
 171	if (exponent >= 2046) {
 172		exceptions |= FPSCR_OFC | FPSCR_IXC;
 173		if (incr == 0) {
 174			vd->exponent = 2045;
 175			vd->significand = 0x7fffffffffffffffULL;
 176		} else {
 177			vd->exponent = 2047;		/* infinity */
 178			vd->significand = 0;
 179		}
 180	} else {
 181		if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
 182			exponent = 0;
 183		if (exponent || significand > 0x8000000000000000ULL)
 184			underflow = 0;
 185		if (underflow)
 186			exceptions |= FPSCR_UFC;
 187		vd->exponent = exponent;
 188		vd->significand = significand >> 1;
 189	}
 190
 191 pack:
 192	vfp_double_dump("pack: final", vd);
 193	{
 194		s64 d = vfp_double_pack(vd);
 195		pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
 196			 dd, d, exceptions);
 197		vfp_put_double(d, dd);
 198	}
 199	return exceptions;
 200}
 201
 202/*
 203 * Propagate the NaN, setting exceptions if it is signalling.
 204 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 205 */
 206static u32
 207vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
 208		  struct vfp_double *vdm, u32 fpscr)
 209{
 210	struct vfp_double *nan;
 211	int tn, tm = 0;
 212
 213	tn = vfp_double_type(vdn);
 214
 215	if (vdm)
 216		tm = vfp_double_type(vdm);
 217
 218	if (fpscr & FPSCR_DEFAULT_NAN)
 219		/*
 220		 * Default NaN mode - always returns a quiet NaN
 221		 */
 222		nan = &vfp_double_default_qnan;
 223	else {
 224		/*
 225		 * Contemporary mode - select the first signalling
 226		 * NAN, or if neither are signalling, the first
 227		 * quiet NAN.
 228		 */
 229		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
 230			nan = vdn;
 231		else
 232			nan = vdm;
 233		/*
 234		 * Make the NaN quiet.
 235		 */
 236		nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
 237	}
 238
 239	*vdd = *nan;
 240
 241	/*
 242	 * If one was a signalling NAN, raise invalid operation.
 243	 */
 244	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
 245}
 246
 247/*
 248 * Extended operations
 249 */
 250static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
 251{
 252	vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), dd);
 253	return 0;
 254}
 255
 256static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
 257{
 258	vfp_put_double(vfp_get_double(dm), dd);
 259	return 0;
 260}
 261
 262static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
 263{
 264	vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), dd);
 265	return 0;
 266}
 267
 268static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
 269{
 270	struct vfp_double vdm, vdd;
 271	int ret, tm;
 272
 273	vfp_double_unpack(&vdm, vfp_get_double(dm));
 274	tm = vfp_double_type(&vdm);
 275	if (tm & (VFP_NAN|VFP_INFINITY)) {
 276		struct vfp_double *vdp = &vdd;
 277
 278		if (tm & VFP_NAN)
 279			ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
 280		else if (vdm.sign == 0) {
 281 sqrt_copy:
 282			vdp = &vdm;
 283			ret = 0;
 284		} else {
 285 sqrt_invalid:
 286			vdp = &vfp_double_default_qnan;
 287			ret = FPSCR_IOC;
 288		}
 289		vfp_put_double(vfp_double_pack(vdp), dd);
 290		return ret;
 291	}
 292
 293	/*
 294	 * sqrt(+/- 0) == +/- 0
 295	 */
 296	if (tm & VFP_ZERO)
 297		goto sqrt_copy;
 298
 299	/*
 300	 * Normalise a denormalised number
 301	 */
 302	if (tm & VFP_DENORMAL)
 303		vfp_double_normalise_denormal(&vdm);
 304
 305	/*
 306	 * sqrt(<0) = invalid
 307	 */
 308	if (vdm.sign)
 309		goto sqrt_invalid;
 310
 311	vfp_double_dump("sqrt", &vdm);
 312
 313	/*
 314	 * Estimate the square root.
 315	 */
 316	vdd.sign = 0;
 317	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
 318	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;
 319
 320	vfp_double_dump("sqrt estimate1", &vdd);
 321
 322	vdm.significand >>= 1 + (vdm.exponent & 1);
 323	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);
 324
 325	vfp_double_dump("sqrt estimate2", &vdd);
 326
 327	/*
 328	 * And now adjust.
 329	 */
 330	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
 331		if (vdd.significand < 2) {
 332			vdd.significand = ~0ULL;
 333		} else {
 334			u64 termh, terml, remh, reml;
 335			vdm.significand <<= 2;
 336			mul64to128(&termh, &terml, vdd.significand, vdd.significand);
 337			sub128(&remh, &reml, vdm.significand, 0, termh, terml);
 338			while ((s64)remh < 0) {
 339				vdd.significand -= 1;
 340				shift64left(&termh, &terml, vdd.significand);
 341				terml |= 1;
 342				add128(&remh, &reml, remh, reml, termh, terml);
 343			}
 344			vdd.significand |= (remh | reml) != 0;
 345		}
 346	}
 347	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);
 348
 349	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
 350}
 351
 352/*
 353 * Equal	:= ZC
 354 * Less than	:= N
 355 * Greater than	:= C
 356 * Unordered	:= CV
 357 */
 358static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
 359{
 360	s64 d, m;
 361	u32 ret = 0;
 362
 363	m = vfp_get_double(dm);
 364	if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
 365		ret |= FPSCR_C | FPSCR_V;
 366		if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
 367			/*
 368			 * Signalling NaN, or signalling on quiet NaN
 369			 */
 370			ret |= FPSCR_IOC;
 371	}
 372
 373	d = vfp_get_double(dd);
 374	if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
 375		ret |= FPSCR_C | FPSCR_V;
 376		if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
 377			/*
 378			 * Signalling NaN, or signalling on quiet NaN
 379			 */
 380			ret |= FPSCR_IOC;
 381	}
 382
 383	if (ret == 0) {
 384		if (d == m || vfp_double_packed_abs(d | m) == 0) {
 385			/*
 386			 * equal
 387			 */
 388			ret |= FPSCR_Z | FPSCR_C;
 389		} else if (vfp_double_packed_sign(d ^ m)) {
 390			/*
 391			 * different signs
 392			 */
 393			if (vfp_double_packed_sign(d))
 394				/*
 395				 * d is negative, so d < m
 396				 */
 397				ret |= FPSCR_N;
 398			else
 399				/*
 400				 * d is positive, so d > m
 401				 */
 402				ret |= FPSCR_C;
 403		} else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
 404			/*
 405			 * d < m
 406			 */
 407			ret |= FPSCR_N;
 408		} else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
 409			/*
 410			 * d > m
 411			 */
 412			ret |= FPSCR_C;
 413		}
 414	}
 415
 416	return ret;
 417}
 418
 419static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
 420{
 421	return vfp_compare(dd, 0, dm, fpscr);
 422}
 423
 424static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
 425{
 426	return vfp_compare(dd, 1, dm, fpscr);
 427}
 428
 429static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
 430{
 431	return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
 432}
 433
 434static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
 435{
 436	return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
 437}
 438
 439static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
 440{
 441	struct vfp_double vdm;
 442	struct vfp_single vsd;
 443	int tm;
 444	u32 exceptions = 0;
 445
 446	vfp_double_unpack(&vdm, vfp_get_double(dm));
 447
 448	tm = vfp_double_type(&vdm);
 449
 450	/*
 451	 * If we have a signalling NaN, signal invalid operation.
 452	 */
 453	if (tm == VFP_SNAN)
 454		exceptions = FPSCR_IOC;
 455
 456	if (tm & VFP_DENORMAL)
 457		vfp_double_normalise_denormal(&vdm);
 458
 459	vsd.sign = vdm.sign;
 460	vsd.significand = vfp_hi64to32jamming(vdm.significand);
 461
 462	/*
 463	 * If we have an infinity or a NaN, the exponent must be 255
 464	 */
 465	if (tm & (VFP_INFINITY|VFP_NAN)) {
 466		vsd.exponent = 255;
 467		if (tm == VFP_QNAN)
 468			vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
 469		goto pack_nan;
 470	} else if (tm & VFP_ZERO)
 471		vsd.exponent = 0;
 472	else
 473		vsd.exponent = vdm.exponent - (1023 - 127);
 474
 475	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");
 476
 477 pack_nan:
 478	vfp_put_float(vfp_single_pack(&vsd), sd);
 479	return exceptions;
 480}
 481
 482static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
 483{
 484	struct vfp_double vdm;
 485	u32 m = vfp_get_float(dm);
 486
 487	vdm.sign = 0;
 488	vdm.exponent = 1023 + 63 - 1;
 489	vdm.significand = (u64)m;
 490
 491	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
 492}
 493
 494static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
 495{
 496	struct vfp_double vdm;
 497	u32 m = vfp_get_float(dm);
 498
 499	vdm.sign = (m & 0x80000000) >> 16;
 500	vdm.exponent = 1023 + 63 - 1;
 501	vdm.significand = vdm.sign ? -m : m;
 502
 503	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
 504}
 505
 506static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
 507{
 508	struct vfp_double vdm;
 509	u32 d, exceptions = 0;
 510	int rmode = fpscr & FPSCR_RMODE_MASK;
 511	int tm;
 512
 513	vfp_double_unpack(&vdm, vfp_get_double(dm));
 514
 515	/*
 516	 * Do we have a denormalised number?
 517	 */
 518	tm = vfp_double_type(&vdm);
 519	if (tm & VFP_DENORMAL)
 520		exceptions |= FPSCR_IDC;
 521
 522	if (tm & VFP_NAN)
 523		vdm.sign = 0;
 524
 525	if (vdm.exponent >= 1023 + 32) {
 526		d = vdm.sign ? 0 : 0xffffffff;
 527		exceptions = FPSCR_IOC;
 528	} else if (vdm.exponent >= 1023 - 1) {
 529		int shift = 1023 + 63 - vdm.exponent;
 530		u64 rem, incr = 0;
 531
 532		/*
 533		 * 2^0 <= m < 2^32-2^8
 534		 */
 535		d = (vdm.significand << 1) >> shift;
 536		rem = vdm.significand << (65 - shift);
 537
 538		if (rmode == FPSCR_ROUND_NEAREST) {
 539			incr = 0x8000000000000000ULL;
 540			if ((d & 1) == 0)
 541				incr -= 1;
 542		} else if (rmode == FPSCR_ROUND_TOZERO) {
 543			incr = 0;
 544		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
 545			incr = ~0ULL;
 546		}
 547
 548		if ((rem + incr) < rem) {
 549			if (d < 0xffffffff)
 550				d += 1;
 551			else
 552				exceptions |= FPSCR_IOC;
 553		}
 554
 555		if (d && vdm.sign) {
 556			d = 0;
 557			exceptions |= FPSCR_IOC;
 558		} else if (rem)
 559			exceptions |= FPSCR_IXC;
 560	} else {
 561		d = 0;
 562		if (vdm.exponent | vdm.significand) {
 563			exceptions |= FPSCR_IXC;
 564			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
 565				d = 1;
 566			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
 567				d = 0;
 568				exceptions |= FPSCR_IOC;
 569			}
 570		}
 571	}
 572
 573	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 574
 575	vfp_put_float(d, sd);
 576
 577	return exceptions;
 578}
 579
 580static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
 581{
 582	return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
 583}
 584
 585static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
 586{
 587	struct vfp_double vdm;
 588	u32 d, exceptions = 0;
 589	int rmode = fpscr & FPSCR_RMODE_MASK;
 590	int tm;
 591
 592	vfp_double_unpack(&vdm, vfp_get_double(dm));
 593	vfp_double_dump("VDM", &vdm);
 594
 595	/*
 596	 * Do we have denormalised number?
 597	 */
 598	tm = vfp_double_type(&vdm);
 599	if (tm & VFP_DENORMAL)
 600		exceptions |= FPSCR_IDC;
 601
 602	if (tm & VFP_NAN) {
 603		d = 0;
 604		exceptions |= FPSCR_IOC;
 605	} else if (vdm.exponent >= 1023 + 32) {
 606		d = 0x7fffffff;
 607		if (vdm.sign)
 608			d = ~d;
 609		exceptions |= FPSCR_IOC;
 610	} else if (vdm.exponent >= 1023 - 1) {
 611		int shift = 1023 + 63 - vdm.exponent;	/* 58 */
 612		u64 rem, incr = 0;
 613
 614		d = (vdm.significand << 1) >> shift;
 615		rem = vdm.significand << (65 - shift);
 616
 617		if (rmode == FPSCR_ROUND_NEAREST) {
 618			incr = 0x8000000000000000ULL;
 619			if ((d & 1) == 0)
 620				incr -= 1;
 621		} else if (rmode == FPSCR_ROUND_TOZERO) {
 622			incr = 0;
 623		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
 624			incr = ~0ULL;
 625		}
 626
 627		if ((rem + incr) < rem && d < 0xffffffff)
 628			d += 1;
 629		if (d > 0x7fffffff + (vdm.sign != 0)) {
 630			d = 0x7fffffff + (vdm.sign != 0);
 631			exceptions |= FPSCR_IOC;
 632		} else if (rem)
 633			exceptions |= FPSCR_IXC;
 634
 635		if (vdm.sign)
 636			d = -d;
 637	} else {
 638		d = 0;
 639		if (vdm.exponent | vdm.significand) {
 640			exceptions |= FPSCR_IXC;
 641			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
 642				d = 1;
 643			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
 644				d = -1;
 645		}
 646	}
 647
 648	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 649
 650	vfp_put_float((s32)d, sd);
 651
 652	return exceptions;
 653}
 654
 655static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
 656{
 657	return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
 658}
 659
 660
 661static struct op fops_ext[32] = {
 662	[FEXT_TO_IDX(FEXT_FCPY)]	= { vfp_double_fcpy,   0 },
 663	[FEXT_TO_IDX(FEXT_FABS)]	= { vfp_double_fabs,   0 },
 664	[FEXT_TO_IDX(FEXT_FNEG)]	= { vfp_double_fneg,   0 },
 665	[FEXT_TO_IDX(FEXT_FSQRT)]	= { vfp_double_fsqrt,  0 },
 666	[FEXT_TO_IDX(FEXT_FCMP)]	= { vfp_double_fcmp,   OP_SCALAR },
 667	[FEXT_TO_IDX(FEXT_FCMPE)]	= { vfp_double_fcmpe,  OP_SCALAR },
 668	[FEXT_TO_IDX(FEXT_FCMPZ)]	= { vfp_double_fcmpz,  OP_SCALAR },
 669	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= { vfp_double_fcmpez, OP_SCALAR },
 670	[FEXT_TO_IDX(FEXT_FCVT)]	= { vfp_double_fcvts,  OP_SCALAR|OP_SD },
 671	[FEXT_TO_IDX(FEXT_FUITO)]	= { vfp_double_fuito,  OP_SCALAR|OP_SM },
 672	[FEXT_TO_IDX(FEXT_FSITO)]	= { vfp_double_fsito,  OP_SCALAR|OP_SM },
 673	[FEXT_TO_IDX(FEXT_FTOUI)]	= { vfp_double_ftoui,  OP_SCALAR|OP_SD },
 674	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= { vfp_double_ftouiz, OP_SCALAR|OP_SD },
 675	[FEXT_TO_IDX(FEXT_FTOSI)]	= { vfp_double_ftosi,  OP_SCALAR|OP_SD },
 676	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= { vfp_double_ftosiz, OP_SCALAR|OP_SD },
 677};
 678
 679
 680
 681
 682static u32
 683vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
 684			  struct vfp_double *vdm, u32 fpscr)
 685{
 686	struct vfp_double *vdp;
 687	u32 exceptions = 0;
 688	int tn, tm;
 689
 690	tn = vfp_double_type(vdn);
 691	tm = vfp_double_type(vdm);
 692
 693	if (tn & tm & VFP_INFINITY) {
 694		/*
 695		 * Two infinities.  Are they different signs?
 696		 */
 697		if (vdn->sign ^ vdm->sign) {
 698			/*
 699			 * different signs -> invalid
 700			 */
 701			exceptions = FPSCR_IOC;
 702			vdp = &vfp_double_default_qnan;
 703		} else {
 704			/*
 705			 * same signs -> valid
 706			 */
 707			vdp = vdn;
 708		}
 709	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
 710		/*
 711		 * One infinity and one number -> infinity
 712		 */
 713		vdp = vdn;
 714	} else {
 715		/*
 716		 * 'n' is a NaN of some type
 717		 */
 718		return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
 719	}
 720	*vdd = *vdp;
 721	return exceptions;
 722}
 723
 724static u32
 725vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
 726	       struct vfp_double *vdm, u32 fpscr)
 727{
 728	u32 exp_diff;
 729	u64 m_sig;
 730
 731	if (vdn->significand & (1ULL << 63) ||
 732	    vdm->significand & (1ULL << 63)) {
 733		pr_info("VFP: bad FP values in %s\n", __func__);
 734		vfp_double_dump("VDN", vdn);
 735		vfp_double_dump("VDM", vdm);
 736	}
 737
 738	/*
 739	 * Ensure that 'n' is the largest magnitude number.  Note that
 740	 * if 'n' and 'm' have equal exponents, we do not swap them.
 741	 * This ensures that NaN propagation works correctly.
 742	 */
 743	if (vdn->exponent < vdm->exponent) {
 744		struct vfp_double *t = vdn;
 745		vdn = vdm;
 746		vdm = t;
 747	}
 748
 749	/*
 750	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
 751	 * infinity or a NaN here.
 752	 */
 753	if (vdn->exponent == 2047)
 754		return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);
 755
 756	/*
 757	 * We have two proper numbers, where 'vdn' is the larger magnitude.
 758	 *
 759	 * Copy 'n' to 'd' before doing the arithmetic.
 760	 */
 761	*vdd = *vdn;
 762
 763	/*
 764	 * Align 'm' with the result.
 765	 */
 766	exp_diff = vdn->exponent - vdm->exponent;
 767	m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);
 768
 769	/*
 770	 * If the signs are different, we are really subtracting.
 771	 */
 772	if (vdn->sign ^ vdm->sign) {
 773		m_sig = vdn->significand - m_sig;
 774		if ((s64)m_sig < 0) {
 775			vdd->sign = vfp_sign_negate(vdd->sign);
 776			m_sig = -m_sig;
 777		} else if (m_sig == 0) {
 778			vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
 779				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
 780		}
 781	} else {
 782		m_sig += vdn->significand;
 783	}
 784	vdd->significand = m_sig;
 785
 786	return 0;
 787}
 788
 789static u32
 790vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
 791		    struct vfp_double *vdm, u32 fpscr)
 792{
 793	vfp_double_dump("VDN", vdn);
 794	vfp_double_dump("VDM", vdm);
 795
 796	/*
 797	 * Ensure that 'n' is the largest magnitude number.  Note that
 798	 * if 'n' and 'm' have equal exponents, we do not swap them.
 799	 * This ensures that NaN propagation works correctly.
 800	 */
 801	if (vdn->exponent < vdm->exponent) {
 802		struct vfp_double *t = vdn;
 803		vdn = vdm;
 804		vdm = t;
 805		pr_debug("VFP: swapping M <-> N\n");
 806	}
 807
 808	vdd->sign = vdn->sign ^ vdm->sign;
 809
 810	/*
 811	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
 812	 */
 813	if (vdn->exponent == 2047) {
 814		if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
 815			return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
 816		if ((vdm->exponent | vdm->significand) == 0) {
 817			*vdd = vfp_double_default_qnan;
 818			return FPSCR_IOC;
 819		}
 820		vdd->exponent = vdn->exponent;
 821		vdd->significand = 0;
 822		return 0;
 823	}
 824
 825	/*
 826	 * If 'm' is zero, the result is always zero.  In this case,
 827	 * 'n' may be zero or a number, but it doesn't matter which.
 828	 */
 829	if ((vdm->exponent | vdm->significand) == 0) {
 830		vdd->exponent = 0;
 831		vdd->significand = 0;
 832		return 0;
 833	}
 834
 835	/*
 836	 * We add 2 to the destination exponent for the same reason
 837	 * as the addition case - though this time we have +1 from
 838	 * each input operand.
 839	 */
 840	vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
 841	vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);
 842
 843	vfp_double_dump("VDD", vdd);
 844	return 0;
 845}
 846
 847#define NEG_MULTIPLY	(1 << 0)
 848#define NEG_SUBTRACT	(1 << 1)
 849
 850static u32
 851vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
 852{
 853	struct vfp_double vdd, vdp, vdn, vdm;
 854	u32 exceptions;
 855
 856	vfp_double_unpack(&vdn, vfp_get_double(dn));
 857	if (vdn.exponent == 0 && vdn.significand)
 858		vfp_double_normalise_denormal(&vdn);
 859
 860	vfp_double_unpack(&vdm, vfp_get_double(dm));
 861	if (vdm.exponent == 0 && vdm.significand)
 862		vfp_double_normalise_denormal(&vdm);
 863
 864	exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
 865	if (negate & NEG_MULTIPLY)
 866		vdp.sign = vfp_sign_negate(vdp.sign);
 867
 868	vfp_double_unpack(&vdn, vfp_get_double(dd));
 869	if (vdn.exponent == 0 && vdn.significand)
 870		vfp_double_normalise_denormal(&vdn);
 871	if (negate & NEG_SUBTRACT)
 872		vdn.sign = vfp_sign_negate(vdn.sign);
 873
 874	exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);
 875
 876	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
 877}
 878
 879/*
 880 * Standard operations
 881 */
 882
 883/*
 884 * sd = sd + (sn * sm)
 885 */
 886static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
 887{
 888	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
 889}
 890
 891/*
 892 * sd = sd - (sn * sm)
 893 */
 894static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
 895{
 896	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
 897}
 898
 899/*
 900 * sd = -sd + (sn * sm)
 901 */
 902static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
 903{
 904	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
 905}
 906
 907/*
 908 * sd = -sd - (sn * sm)
 909 */
 910static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
 911{
 912	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
 913}
 914
 915/*
 916 * sd = sn * sm
 917 */
 918static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
 919{
 920	struct vfp_double vdd, vdn, vdm;
 921	u32 exceptions;
 922
 923	vfp_double_unpack(&vdn, vfp_get_double(dn));
 924	if (vdn.exponent == 0 && vdn.significand)
 925		vfp_double_normalise_denormal(&vdn);
 926
 927	vfp_double_unpack(&vdm, vfp_get_double(dm));
 928	if (vdm.exponent == 0 && vdm.significand)
 929		vfp_double_normalise_denormal(&vdm);
 930
 931	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
 932	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
 933}
 934
 935/*
 936 * sd = -(sn * sm)
 937 */
 938static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
 939{
 940	struct vfp_double vdd, vdn, vdm;
 941	u32 exceptions;
 942
 943	vfp_double_unpack(&vdn, vfp_get_double(dn));
 944	if (vdn.exponent == 0 && vdn.significand)
 945		vfp_double_normalise_denormal(&vdn);
 946
 947	vfp_double_unpack(&vdm, vfp_get_double(dm));
 948	if (vdm.exponent == 0 && vdm.significand)
 949		vfp_double_normalise_denormal(&vdm);
 950
 951	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
 952	vdd.sign = vfp_sign_negate(vdd.sign);
 953
 954	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
 955}
 956
 957/*
 958 * sd = sn + sm
 959 */
 960static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
 961{
 962	struct vfp_double vdd, vdn, vdm;
 963	u32 exceptions;
 964
 965	vfp_double_unpack(&vdn, vfp_get_double(dn));
 966	if (vdn.exponent == 0 && vdn.significand)
 967		vfp_double_normalise_denormal(&vdn);
 968
 969	vfp_double_unpack(&vdm, vfp_get_double(dm));
 970	if (vdm.exponent == 0 && vdm.significand)
 971		vfp_double_normalise_denormal(&vdm);
 972
 973	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
 974
 975	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
 976}
 977
 978/*
 979 * sd = sn - sm
 980 */
 981static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
 982{
 983	struct vfp_double vdd, vdn, vdm;
 984	u32 exceptions;
 985
 986	vfp_double_unpack(&vdn, vfp_get_double(dn));
 987	if (vdn.exponent == 0 && vdn.significand)
 988		vfp_double_normalise_denormal(&vdn);
 989
 990	vfp_double_unpack(&vdm, vfp_get_double(dm));
 991	if (vdm.exponent == 0 && vdm.significand)
 992		vfp_double_normalise_denormal(&vdm);
 993
 994	/*
 995	 * Subtraction is like addition, but with a negated operand.
 996	 */
 997	vdm.sign = vfp_sign_negate(vdm.sign);
 998
 999	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
1000
1001	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
1002}
1003
1004/*
1005 * sd = sn / sm
1006 */
1007static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
1008{
1009	struct vfp_double vdd, vdn, vdm;
1010	u32 exceptions = 0;
1011	int tm, tn;
1012
1013	vfp_double_unpack(&vdn, vfp_get_double(dn));
1014	vfp_double_unpack(&vdm, vfp_get_double(dm));
1015
1016	vdd.sign = vdn.sign ^ vdm.sign;
1017
1018	tn = vfp_double_type(&vdn);
1019	tm = vfp_double_type(&vdm);
1020
1021	/*
1022	 * Is n a NAN?
1023	 */
1024	if (tn & VFP_NAN)
1025		goto vdn_nan;
1026
1027	/*
1028	 * Is m a NAN?
1029	 */
1030	if (tm & VFP_NAN)
1031		goto vdm_nan;
1032
1033	/*
1034	 * If n and m are infinity, the result is invalid
1035	 * If n and m are zero, the result is invalid
1036	 */
1037	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1038		goto invalid;
1039
1040	/*
1041	 * If n is infinity, the result is infinity
1042	 */
1043	if (tn & VFP_INFINITY)
1044		goto infinity;
1045
1046	/*
1047	 * If m is zero, raise div0 exceptions
1048	 */
1049	if (tm & VFP_ZERO)
1050		goto divzero;
1051
1052	/*
1053	 * If m is infinity, or n is zero, the result is zero
1054	 */
1055	if (tm & VFP_INFINITY || tn & VFP_ZERO)
1056		goto zero;
1057
1058	if (tn & VFP_DENORMAL)
1059		vfp_double_normalise_denormal(&vdn);
1060	if (tm & VFP_DENORMAL)
1061		vfp_double_normalise_denormal(&vdm);
1062
1063	/*
1064	 * Ok, we have two numbers, we can perform division.
1065	 */
1066	vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
1067	vdm.significand <<= 1;
1068	if (vdm.significand <= (2 * vdn.significand)) {
1069		vdn.significand >>= 1;
1070		vdd.exponent++;
1071	}
1072	vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
1073	if ((vdd.significand & 0x1ff) <= 2) {
1074		u64 termh, terml, remh, reml;
1075		mul64to128(&termh, &terml, vdm.significand, vdd.significand);
1076		sub128(&remh, &reml, vdn.significand, 0, termh, terml);
1077		while ((s64)remh < 0) {
1078			vdd.significand -= 1;
1079			add128(&remh, &reml, remh, reml, 0, vdm.significand);
1080		}
1081		vdd.significand |= (reml != 0);
1082	}
1083	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");
1084
1085 vdn_nan:
1086	exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
1087 pack:
1088	vfp_put_double(vfp_double_pack(&vdd), dd);
1089	return exceptions;
1090
1091 vdm_nan:
1092	exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
1093	goto pack;
1094
1095 zero:
1096	vdd.exponent = 0;
1097	vdd.significand = 0;
1098	goto pack;
1099
1100 divzero:
1101	exceptions = FPSCR_DZC;
1102 infinity:
1103	vdd.exponent = 2047;
1104	vdd.significand = 0;
1105	goto pack;
1106
1107 invalid:
1108	vfp_put_double(vfp_double_pack(&vfp_double_default_qnan), dd);
1109	return FPSCR_IOC;
1110}
1111
1112static struct op fops[16] = {
1113	[FOP_TO_IDX(FOP_FMAC)]	= { vfp_double_fmac,  0 },
1114	[FOP_TO_IDX(FOP_FNMAC)]	= { vfp_double_fnmac, 0 },
1115	[FOP_TO_IDX(FOP_FMSC)]	= { vfp_double_fmsc,  0 },
1116	[FOP_TO_IDX(FOP_FNMSC)]	= { vfp_double_fnmsc, 0 },
1117	[FOP_TO_IDX(FOP_FMUL)]	= { vfp_double_fmul,  0 },
1118	[FOP_TO_IDX(FOP_FNMUL)]	= { vfp_double_fnmul, 0 },
1119	[FOP_TO_IDX(FOP_FADD)]	= { vfp_double_fadd,  0 },
1120	[FOP_TO_IDX(FOP_FSUB)]	= { vfp_double_fsub,  0 },
1121	[FOP_TO_IDX(FOP_FDIV)]	= { vfp_double_fdiv,  0 },
1122};
1123
1124#define FREG_BANK(x)	((x) & 0x0c)
1125#define FREG_IDX(x)	((x) & 3)
1126
1127u32 vfp_double_cpdo(u32 inst, u32 fpscr)
1128{
1129	u32 op = inst & FOP_MASK;
1130	u32 exceptions = 0;
1131	unsigned int dest;
1132	unsigned int dn = vfp_get_dn(inst);
1133	unsigned int dm;
1134	unsigned int vecitr, veclen, vecstride;
1135	struct op *fop;
1136
1137	vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));
1138
1139	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
1140
1141	/*
1142	 * fcvtds takes an sN register number as destination, not dN.
1143	 * It also always operates on scalars.
1144	 */
1145	if (fop->flags & OP_SD)
1146		dest = vfp_get_sd(inst);
1147	else
1148		dest = vfp_get_dd(inst);
1149
1150	/*
1151	 * f[us]ito takes a sN operand, not a dN operand.
1152	 */
1153	if (fop->flags & OP_SM)
1154		dm = vfp_get_sm(inst);
1155	else
1156		dm = vfp_get_dm(inst);
1157
1158	/*
1159	 * If destination bank is zero, vector length is always '1'.
1160	 * ARM DDI0100F C5.1.3, C5.3.2.
1161	 */
1162	if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
1163		veclen = 0;
1164	else
1165		veclen = fpscr & FPSCR_LENGTH_MASK;
1166
1167	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1168		 (veclen >> FPSCR_LENGTH_BIT) + 1);
1169
1170	if (!fop->fn)
1171		goto invalid;
1172
1173	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1174		u32 except;
1175		char type;
1176
1177		type = fop->flags & OP_SD ? 's' : 'd';
1178		if (op == FOP_EXT)
1179			pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
1180				 vecitr >> FPSCR_LENGTH_BIT,
1181				 type, dest, dn, dm);
1182		else
1183			pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
1184				 vecitr >> FPSCR_LENGTH_BIT,
1185				 type, dest, dn, FOP_TO_IDX(op), dm);
1186
1187		except = fop->fn(dest, dn, dm, fpscr);
1188		pr_debug("VFP: itr%d: exceptions=%08x\n",
1189			 vecitr >> FPSCR_LENGTH_BIT, except);
1190
1191		exceptions |= except;
1192
1193		/*
1194		 * CHECK: It appears to be undefined whether we stop when
1195		 * we encounter an exception.  We continue.
1196		 */
1197		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3);
1198		dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
1199		if (FREG_BANK(dm) != 0)
1200			dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
1201	}
1202	return exceptions;
1203
1204 invalid:
1205	return ~0;
1206}