src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2021-02-16 20:13:02 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2021-02-16 20:13:02 +0000
commit	b60736ec1405bb0a8dd40989f67ef4c93da068ab (patch)
tree	5c43fbb7c9fc45f0f87e0e6795a86267dbd12f9d /compiler-rt/lib/builtins/divsf3.c
parent	cfca06d7963fa0909f90483b42a6d7d194d01e08 (diff)
download	src-b60736ec1405bb0a8dd40989f67ef4c93da068ab.tar.gz src-b60736ec1405bb0a8dd40989f67ef4c93da068ab.zip

Vendor import of llvm-project main 8e464dd76bef, the last commit before
the upstream release/12.x branch was created.

(ref: vendor/llvm-project/llvmorg-12-init-17869-g8e464dd76bef)

Diffstat (limited to 'compiler-rt/lib/builtins/divsf3.c')

-rw-r--r--

compiler-rt/lib/builtins/divsf3.c

174

1 files changed, 5 insertions, 169 deletions

diff --git a/compiler-rt/lib/builtins/divsf3.c b/compiler-rt/lib/builtins/divsf3.c
index 593f93b45ac2..5744c015240b 100644
--- a/compiler-rt/lib/builtins/divsf3.c
+++ b/compiler-rt/lib/builtins/divsf3.c

@@ -9,181 +9,17 @@

// This file implements single-precision soft-float division

// with the IEEE-754 default rounding (to nearest, ties to even).

-// For simplicity, this implementation currently flushes denormals to zero.

-// It should be a fairly straightforward exercise to implement gradual

-// underflow with correct rounding.

-//

//===----------------------------------------------------------------------===//

#define SINGLE_PRECISION

-#include "fp_lib.h"

-COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) {

- const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;

- const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;

- const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;

- rep_t aSignificand = toRep(a) & significandMask;

- rep_t bSignificand = toRep(b) & significandMask;

- int scale = 0;

- // Detect if a or b is zero, denormal, infinity, or NaN.

- if (aExponent - 1U >= maxExponent - 1U ||

- bExponent - 1U >= maxExponent - 1U) {

- const rep_t aAbs = toRep(a) & absMask;

- const rep_t bAbs = toRep(b) & absMask;

- // NaN / anything = qNaN

- if (aAbs > infRep)

- return fromRep(toRep(a) | quietBit);

- // anything / NaN = qNaN

- if (bAbs > infRep)

- return fromRep(toRep(b) | quietBit);

- if (aAbs == infRep) {

- // infinity / infinity = NaN

- if (bAbs == infRep)

- return fromRep(qnanRep);

- // infinity / anything else = +/- infinity

- else

- return fromRep(aAbs | quotientSign);

- }

- // anything else / infinity = +/- 0

- if (bAbs == infRep)

- return fromRep(quotientSign);

- if (!aAbs) {

- // zero / zero = NaN

- if (!bAbs)

- return fromRep(qnanRep);

- // zero / anything else = +/- zero

- else

- return fromRep(quotientSign);

- }

- // anything else / zero = +/- infinity

- if (!bAbs)

- return fromRep(infRep | quotientSign);

- // One or both of a or b is denormal. The other (if applicable) is a

- // normal number. Renormalize one or both of a and b, and set scale to

- // include the necessary exponent adjustment.

- if (aAbs < implicitBit)

- scale += normalize(&aSignificand);

- if (bAbs < implicitBit)

- scale -= normalize(&bSignificand);

- }

- // Set the implicit significand bit. If we fell through from the

- // denormal path it was already set by normalize( ), but setting it twice

- // won't hurt anything.

- aSignificand |= implicitBit;

- bSignificand |= implicitBit;

- int quotientExponent = aExponent - bExponent + scale;

- // 0x7504F333 / 2^32 + 1 = 3/4 + 1/sqrt(2)

- // Align the significand of b as a Q31 fixed-point number in the range

- // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax

- // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This

- // is accurate to about 3.5 binary digits.

- uint32_t q31b = bSignificand << 8;

- uint32_t reciprocal = UINT32_C(0x7504f333) - q31b;

- // Now refine the reciprocal estimate using a Newton-Raphson iteration:

- //

- // x1 = x0 * (2 - x0 * b)

- //

- // This doubles the number of correct binary digits in the approximation

- // with each iteration.

- uint32_t correction;

- correction = -((uint64_t)reciprocal * q31b >> 32);

- reciprocal = (uint64_t)reciprocal * correction >> 31;

- correction = -((uint64_t)reciprocal * q31b >> 32);

- reciprocal = (uint64_t)reciprocal * correction >> 31;

- correction = -((uint64_t)reciprocal * q31b >> 32);

- reciprocal = (uint64_t)reciprocal * correction >> 31;

- // Adust the final 32-bit reciprocal estimate downward to ensure that it is

- // strictly smaller than the infinitely precise exact reciprocal. Because

- // the computation of the Newton-Raphson step is truncating at every step,

- // this adjustment is small; most of the work is already done.

- reciprocal -= 2;

- // The numerical reciprocal is accurate to within 2^-28, lies in the

- // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller

- // than the true reciprocal of b. Multiplying a by this reciprocal thus

- // gives a numerical q = a/b in Q24 with the following properties:

- //

- // 1. q < a/b

- // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0)

- // 3. The error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes

- // from the fact that we truncate the product, and the 2^27 term

- // is the error in the reciprocal of b scaled by the maximum

- // possible value of a. As a consequence of this error bound,

- // either q or nextafter(q) is the correctly rounded.

- rep_t quotient = (uint64_t)reciprocal * (aSignificand << 1) >> 32;

- // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).

- // In either case, we are going to compute a residual of the form

- //

- // r = a - q*b

- //

- // We know from the construction of q that r satisfies:

- //

- // 0 <= r < ulp(q)*b

- //

- // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we

- // already have the correct result. The exact halfway case cannot occur.

- // We also take this time to right shift quotient if it falls in the [1,2)

- // range and adjust the exponent accordingly.

- rep_t residual;

- if (quotient < (implicitBit << 1)) {

- residual = (aSignificand << 24) - quotient * bSignificand;

- quotientExponent--;

- } else {

- quotient >>= 1;

- residual = (aSignificand << 23) - quotient * bSignificand;

- }

- const int writtenExponent = quotientExponent + exponentBias;

- if (writtenExponent >= maxExponent) {

- // If we have overflowed the exponent, return infinity.

- return fromRep(infRep | quotientSign);

- }

+#define NUMBER_OF_HALF_ITERATIONS 0

+#define NUMBER_OF_FULL_ITERATIONS 3

+#define USE_NATIVE_FULL_ITERATIONS

- else if (writtenExponent < 1) {

- if (writtenExponent == 0) {

- // Check whether the rounded result is normal.

- const bool round = (residual << 1) > bSignificand;

- // Clear the implicit bit.

- rep_t absResult = quotient & significandMask;

- // Round.

- absResult += round;

- if (absResult & ~significandMask) {

- // The rounded result is normal; return it.

- return fromRep(absResult | quotientSign);

- }

- // Flush denormals to zero. In the future, it would be nice to add

- // code to round them correctly.

- return fromRep(quotientSign);

- }

+#include "fp_div_impl.inc"

- else {

- const bool round = (residual << 1) > bSignificand;

- // Clear the implicit bit.

- rep_t absResult = quotient & significandMask;

- // Insert the exponent.

- absResult |= (rep_t)writtenExponent << significandBits;

- // Round.

- absResult += round;

- // Insert the sign and return.

- return fromRep(absResult | quotientSign);

- }

+COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { return __divXf3__(a, b); }

#if defined(__ARM_EABI__)

#if defined(COMPILER_RT_ARMHF_TARGET)