diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2018-11-30 21:39:47 +0000 | 
|---|---|---|
| committer | Rich Felker <dalias@aerifal.cx> | 2019-04-17 23:45:25 -0400 | 
| commit | e16f7b3c02e17d0ace779a11f0d53a9c05fdd434 (patch) | |
| tree | f60943599162fe681d5753aa6744b471ec4193a9 /src | |
| parent | 2a3210cf4abff0a69ff3e7adc66591dfe6ab2197 (diff) | |
| download | musl-e16f7b3c02e17d0ace779a11f0d53a9c05fdd434.tar.gz | |
math: new exp and exp2
from https://github.com/ARM-software/optimized-routines,
commit 04884bd04eac4b251da4026900010ea7d8850edc
TOINT_INTRINSICS and EXP_USE_TOINT_NARROW cases are unused.
The underflow exception is signaled if the result is in the subnormal
range even if the result is exact (e.g. exp2(-1023.0)).
code size change: -1672 bytes.
benchmark on x86_64 before, after, speedup:
-Os:
   exp rthruput:  12.73 ns/call  6.68 ns/call 1.91x
    exp latency:  45.78 ns/call 21.79 ns/call 2.1x
  exp2 rthruput:   6.35 ns/call  5.26 ns/call 1.21x
   exp2 latency:  26.00 ns/call 16.58 ns/call 1.57x
-O3:
   exp rthruput:  12.75 ns/call  6.73 ns/call 1.89x
    exp latency:  45.91 ns/call 21.80 ns/call 2.11x
  exp2 rthruput:   6.47 ns/call  5.40 ns/call 1.2x
   exp2 latency:  26.03 ns/call 16.54 ns/call 1.57x
Diffstat (limited to 'src')
| -rw-r--r-- | src/math/exp.c | 240 | ||||
| -rw-r--r-- | src/math/exp2.c | 466 | ||||
| -rw-r--r-- | src/math/exp_data.c | 182 | ||||
| -rw-r--r-- | src/math/exp_data.h | 26 | 
4 files changed, 434 insertions, 480 deletions
diff --git a/src/math/exp.c b/src/math/exp.c index 9ea672fa..b764d73c 100644 --- a/src/math/exp.c +++ b/src/math/exp.c @@ -1,134 +1,134 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_exp.c */  /* - * ==================================================== - * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * Double-precision e^x function.   * - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* exp(x) - * Returns the exponential of x. - * - * Method - *   1. Argument reduction: - *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. - *      Given x, find r and integer k such that - * - *               x = k*ln2 + r,  |r| <= 0.5*ln2. - * - *      Here r will be represented as r = hi-lo for better - *      accuracy. - * - *   2. Approximation of exp(r) by a special rational function on - *      the interval [0,0.34658]: - *      Write - *          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... - *      We use a special Remez algorithm on [0,0.34658] to generate - *      a polynomial of degree 5 to approximate R. The maximum error - *      of this polynomial approximation is bounded by 2**-59. In - *      other words, - *          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 - *      (where z=r*r, and the values of P1 to P5 are listed below) - *      and - *          |                  5          |     -59 - *          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2 - *          |                             | - *      The computation of exp(r) thus becomes - *                              2*r - *              exp(r) = 1 + ---------- - *                            R(r) - r - *                                 r*c(r) - *                     = 1 + r + ----------- (for better accuracy) - *                                2 - c(r) - *      where - *                              2       4             10 - *              c(r) = r - (P1*r  + P2*r  + ... + P5*r   ). - * - *   3. Scale back to obtain exp(x): - *      From step 1, we have - *         exp(x) = 2^k * exp(r) - * - * Special cases: - *      exp(INF) is INF, exp(NaN) is NaN; - *      exp(-INF) is 0, and - *      for finite argument, only exp(0)=1 is exact. - * - * Accuracy: - *      according to an error analysis, the error is always less than - *      1 ulp (unit in the last place). - * - * Misc. info. - *      For IEEE double - *          if x >  709.782712893383973096 then exp(x) overflows - *          if x < -745.133219101941108420 then exp(x) underflows + * Copyright (c) 2018, Arm Limited. + * SPDX-License-Identifier: MIT   */ +#include <math.h> +#include <stdint.h>  #include "libm.h" +#include "exp_data.h" -static const double -half[2] = {0.5,-0.5}, -ln2hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ -ln2lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ -invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ -P1   =  1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */ -P2   = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */ -P3   =  6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ -P4   = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ -P5   =  4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ +#define N (1 << EXP_TABLE_BITS) +#define InvLn2N __exp_data.invln2N +#define NegLn2hiN __exp_data.negln2hiN +#define NegLn2loN __exp_data.negln2loN +#define Shift __exp_data.shift +#define T __exp_data.tab +#define C2 __exp_data.poly[5 - EXP_POLY_ORDER] +#define C3 __exp_data.poly[6 - EXP_POLY_ORDER] +#define C4 __exp_data.poly[7 - EXP_POLY_ORDER] +#define C5 __exp_data.poly[8 - EXP_POLY_ORDER] -double exp(double x) +/* Handle cases that may overflow or underflow when computing the result that +   is scale*(1+TMP) without intermediate rounding.  The bit representation of +   scale is in SBITS, however it has a computed exponent that may have +   overflown into the sign bit so that needs to be adjusted before using it as +   a double.  (int32_t)KI is the k used in the argument reduction and exponent +   adjustment of scale, positive k here means the result may overflow and +   negative k means the result may underflow.  */ +static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)  { -	double_t hi, lo, c, xx, y; -	int k, sign; -	uint32_t hx; - -	GET_HIGH_WORD(hx, x); -	sign = hx>>31; -	hx &= 0x7fffffff;  /* high word of |x| */ +	double_t scale, y; -	/* special cases */ -	if (hx >= 0x4086232b) {  /* if |x| >= 708.39... */ -		if (isnan(x)) -			return x; -		if (x > 709.782712893383973096) { -			/* overflow if x!=inf */ -			x *= 0x1p1023; -			return x; -		} -		if (x < -708.39641853226410622) { -			/* underflow if x!=-inf */ -			FORCE_EVAL((float)(-0x1p-149/x)); -			if (x < -745.13321910194110842) -				return 0; -		} +	if ((ki & 0x80000000) == 0) { +		/* k > 0, the exponent of scale might have overflowed by <= 460.  */ +		sbits -= 1009ull << 52; +		scale = asdouble(sbits); +		y = 0x1p1009 * (scale + scale * tmp); +		return eval_as_double(y); +	} +	/* k < 0, need special care in the subnormal range.  */ +	sbits += 1022ull << 52; +	scale = asdouble(sbits); +	y = scale + scale * tmp; +	if (y < 1.0) { +		/* Round y to the right precision before scaling it into the subnormal +		 range to avoid double rounding that can cause 0.5+E/2 ulp error where +		 E is the worst-case ulp error outside the subnormal range.  So this +		 is only useful if the goal is better than 1 ulp worst-case error.  */ +		double_t hi, lo; +		lo = scale - y + scale * tmp; +		hi = 1.0 + y; +		lo = 1.0 - hi + y + lo; +		y = eval_as_double(hi + lo) - 1.0; +		/* Avoid -0.0 with downward rounding.  */ +		if (WANT_ROUNDING && y == 0.0) +			y = 0.0; +		/* The underflow exception needs to be signaled explicitly.  */ +		fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);  	} +	y = 0x1p-1022 * y; +	return eval_as_double(y); +} -	/* argument reduction */ -	if (hx > 0x3fd62e42) {  /* if |x| > 0.5 ln2 */ -		if (hx >= 0x3ff0a2b2)  /* if |x| >= 1.5 ln2 */ -			k = (int)(invln2*x + half[sign]); -		else -			k = 1 - sign - sign; -		hi = x - k*ln2hi;  /* k*ln2hi is exact here */ -		lo = k*ln2lo; -		x = hi - lo; -	} else if (hx > 0x3e300000)  {  /* if |x| > 2**-28 */ -		k = 0; -		hi = x; -		lo = 0; -	} else { -		/* inexact if x!=0 */ -		FORCE_EVAL(0x1p1023 + x); -		return 1 + x; +/* Top 12 bits of a double (sign and exponent bits).  */ +static inline uint32_t top12(double x) +{ +	return asuint64(x) >> 52; +} + +double exp(double x) +{ +	uint32_t abstop; +	uint64_t ki, idx, top, sbits; +	double_t kd, z, r, r2, scale, tail, tmp; + +	abstop = top12(x) & 0x7ff; +	if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) { +		if (abstop - top12(0x1p-54) >= 0x80000000) +			/* Avoid spurious underflow for tiny x.  */ +			/* Note: 0 is common input.  */ +			return WANT_ROUNDING ? 1.0 + x : 1.0; +		if (abstop >= top12(1024.0)) { +			if (asuint64(x) == asuint64(-INFINITY)) +				return 0.0; +			if (abstop >= top12(INFINITY)) +				return 1.0 + x; +			if (asuint64(x) >> 63) +				return __math_uflow(0); +			else +				return __math_oflow(0); +		} +		/* Large x is special cased below.  */ +		abstop = 0;  	} -	/* x is now in primary range */ -	xx = x*x; -	c = x - xx*(P1+xx*(P2+xx*(P3+xx*(P4+xx*P5)))); -	y = 1 + (x*c/(2-c) - lo + hi); -	if (k == 0) -		return y; -	return scalbn(y, k); +	/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)].  */ +	/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N].  */ +	z = InvLn2N * x; +#if TOINT_INTRINSICS +	kd = roundtoint(z); +	ki = converttoint(z); +#elif EXP_USE_TOINT_NARROW +	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */ +	kd = eval_as_double(z + Shift); +	ki = asuint64(kd) >> 16; +	kd = (double_t)(int32_t)ki; +#else +	/* z - kd is in [-1, 1] in non-nearest rounding modes.  */ +	kd = eval_as_double(z + Shift); +	ki = asuint64(kd); +	kd -= Shift; +#endif +	r = x + kd * NegLn2hiN + kd * NegLn2loN; +	/* 2^(k/N) ~= scale * (1 + tail).  */ +	idx = 2 * (ki % N); +	top = ki << (52 - EXP_TABLE_BITS); +	tail = asdouble(T[idx]); +	/* This is only a valid scale when -1023*N < k < 1024*N.  */ +	sbits = T[idx + 1] + top; +	/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */ +	/* Evaluation is optimized assuming superscalar pipelined execution.  */ +	r2 = r * r; +	/* Without fma the worst case error is 0.25/N ulp larger.  */ +	/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */ +	tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); +	if (predict_false(abstop == 0)) +		return specialcase(tmp, sbits, ki); +	scale = asdouble(sbits); +	/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there +	   is no spurious underflow here even without fma.  */ +	return eval_as_double(scale + scale * tmp);  } diff --git a/src/math/exp2.c b/src/math/exp2.c index e14adba5..e0ff54bd 100644 --- a/src/math/exp2.c +++ b/src/math/exp2.c @@ -1,375 +1,121 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_exp2.c */ -/*- - * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - *    notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - *    notice, this list of conditions and the following disclaimer in the - *    documentation and/or other materials provided with the distribution. +/* + * Double-precision 2^x function.   * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * Copyright (c) 2018, Arm Limited. + * SPDX-License-Identifier: MIT   */ +#include <math.h> +#include <stdint.h>  #include "libm.h" +#include "exp_data.h" -#define TBLSIZE 256 +#define N (1 << EXP_TABLE_BITS) +#define Shift __exp_data.exp2_shift +#define T __exp_data.tab +#define C1 __exp_data.exp2_poly[0] +#define C2 __exp_data.exp2_poly[1] +#define C3 __exp_data.exp2_poly[2] +#define C4 __exp_data.exp2_poly[3] +#define C5 __exp_data.exp2_poly[4] -static const double -redux = 0x1.8p52 / TBLSIZE, -P1    = 0x1.62e42fefa39efp-1, -P2    = 0x1.ebfbdff82c575p-3, -P3    = 0x1.c6b08d704a0a6p-5, -P4    = 0x1.3b2ab88f70400p-7, -P5    = 0x1.5d88003875c74p-10; +/* Handle cases that may overflow or underflow when computing the result that +   is scale*(1+TMP) without intermediate rounding.  The bit representation of +   scale is in SBITS, however it has a computed exponent that may have +   overflown into the sign bit so that needs to be adjusted before using it as +   a double.  (int32_t)KI is the k used in the argument reduction and exponent +   adjustment of scale, positive k here means the result may overflow and +   negative k means the result may underflow.  */ +static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) +{ +	double_t scale, y; -static const double tbl[TBLSIZE * 2] = { -/*  exp2(z + eps)          eps     */ -  0x1.6a09e667f3d5dp-1,  0x1.9880p-44, -  0x1.6b052fa751744p-1,  0x1.8000p-50, -  0x1.6c012750bd9fep-1, -0x1.8780p-45, -  0x1.6cfdcddd476bfp-1,  0x1.ec00p-46, -  0x1.6dfb23c651a29p-1, -0x1.8000p-50, -  0x1.6ef9298593ae3p-1, -0x1.c000p-52, -  0x1.6ff7df9519386p-1, -0x1.fd80p-45, -  0x1.70f7466f42da3p-1, -0x1.c880p-45, -  0x1.71f75e8ec5fc3p-1,  0x1.3c00p-46, -  0x1.72f8286eacf05p-1, -0x1.8300p-44, -  0x1.73f9a48a58152p-1, -0x1.0c00p-47, -  0x1.74fbd35d7ccfcp-1,  0x1.f880p-45, -  0x1.75feb564267f1p-1,  0x1.3e00p-47, -  0x1.77024b1ab6d48p-1, -0x1.7d00p-45, -  0x1.780694fde5d38p-1, -0x1.d000p-50, -  0x1.790b938ac1d00p-1,  0x1.3000p-49, -  0x1.7a11473eb0178p-1, -0x1.d000p-49, -  0x1.7b17b0976d060p-1,  0x1.0400p-45, -  0x1.7c1ed0130c133p-1,  0x1.0000p-53, -  0x1.7d26a62ff8636p-1, -0x1.6900p-45, -  0x1.7e2f336cf4e3bp-1, -0x1.2e00p-47, -  0x1.7f3878491c3e8p-1, -0x1.4580p-45, -  0x1.80427543e1b4ep-1,  0x1.3000p-44, -  0x1.814d2add1071ap-1,  0x1.f000p-47, -  0x1.82589994ccd7ep-1, -0x1.1c00p-45, -  0x1.8364c1eb942d0p-1,  0x1.9d00p-45, -  0x1.8471a4623cab5p-1,  0x1.7100p-43, -  0x1.857f4179f5bbcp-1,  0x1.2600p-45, -  0x1.868d99b4491afp-1, -0x1.2c40p-44, -  0x1.879cad931a395p-1, -0x1.3000p-45, -  0x1.88ac7d98a65b8p-1, -0x1.a800p-45, -  0x1.89bd0a4785800p-1, -0x1.d000p-49, -  0x1.8ace5422aa223p-1,  0x1.3280p-44, -  0x1.8be05bad619fap-1,  0x1.2b40p-43, -  0x1.8cf3216b54383p-1, -0x1.ed00p-45, -  0x1.8e06a5e08664cp-1, -0x1.0500p-45, -  0x1.8f1ae99157807p-1,  0x1.8280p-45, -  0x1.902fed0282c0ep-1, -0x1.cb00p-46, -  0x1.9145b0b91ff96p-1, -0x1.5e00p-47, -  0x1.925c353aa2ff9p-1,  0x1.5400p-48, -  0x1.93737b0cdc64ap-1,  0x1.7200p-46, -  0x1.948b82b5f98aep-1, -0x1.9000p-47, -  0x1.95a44cbc852cbp-1,  0x1.5680p-45, -  0x1.96bdd9a766f21p-1, -0x1.6d00p-44, -  0x1.97d829fde4e2ap-1, -0x1.1000p-47, -  0x1.98f33e47a23a3p-1,  0x1.d000p-45, -  0x1.9a0f170ca0604p-1, -0x1.8a40p-44, -  0x1.9b2bb4d53ff89p-1,  0x1.55c0p-44, -  0x1.9c49182a3f15bp-1,  0x1.6b80p-45, -  0x1.9d674194bb8c5p-1, -0x1.c000p-49, -  0x1.9e86319e3238ep-1,  0x1.7d00p-46, -  0x1.9fa5e8d07f302p-1,  0x1.6400p-46, -  0x1.a0c667b5de54dp-1, -0x1.5000p-48, -  0x1.a1e7aed8eb8f6p-1,  0x1.9e00p-47, -  0x1.a309bec4a2e27p-1,  0x1.ad80p-45, -  0x1.a42c980460a5dp-1, -0x1.af00p-46, -  0x1.a5503b23e259bp-1,  0x1.b600p-47, -  0x1.a674a8af46213p-1,  0x1.8880p-44, -  0x1.a799e1330b3a7p-1,  0x1.1200p-46, -  0x1.a8bfe53c12e8dp-1,  0x1.6c00p-47, -  0x1.a9e6b5579fcd2p-1, -0x1.9b80p-45, -  0x1.ab0e521356fb8p-1,  0x1.b700p-45, -  0x1.ac36bbfd3f381p-1,  0x1.9000p-50, -  0x1.ad5ff3a3c2780p-1,  0x1.4000p-49, -  0x1.ae89f995ad2a3p-1, -0x1.c900p-45, -  0x1.afb4ce622f367p-1,  0x1.6500p-46, -  0x1.b0e07298db790p-1,  0x1.fd40p-45, -  0x1.b20ce6c9a89a9p-1,  0x1.2700p-46, -  0x1.b33a2b84f1a4bp-1,  0x1.d470p-43, -  0x1.b468415b747e7p-1, -0x1.8380p-44, -  0x1.b59728de5593ap-1,  0x1.8000p-54, -  0x1.b6c6e29f1c56ap-1,  0x1.ad00p-47, -  0x1.b7f76f2fb5e50p-1,  0x1.e800p-50, -  0x1.b928cf22749b2p-1, -0x1.4c00p-47, -  0x1.ba5b030a10603p-1, -0x1.d700p-47, -  0x1.bb8e0b79a6f66p-1,  0x1.d900p-47, -  0x1.bcc1e904bc1ffp-1,  0x1.2a00p-47, -  0x1.bdf69c3f3a16fp-1, -0x1.f780p-46, -  0x1.bf2c25bd71db8p-1, -0x1.0a00p-46, -  0x1.c06286141b2e9p-1, -0x1.1400p-46, -  0x1.c199bdd8552e0p-1,  0x1.be00p-47, -  0x1.c2d1cd9fa64eep-1, -0x1.9400p-47, -  0x1.c40ab5fffd02fp-1, -0x1.ed00p-47, -  0x1.c544778fafd15p-1,  0x1.9660p-44, -  0x1.c67f12e57d0cbp-1, -0x1.a100p-46, -  0x1.c7ba88988c1b6p-1, -0x1.8458p-42, -  0x1.c8f6d9406e733p-1, -0x1.a480p-46, -  0x1.ca3405751c4dfp-1,  0x1.b000p-51, -  0x1.cb720dcef9094p-1,  0x1.1400p-47, -  0x1.ccb0f2e6d1689p-1,  0x1.0200p-48, -  0x1.cdf0b555dc412p-1,  0x1.3600p-48, -  0x1.cf3155b5bab3bp-1, -0x1.6900p-47, -  0x1.d072d4a0789bcp-1,  0x1.9a00p-47, -  0x1.d1b532b08c8fap-1, -0x1.5e00p-46, -  0x1.d2f87080d8a85p-1,  0x1.d280p-46, -  0x1.d43c8eacaa203p-1,  0x1.1a00p-47, -  0x1.d5818dcfba491p-1,  0x1.f000p-50, -  0x1.d6c76e862e6a1p-1, -0x1.3a00p-47, -  0x1.d80e316c9834ep-1, -0x1.cd80p-47, -  0x1.d955d71ff6090p-1,  0x1.4c00p-48, -  0x1.da9e603db32aep-1,  0x1.f900p-48, -  0x1.dbe7cd63a8325p-1,  0x1.9800p-49, -  0x1.dd321f301b445p-1, -0x1.5200p-48, -  0x1.de7d5641c05bfp-1, -0x1.d700p-46, -  0x1.dfc97337b9aecp-1, -0x1.6140p-46, -  0x1.e11676b197d5ep-1,  0x1.b480p-47, -  0x1.e264614f5a3e7p-1,  0x1.0ce0p-43, -  0x1.e3b333b16ee5cp-1,  0x1.c680p-47, -  0x1.e502ee78b3fb4p-1, -0x1.9300p-47, -  0x1.e653924676d68p-1, -0x1.5000p-49, -  0x1.e7a51fbc74c44p-1, -0x1.7f80p-47, -  0x1.e8f7977cdb726p-1, -0x1.3700p-48, -  0x1.ea4afa2a490e8p-1,  0x1.5d00p-49, -  0x1.eb9f4867ccae4p-1,  0x1.61a0p-46, -  0x1.ecf482d8e680dp-1,  0x1.5500p-48, -  0x1.ee4aaa2188514p-1,  0x1.6400p-51, -  0x1.efa1bee615a13p-1, -0x1.e800p-49, -  0x1.f0f9c1cb64106p-1, -0x1.a880p-48, -  0x1.f252b376bb963p-1, -0x1.c900p-45, -  0x1.f3ac948dd7275p-1,  0x1.a000p-53, -  0x1.f50765b6e4524p-1, -0x1.4f00p-48, -  0x1.f6632798844fdp-1,  0x1.a800p-51, -  0x1.f7bfdad9cbe38p-1,  0x1.abc0p-48, -  0x1.f91d802243c82p-1, -0x1.4600p-50, -  0x1.fa7c1819e908ep-1, -0x1.b0c0p-47, -  0x1.fbdba3692d511p-1, -0x1.0e00p-51, -  0x1.fd3c22b8f7194p-1, -0x1.0de8p-46, -  0x1.fe9d96b2a23eep-1,  0x1.e430p-49, -  0x1.0000000000000p+0,  0x0.0000p+0, -  0x1.00b1afa5abcbep+0, -0x1.3400p-52, -  0x1.0163da9fb3303p+0, -0x1.2170p-46, -  0x1.02168143b0282p+0,  0x1.a400p-52, -  0x1.02c9a3e77806cp+0,  0x1.f980p-49, -  0x1.037d42e11bbcap+0, -0x1.7400p-51, -  0x1.04315e86e7f89p+0,  0x1.8300p-50, -  0x1.04e5f72f65467p+0, -0x1.a3f0p-46, -  0x1.059b0d315855ap+0, -0x1.2840p-47, -  0x1.0650a0e3c1f95p+0,  0x1.1600p-48, -  0x1.0706b29ddf71ap+0,  0x1.5240p-46, -  0x1.07bd42b72a82dp+0, -0x1.9a00p-49, -  0x1.0874518759bd0p+0,  0x1.6400p-49, -  0x1.092bdf66607c8p+0, -0x1.0780p-47, -  0x1.09e3ecac6f383p+0, -0x1.8000p-54, -  0x1.0a9c79b1f3930p+0,  0x1.fa00p-48, -  0x1.0b5586cf988fcp+0, -0x1.ac80p-48, -  0x1.0c0f145e46c8ap+0,  0x1.9c00p-50, -  0x1.0cc922b724816p+0,  0x1.5200p-47, -  0x1.0d83b23395dd8p+0, -0x1.ad00p-48, -  0x1.0e3ec32d3d1f3p+0,  0x1.bac0p-46, -  0x1.0efa55fdfa9a6p+0, -0x1.4e80p-47, -  0x1.0fb66affed2f0p+0, -0x1.d300p-47, -  0x1.1073028d7234bp+0,  0x1.1500p-48, -  0x1.11301d0125b5bp+0,  0x1.c000p-49, -  0x1.11edbab5e2af9p+0,  0x1.6bc0p-46, -  0x1.12abdc06c31d5p+0,  0x1.8400p-49, -  0x1.136a814f2047dp+0, -0x1.ed00p-47, -  0x1.1429aaea92de9p+0,  0x1.8e00p-49, -  0x1.14e95934f3138p+0,  0x1.b400p-49, -  0x1.15a98c8a58e71p+0,  0x1.5300p-47, -  0x1.166a45471c3dfp+0,  0x1.3380p-47, -  0x1.172b83c7d5211p+0,  0x1.8d40p-45, -  0x1.17ed48695bb9fp+0, -0x1.5d00p-47, -  0x1.18af9388c8d93p+0, -0x1.c880p-46, -  0x1.1972658375d66p+0,  0x1.1f00p-46, -  0x1.1a35beb6fcba7p+0,  0x1.0480p-46, -  0x1.1af99f81387e3p+0, -0x1.7390p-43, -  0x1.1bbe084045d54p+0,  0x1.4e40p-45, -  0x1.1c82f95281c43p+0, -0x1.a200p-47, -  0x1.1d4873168b9b2p+0,  0x1.3800p-49, -  0x1.1e0e75eb44031p+0,  0x1.ac00p-49, -  0x1.1ed5022fcd938p+0,  0x1.1900p-47, -  0x1.1f9c18438cdf7p+0, -0x1.b780p-46, -  0x1.2063b88628d8fp+0,  0x1.d940p-45, -  0x1.212be3578a81ep+0,  0x1.8000p-50, -  0x1.21f49917ddd41p+0,  0x1.b340p-45, -  0x1.22bdda2791323p+0,  0x1.9f80p-46, -  0x1.2387a6e7561e7p+0, -0x1.9c80p-46, -  0x1.2451ffb821427p+0,  0x1.2300p-47, -  0x1.251ce4fb2a602p+0, -0x1.3480p-46, -  0x1.25e85711eceb0p+0,  0x1.2700p-46, -  0x1.26b4565e27d16p+0,  0x1.1d00p-46, -  0x1.2780e341de00fp+0,  0x1.1ee0p-44, -  0x1.284dfe1f5633ep+0, -0x1.4c00p-46, -  0x1.291ba7591bb30p+0, -0x1.3d80p-46, -  0x1.29e9df51fdf09p+0,  0x1.8b00p-47, -  0x1.2ab8a66d10e9bp+0, -0x1.27c0p-45, -  0x1.2b87fd0dada3ap+0,  0x1.a340p-45, -  0x1.2c57e39771af9p+0, -0x1.0800p-46, -  0x1.2d285a6e402d9p+0, -0x1.ed00p-47, -  0x1.2df961f641579p+0, -0x1.4200p-48, -  0x1.2ecafa93e2ecfp+0, -0x1.4980p-45, -  0x1.2f9d24abd8822p+0, -0x1.6300p-46, -  0x1.306fe0a31b625p+0, -0x1.2360p-44, -  0x1.31432edeea50bp+0, -0x1.0df8p-40, -  0x1.32170fc4cd7b8p+0, -0x1.2480p-45, -  0x1.32eb83ba8e9a2p+0, -0x1.5980p-45, -  0x1.33c08b2641766p+0,  0x1.ed00p-46, -  0x1.3496266e3fa27p+0, -0x1.c000p-50, -  0x1.356c55f929f0fp+0, -0x1.0d80p-44, -  0x1.36431a2de88b9p+0,  0x1.2c80p-45, -  0x1.371a7373aaa39p+0,  0x1.0600p-45, -  0x1.37f26231e74fep+0, -0x1.6600p-46, -  0x1.38cae6d05d838p+0, -0x1.ae00p-47, -  0x1.39a401b713ec3p+0, -0x1.4720p-43, -  0x1.3a7db34e5a020p+0,  0x1.8200p-47, -  0x1.3b57fbfec6e95p+0,  0x1.e800p-44, -  0x1.3c32dc313a8f2p+0,  0x1.f800p-49, -  0x1.3d0e544ede122p+0, -0x1.7a00p-46, -  0x1.3dea64c1234bbp+0,  0x1.6300p-45, -  0x1.3ec70df1c4eccp+0, -0x1.8a60p-43, -  0x1.3fa4504ac7e8cp+0, -0x1.cdc0p-44, -  0x1.40822c367a0bbp+0,  0x1.5b80p-45, -  0x1.4160a21f72e95p+0,  0x1.ec00p-46, -  0x1.423fb27094646p+0, -0x1.3600p-46, -  0x1.431f5d950a920p+0,  0x1.3980p-45, -  0x1.43ffa3f84b9ebp+0,  0x1.a000p-48, -  0x1.44e0860618919p+0, -0x1.6c00p-48, -  0x1.45c2042a7d201p+0, -0x1.bc00p-47, -  0x1.46a41ed1d0016p+0, -0x1.2800p-46, -  0x1.4786d668b3326p+0,  0x1.0e00p-44, -  0x1.486a2b5c13c00p+0, -0x1.d400p-45, -  0x1.494e1e192af04p+0,  0x1.c200p-47, -  0x1.4a32af0d7d372p+0, -0x1.e500p-46, -  0x1.4b17dea6db801p+0,  0x1.7800p-47, -  0x1.4bfdad53629e1p+0, -0x1.3800p-46, -  0x1.4ce41b817c132p+0,  0x1.0800p-47, -  0x1.4dcb299fddddbp+0,  0x1.c700p-45, -  0x1.4eb2d81d8ab96p+0, -0x1.ce00p-46, -  0x1.4f9b2769d2d02p+0,  0x1.9200p-46, -  0x1.508417f4531c1p+0, -0x1.8c00p-47, -  0x1.516daa2cf662ap+0, -0x1.a000p-48, -  0x1.5257de83f51eap+0,  0x1.a080p-43, -  0x1.5342b569d4edap+0, -0x1.6d80p-45, -  0x1.542e2f4f6ac1ap+0, -0x1.2440p-44, -  0x1.551a4ca5d94dbp+0,  0x1.83c0p-43, -  0x1.56070dde9116bp+0,  0x1.4b00p-45, -  0x1.56f4736b529dep+0,  0x1.15a0p-43, -  0x1.57e27dbe2c40ep+0, -0x1.9e00p-45, -  0x1.58d12d497c76fp+0, -0x1.3080p-45, -  0x1.59c0827ff0b4cp+0,  0x1.dec0p-43, -  0x1.5ab07dd485427p+0, -0x1.4000p-51, -  0x1.5ba11fba87af4p+0,  0x1.0080p-44, -  0x1.5c9268a59460bp+0, -0x1.6c80p-45, -  0x1.5d84590998e3fp+0,  0x1.69a0p-43, -  0x1.5e76f15ad20e1p+0, -0x1.b400p-46, -  0x1.5f6a320dcebcap+0,  0x1.7700p-46, -  0x1.605e1b976dcb8p+0,  0x1.6f80p-45, -  0x1.6152ae6cdf715p+0,  0x1.1000p-47, -  0x1.6247eb03a5531p+0, -0x1.5d00p-46, -  0x1.633dd1d1929b5p+0, -0x1.2d00p-46, -  0x1.6434634ccc313p+0, -0x1.a800p-49, -  0x1.652b9febc8efap+0, -0x1.8600p-45, -  0x1.6623882553397p+0,  0x1.1fe0p-40, -  0x1.671c1c708328ep+0, -0x1.7200p-44, -  0x1.68155d44ca97ep+0,  0x1.6800p-49, -  0x1.690f4b19e9471p+0, -0x1.9780p-45, -}; +	if ((ki & 0x80000000) == 0) { +		/* k > 0, the exponent of scale might have overflowed by 1.  */ +		sbits -= 1ull << 52; +		scale = asdouble(sbits); +		y = 2 * (scale + scale * tmp); +		return eval_as_double(y); +	} +	/* k < 0, need special care in the subnormal range.  */ +	sbits += 1022ull << 52; +	scale = asdouble(sbits); +	y = scale + scale * tmp; +	if (y < 1.0) { +		/* Round y to the right precision before scaling it into the subnormal +		   range to avoid double rounding that can cause 0.5+E/2 ulp error where +		   E is the worst-case ulp error outside the subnormal range.  So this +		   is only useful if the goal is better than 1 ulp worst-case error.  */ +		double_t hi, lo; +		lo = scale - y + scale * tmp; +		hi = 1.0 + y; +		lo = 1.0 - hi + y + lo; +		y = eval_as_double(hi + lo) - 1.0; +		/* Avoid -0.0 with downward rounding.  */ +		if (WANT_ROUNDING && y == 0.0) +			y = 0.0; +		/* The underflow exception needs to be signaled explicitly.  */ +		fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022); +	} +	y = 0x1p-1022 * y; +	return eval_as_double(y); +} + +/* Top 12 bits of a double (sign and exponent bits).  */ +static inline uint32_t top12(double x) +{ +	return asuint64(x) >> 52; +} -/* - * exp2(x): compute the base 2 exponential of x - * - * Accuracy: Peak error < 0.503 ulp for normalized results. - * - * Method: (accurate tables) - * - *   Reduce x: - *     x = k + y, for integer k and |y| <= 1/2. - *     Thus we have exp2(x) = 2**k * exp2(y). - * - *   Reduce y: - *     y = i/TBLSIZE + z - eps[i] for integer i near y * TBLSIZE. - *     Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z - eps[i]), - *     with |z - eps[i]| <= 2**-9 + 2**-39 for the table used. - * - *   We compute exp2(i/TBLSIZE) via table lookup and exp2(z - eps[i]) via - *   a degree-5 minimax polynomial with maximum error under 1.3 * 2**-61. - *   The values in exp2t[] and eps[] are chosen such that - *   exp2t[i] = exp2(i/TBLSIZE + eps[i]), and eps[i] is a small offset such - *   that exp2t[i] is accurate to 2**-64. - * - *   Note that the range of i is +-TBLSIZE/2, so we actually index the tables - *   by i0 = i + TBLSIZE/2.  For cache efficiency, exp2t[] and eps[] are - *   virtual tables, interleaved in the real table tbl[]. - * - *   This method is due to Gal, with many details due to Gal and Bachelis: - * - *      Gal, S. and Bachelis, B.  An Accurate Elementary Mathematical Library - *      for the IEEE Floating Point Standard.  TOMS 17(1), 26-46 (1991). - */  double exp2(double x)  { -	double_t r, t, z; -	uint32_t ix, i0; -	union {double f; uint64_t i;} u = {x}; -	union {uint32_t u; int32_t i;} k; +	uint32_t abstop; +	uint64_t ki, idx, top, sbits; +	double_t kd, r, r2, scale, tail, tmp; -	/* Filter out exceptional cases. */ -	ix = u.i>>32 & 0x7fffffff; -	if (ix >= 0x408ff000) {  /* |x| >= 1022 or nan */ -		if (ix >= 0x40900000 && u.i>>63 == 0) {  /* x >= 1024 or nan */ -			/* overflow */ -			x *= 0x1p1023; -			return x; -		} -		if (ix >= 0x7ff00000)  /* -inf or -nan */ -			return -1/x; -		if (u.i>>63) {  /* x <= -1022 */ -			/* underflow */ -			if (x <= -1075 || x - 0x1p52 + 0x1p52 != x) -				FORCE_EVAL((float)(-0x1p-149/x)); -			if (x <= -1075) -				return 0; +	abstop = top12(x) & 0x7ff; +	if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) { +		if (abstop - top12(0x1p-54) >= 0x80000000) +			/* Avoid spurious underflow for tiny x.  */ +			/* Note: 0 is common input.  */ +			return WANT_ROUNDING ? 1.0 + x : 1.0; +		if (abstop >= top12(1024.0)) { +			if (asuint64(x) == asuint64(-INFINITY)) +				return 0.0; +			if (abstop >= top12(INFINITY)) +				return 1.0 + x; +			if (!(asuint64(x) >> 63)) +				return __math_oflow(0); +			else if (asuint64(x) >= asuint64(-1075.0)) +				return __math_uflow(0);  		} -	} else if (ix < 0x3c900000) {  /* |x| < 0x1p-54 */ -		return 1.0 + x; +		if (2 * asuint64(x) > 2 * asuint64(928.0)) +			/* Large x is special cased below.  */ +			abstop = 0;  	} -	/* Reduce x, computing z, i0, and k. */ -	u.f = x + redux; -	i0 = u.i; -	i0 += TBLSIZE / 2; -	k.u = i0 / TBLSIZE * TBLSIZE; -	k.i /= TBLSIZE; -	i0 %= TBLSIZE; -	u.f -= redux; -	z = x - u.f; - -	/* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ -	t = tbl[2*i0];       /* exp2t[i0] */ -	z -= tbl[2*i0 + 1];  /* eps[i0]   */ -	r = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5)))); - -	return scalbn(r, k.i); +	/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)].  */ +	/* x = k/N + r, with int k and r in [-1/2N, 1/2N].  */ +	kd = eval_as_double(x + Shift); +	ki = asuint64(kd); /* k.  */ +	kd -= Shift; /* k/N for int k.  */ +	r = x - kd; +	/* 2^(k/N) ~= scale * (1 + tail).  */ +	idx = 2 * (ki % N); +	top = ki << (52 - EXP_TABLE_BITS); +	tail = asdouble(T[idx]); +	/* This is only a valid scale when -1023*N < k < 1024*N.  */ +	sbits = T[idx + 1] + top; +	/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1).  */ +	/* Evaluation is optimized assuming superscalar pipelined execution.  */ +	r2 = r * r; +	/* Without fma the worst case error is 0.5/N ulp larger.  */ +	/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp.  */ +	tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); +	if (predict_false(abstop == 0)) +		return specialcase(tmp, sbits, ki); +	scale = asdouble(sbits); +	/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there +	   is no spurious underflow here even without fma.  */ +	return eval_as_double(scale + scale * tmp);  } diff --git a/src/math/exp_data.c b/src/math/exp_data.c new file mode 100644 index 00000000..21be0146 --- /dev/null +++ b/src/math/exp_data.c @@ -0,0 +1,182 @@ +/* + * Shared data between exp, exp2 and pow. + * + * Copyright (c) 2018, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include "exp_data.h" + +#define N (1 << EXP_TABLE_BITS) + +const struct exp_data __exp_data = { +// N/ln2 +.invln2N = 0x1.71547652b82fep0 * N, +// -ln2/N +.negln2hiN = -0x1.62e42fefa0000p-8, +.negln2loN = -0x1.cf79abc9e3b3ap-47, +// Used for rounding when !TOINT_INTRINSICS +#if EXP_USE_TOINT_NARROW +.shift = 0x1800000000.8p0, +#else +.shift = 0x1.8p52, +#endif +// exp polynomial coefficients. +.poly = { +// abs error: 1.555*2^-66 +// ulp error: 0.509 (0.511 without fma) +// if |x| < ln2/256+eps +// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65 +// abs error if |x| < ln2/128: 1.7145*2^-56 +0x1.ffffffffffdbdp-2, +0x1.555555555543cp-3, +0x1.55555cf172b91p-5, +0x1.1111167a4d017p-7, +}, +.exp2_shift = 0x1.8p52 / N, +// exp2 polynomial coefficients. +.exp2_poly = { +// abs error: 1.2195*2^-65 +// ulp error: 0.507 (0.511 without fma) +// if |x| < 1/256 +// abs error if |x| < 1/128: 1.9941*2^-56 +0x1.62e42fefa39efp-1, +0x1.ebfbdff82c424p-3, +0x1.c6b08d70cf4b5p-5, +0x1.3b2abd24650ccp-7, +0x1.5d7e09b4e3a84p-10, +}, +// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N) +// tab[2*k] = asuint64(T[k]) +// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N +.tab = { +0x0, 0x3ff0000000000000, +0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335, +0xbc7160139cd8dc5d, 0x3fefec9a3e778061, +0xbc905e7a108766d1, 0x3fefe315e86e7f85, +0x3c8cd2523567f613, 0x3fefd9b0d3158574, +0xbc8bce8023f98efa, 0x3fefd06b29ddf6de, +0x3c60f74e61e6c861, 0x3fefc74518759bc8, +0x3c90a3e45b33d399, 0x3fefbe3ecac6f383, +0x3c979aa65d837b6d, 0x3fefb5586cf9890f, +0x3c8eb51a92fdeffc, 0x3fefac922b7247f7, +0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2, +0xbc6a033489906e0b, 0x3fef9b66affed31b, +0xbc9556522a2fbd0e, 0x3fef9301d0125b51, +0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc, +0xbc91c923b9d5f416, 0x3fef829aaea92de0, +0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51, +0xbc801b15eaa59348, 0x3fef72b83c7d517b, +0xbc8f1ff055de323d, 0x3fef6af9388c8dea, +0x3c8b898c3f1353bf, 0x3fef635beb6fcb75, +0xbc96d99c7611eb26, 0x3fef5be084045cd4, +0x3c9aecf73e3a2f60, 0x3fef54873168b9aa, +0xbc8fe782cb86389d, 0x3fef4d5022fcd91d, +0x3c8a6f4144a6c38d, 0x3fef463b88628cd6, +0x3c807a05b0e4047d, 0x3fef3f49917ddc96, +0x3c968efde3a8a894, 0x3fef387a6e756238, +0x3c875e18f274487d, 0x3fef31ce4fb2a63f, +0x3c80472b981fe7f2, 0x3fef2b4565e27cdd, +0xbc96b87b3f71085e, 0x3fef24dfe1f56381, +0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1, +0xbc3d219b1a6fbffa, 0x3fef187fd0dad990, +0x3c8b3782720c0ab4, 0x3fef1285a6e4030b, +0x3c6e149289cecb8f, 0x3fef0cafa93e2f56, +0x3c834d754db0abb6, 0x3fef06fe0a31b715, +0x3c864201e2ac744c, 0x3fef0170fc4cd831, +0x3c8fdd395dd3f84a, 0x3feefc08b26416ff, +0xbc86a3803b8e5b04, 0x3feef6c55f929ff1, +0xbc924aedcc4b5068, 0x3feef1a7373aa9cb, +0xbc9907f81b512d8e, 0x3feeecae6d05d866, +0xbc71d1e83e9436d2, 0x3feee7db34e59ff7, +0xbc991919b3ce1b15, 0x3feee32dc313a8e5, +0x3c859f48a72a4c6d, 0x3feedea64c123422, +0xbc9312607a28698a, 0x3feeda4504ac801c, +0xbc58a78f4817895b, 0x3feed60a21f72e2a, +0xbc7c2c9b67499a1b, 0x3feed1f5d950a897, +0x3c4363ed60c2ac11, 0x3feece086061892d, +0x3c9666093b0664ef, 0x3feeca41ed1d0057, +0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0, +0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de, +0x3c7690cebb7aafb0, 0x3feebfdad5362a27, +0x3c931dbdeb54e077, 0x3feebcb299fddd0d, +0xbc8f94340071a38e, 0x3feeb9b2769d2ca7, +0xbc87deccdc93a349, 0x3feeb6daa2cf6642, +0xbc78dec6bd0f385f, 0x3feeb42b569d4f82, +0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f, +0x3c93350518fdd78e, 0x3feeaf4736b527da, +0x3c7b98b72f8a9b05, 0x3feead12d497c7fd, +0x3c9063e1e21c5409, 0x3feeab07dd485429, +0x3c34c7855019c6ea, 0x3feea9268a5946b7, +0x3c9432e62b64c035, 0x3feea76f15ad2148, +0xbc8ce44a6199769f, 0x3feea5e1b976dc09, +0xbc8c33c53bef4da8, 0x3feea47eb03a5585, +0xbc845378892be9ae, 0x3feea34634ccc320, +0xbc93cedd78565858, 0x3feea23882552225, +0x3c5710aa807e1964, 0x3feea155d44ca973, +0xbc93b3efbf5e2228, 0x3feea09e667f3bcd, +0xbc6a12ad8734b982, 0x3feea012750bdabf, +0xbc6367efb86da9ee, 0x3fee9fb23c651a2f, +0xbc80dc3d54e08851, 0x3fee9f7df9519484, +0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74, +0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174, +0xbc8619321e55e68a, 0x3fee9feb564267c9, +0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f, +0xbc7b32dcb94da51d, 0x3feea11473eb0187, +0x3c94ecfd5467c06b, 0x3feea1ed0130c132, +0x3c65ebe1abd66c55, 0x3feea2f336cf4e62, +0xbc88a1c52fb3cf42, 0x3feea427543e1a12, +0xbc9369b6f13b3734, 0x3feea589994cce13, +0xbc805e843a19ff1e, 0x3feea71a4623c7ad, +0xbc94d450d872576e, 0x3feea8d99b4492ed, +0x3c90ad675b0e8a00, 0x3feeaac7d98a6699, +0x3c8db72fc1f0eab4, 0x3feeace5422aa0db, +0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c, +0x3c7bf68359f35f44, 0x3feeb1ae99157736, +0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6, +0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5, +0xbc6c23f97c90b959, 0x3feeba44cbc8520f, +0xbc92434322f4f9aa, 0x3feebd829fde4e50, +0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba, +0x3c71affc2b91ce27, 0x3feec49182a3f090, +0x3c6dd235e10a73bb, 0x3feec86319e32323, +0xbc87c50422622263, 0x3feecc667b5de565, +0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33, +0xbc91bbd1d3bcbb15, 0x3feed503b23e255d, +0x3c90cc319cee31d2, 0x3feed99e1330b358, +0x3c8469846e735ab3, 0x3feede6b5579fdbf, +0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a, +0x3c8c1a7792cb3387, 0x3feee89f995ad3ad, +0xbc907b8f4ad1d9fa, 0x3feeee07298db666, +0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb, +0xbc90a40e3da6f640, 0x3feef9728de5593a, +0xbc68d6f438ad9334, 0x3feeff76f2fb5e47, +0xbc91eee26b588a35, 0x3fef05b030a1064a, +0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2, +0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09, +0x3c736eae30af0cb3, 0x3fef199bdd85529c, +0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a, +0x3c84e08fd10959ac, 0x3fef27f12e57d14b, +0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5, +0x3c676b2c6c921968, 0x3fef3720dcef9069, +0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa, +0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c, +0xbc900dae3875a949, 0x3fef4f87080d89f2, +0x3c74a385a63d07a7, 0x3fef5818dcfba487, +0xbc82919e2040220f, 0x3fef60e316c98398, +0x3c8e5a50d5c192ac, 0x3fef69e603db3285, +0x3c843a59ac016b4b, 0x3fef7321f301b460, +0xbc82d52107b43e1f, 0x3fef7c97337b9b5f, +0xbc892ab93b470dc9, 0x3fef864614f5a129, +0x3c74b604603a88d3, 0x3fef902ee78b3ff6, +0x3c83c5ec519d7271, 0x3fef9a51fbc74c83, +0xbc8ff7128fd391f0, 0x3fefa4afa2a490da, +0xbc8dae98e223747d, 0x3fefaf482d8e67f1, +0x3c8ec3bc41aa2008, 0x3fefba1bee615a27, +0x3c842b94c3a9eb32, 0x3fefc52b376bba97, +0x3c8a64a931d185ee, 0x3fefd0765b6e4540, +0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14, +0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, +0x3c5305c14160cc89, 0x3feff3c22b8f71f1, +}, +}; diff --git a/src/math/exp_data.h b/src/math/exp_data.h new file mode 100644 index 00000000..3e24bac5 --- /dev/null +++ b/src/math/exp_data.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2018, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#ifndef _EXP_DATA_H +#define _EXP_DATA_H + +#include <features.h> +#include <stdint.h> + +#define EXP_TABLE_BITS 7 +#define EXP_POLY_ORDER 5 +#define EXP_USE_TOINT_NARROW 0 +#define EXP2_POLY_ORDER 5 +extern hidden const struct exp_data { +	double invln2N; +	double shift; +	double negln2hiN; +	double negln2loN; +	double poly[4]; /* Last four coefficients.  */ +	double exp2_shift; +	double exp2_poly[EXP2_POLY_ORDER]; +	uint64_t tab[2*(1 << EXP_TABLE_BITS)]; +} __exp_data; + +#endif  | 
