From bbbf045ce96fe5daae7e220487dc44c9d971bd9d Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sat, 17 Nov 2012 23:22:41 +0100
Subject: math: exp.c clean up

overflow and underflow was incorrect when the result was not stored.
an optimization for the 0.5*ln2 < |x| < 1.5*ln2 domain was removed.
did various cleanups around static constants and made the comments
consistent with the code.
---
 src/math/exp.c | 121 +++++++++++++++++++++++----------------------------------
 1 file changed, 49 insertions(+), 72 deletions(-)

diff --git a/src/math/exp.c b/src/math/exp.c
index 29bf9609..5c0edee4 100644
--- a/src/math/exp.c
+++ b/src/math/exp.c
@@ -25,7 +25,7 @@
  *      the interval [0,0.34658]:
  *      Write
  *          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
- *      We use a special Remes algorithm on [0,0.34658] to generate
+ *      We use a special Remez algorithm on [0,0.34658] to generate
  *      a polynomial of degree 5 to approximate R. The maximum error
  *      of this polynomial approximation is bounded by 2**-59. In
  *      other words,
@@ -36,15 +36,15 @@
  *          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
  *          |                             |
  *      The computation of exp(r) thus becomes
- *                             2*r
- *              exp(r) = 1 + -------
- *                            R - r
- *                                 r*R1(r)
+ *                              2*r
+ *              exp(r) = 1 + ----------
+ *                            R(r) - r
+ *                                 r*c(r)
  *                     = 1 + r + ----------- (for better accuracy)
- *                                2 - R1(r)
+ *                                2 - c(r)
  *      where
- *                               2       4             10
- *              R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
+ *                              2       4             10
+ *              c(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
  *
  *   3. Scale back to obtain exp(x):
  *      From step 1, we have
@@ -61,27 +61,16 @@
  *
  * Misc. info.
  *      For IEEE double
- *          if x >  7.09782712893383973096e+02 then exp(x) overflow
- *          if x < -7.45133219101941108420e+02 then exp(x) underflow
- *
- * Constants:
- * The hexadecimal values are the intended ones for the following
- * constants. The decimal values may be used, provided that the
- * compiler will convert from decimal to binary accurately enough
- * to produce the hexadecimal values shown.
+ *          if x >  709.782712893383973096 then exp(x) overflows
+ *          if x < -745.133219101941108420 then exp(x) underflows
  */
 
 #include "libm.h"
 
 static const double
-halF[2] = {0.5,-0.5,},
-huge    = 1.0e+300,
-o_threshold =  7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
-u_threshold = -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */
-ln2HI[2]   = { 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
-              -6.93147180369123816490e-01},/* 0xbfe62e42, 0xfee00000 */
-ln2LO[2]   = { 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
-              -1.90821492927058770002e-10},/* 0xbdea39ef, 0x35793c76 */
+half[2] = {0.5,-0.5},
+ln2hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
+ln2lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
 invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
 P1   =  1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
 P2   = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
@@ -89,68 +78,56 @@ P3   =  6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
 P4   = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
 P5   =  4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
 
-static const volatile double
-twom1000 = 9.33263618503218878990e-302; /* 2**-1000=0x01700000,0 */
-
 double exp(double x)
 {
-	double y,hi=0.0,lo=0.0,c,t,twopk;
-	int32_t k=0,xsb;
+	double hi, lo, c, z;
+	int k, sign;
 	uint32_t hx;
 
 	GET_HIGH_WORD(hx, x);
-	xsb = (hx>>31)&1;  /* sign bit of x */
+	sign = hx>>31;
 	hx &= 0x7fffffff;  /* high word of |x| */
 
-	/* filter out non-finite argument */
-	if (hx >= 0x40862E42) {  /* if |x| >= 709.78... */
-		if (hx >= 0x7ff00000) {
-			uint32_t lx;
-	
-			GET_LOW_WORD(lx,x);
-			if (((hx&0xfffff)|lx) != 0)  /* NaN */
-				 return x+x;
-			return xsb==0 ? x : 0.0;  /* exp(+-inf)={inf,0} */
+	/* special cases */
+	if (hx >= 0x40862e42) {  /* if |x| >= 709.78... */
+		if (isnan(x))
+			return x;
+		if (x > 709.782712893383973096) {
+			/* overflow if x!=inf */
+			STRICT_ASSIGN(double, x, 0x1p1023 * x);
+			return x;
+		}
+		if (x < -745.13321910194110842) {
+			/* underflow if x!=-inf */
+			STRICT_ASSIGN(double, x, 0x1p-1000 / -x * 0x1p-1000);
+			return x;
 		}
-		if (x > o_threshold)
-			return huge*huge; /* overflow */
-		if (x < u_threshold)
-			return twom1000*twom1000; /* underflow */
 	}
 
 	/* argument reduction */
-	if (hx > 0x3fd62e42) {  /* if  |x| > 0.5 ln2 */
-		if (hx < 0x3FF0A2B2) {  /* and |x| < 1.5 ln2 */
-			hi = x-ln2HI[xsb];
-			lo = ln2LO[xsb];
-			k = 1 - xsb - xsb;
-		} else {
-			k  = (int)(invln2*x+halF[xsb]);
-			t  = k;
-			hi = x - t*ln2HI[0];  /* t*ln2HI is exact here */
-			lo = t*ln2LO[0];
-		}
+	if (hx > 0x3fd62e42) {  /* if |x| > 0.5 ln2 */
+		if (hx < 0x3ff0a2b2)  /* if |x| < 1.5 ln2 */
+			k = 1 - sign - sign; /* optimization */
+		else
+			k = (int)(invln2*x + half[sign]);
+		hi = x - k*ln2hi;  /* k*ln2hi is exact here */
+		lo = k*ln2lo;
 		STRICT_ASSIGN(double, x, hi - lo);
-	} else if(hx < 0x3e300000)  {  /* |x| < 2**-28 */
-		/* raise inexact */
-		if (huge+x > 1.0)
-			return 1.0+x;
-	} else
+	} else if (hx > 0x3e300000)  {  /* if |x| > 2**-28 */
 		k = 0;
+		hi = x;
+		lo = 0;
+	} else {
+		/* inexact if x!=0 */
+		FORCE_EVAL(0x1p1023 + x);
+		return 1 + x;
+	}
 
 	/* x is now in primary range */
-	t  = x*x;
-	if (k >= -1021)
-		INSERT_WORDS(twopk, 0x3ff00000+(k<<20), 0);
-	else
-		INSERT_WORDS(twopk, 0x3ff00000+((k+1000)<<20), 0);
-	c  = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
+	z = x*x;
+	c = x - z*(P1+z*(P2+z*(P3+z*(P4+z*P5))));
+	x = 1 + ((x*c/(2-c) - lo) + hi);
 	if (k == 0)
-		return 1.0 - ((x*c)/(c-2.0) - x);
-	y = 1.0-((lo-(x*c)/(2.0-c))-hi);
-	if (k < -1021)
-		return y*twopk*twom1000;
-	if (k == 1024)
-		return y*2.0*0x1p1023;
-	return y*twopk;
+		return x;
+	return scalbn(x, k);
 }
-- 
cgit v1.2.1


From 159c7655d06f04aa56a57385d633699c4c63d72c Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sat, 17 Nov 2012 23:39:39 +0100
Subject: math: cleanup exp2.c exp2f.c and exp2l.c

* old code relied on sign extension on right shift
* exp2l ld64 wrapper was wrong
* use scalbn instead of bithacks
---
 src/math/exp2.c  | 53 +++++++++++++++++++++-----------------------------
 src/math/exp2f.c | 30 ++++++++++++++--------------
 src/math/exp2l.c | 59 +++++++++++++++++++-------------------------------------
 3 files changed, 56 insertions(+), 86 deletions(-)

diff --git a/src/math/exp2.c b/src/math/exp2.c
index 08c21a66..8e252280 100644
--- a/src/math/exp2.c
+++ b/src/math/exp2.c
@@ -27,11 +27,9 @@
 
 #include "libm.h"
 
-#define TBLBITS 8
-#define TBLSIZE (1 << TBLBITS)
+#define TBLSIZE 256
 
 static const double
-huge  = 0x1p1000,
 redux = 0x1.8p52 / TBLSIZE,
 P1    = 0x1.62e42fefa39efp-1,
 P2    = 0x1.ebfbdff82c575p-3,
@@ -39,8 +37,6 @@ P3    = 0x1.c6b08d704a0a6p-5,
 P4    = 0x1.3b2ab88f70400p-7,
 P5    = 0x1.5d88003875c74p-10;
 
-static const volatile double twom1000 = 0x1p-1000;
-
 static const double tbl[TBLSIZE * 2] = {
 /*  exp2(z + eps)          eps     */
   0x1.6a09e667f3d5dp-1,  0x1.9880p-44,
@@ -334,25 +330,28 @@ static const double tbl[TBLSIZE * 2] = {
  */
 double exp2(double x)
 {
-	double r, t, twopk, twopkp1000, z;
-	uint32_t hx, ix, lx, i0;
-	int k;
+	double r, t, z;
+	uint32_t hx, ix, i0;
+	union {uint32_t u; int32_t i;} k;
 
 	/* Filter out exceptional cases. */
 	GET_HIGH_WORD(hx, x);
 	ix = hx & 0x7fffffff;
 	if (ix >= 0x40900000) {        /* |x| >= 1024 */
 		if (ix >= 0x7ff00000) {
-			GET_LOW_WORD(lx, x);
-			if (((ix & 0xfffff) | lx) != 0 || (hx & 0x80000000) == 0)
-				return x + x; /* x is NaN or +Inf */
-			else
-				return 0.0;   /* x is -Inf */
+			GET_LOW_WORD(ix, x);
+			if (hx == 0xfff00000 && ix == 0) /* -inf */
+				return 0;
+			return x;
+		}
+		if (x >= 1024) {
+			STRICT_ASSIGN(double, x, x * 0x1p1023);
+			return x;
+		}
+		if (x <= -1075) {
+			STRICT_ASSIGN(double, x, 0x1p-1000*0x1p-1000);
+			return x;
 		}
-		if (x >= 0x1.0p10)
-			return huge * huge; /* overflow */
-		if (x <= -0x1.0ccp10)
-			return twom1000 * twom1000; /* underflow */
 	} else if (ix < 0x3c900000) {  /* |x| < 0x1p-54 */
 		return 1.0 + x;
 	}
@@ -361,24 +360,16 @@ double exp2(double x)
 	STRICT_ASSIGN(double, t, x + redux);
 	GET_LOW_WORD(i0, t);
 	i0 += TBLSIZE / 2;
-	k = (i0 >> TBLBITS) << 20;
-	i0 = (i0 & (TBLSIZE - 1)) << 1;
+	k.u = i0 / TBLSIZE * TBLSIZE;
+	k.i /= TBLSIZE;
+	i0 %= TBLSIZE;
 	t -= redux;
 	z = x - t;
 
 	/* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */
-	t = tbl[i0];       /* exp2t[i0] */
-	z -= tbl[i0 + 1];  /* eps[i0]   */
-	if (k >= -1021 << 20)
-		INSERT_WORDS(twopk, 0x3ff00000 + k, 0);
-	else
-		INSERT_WORDS(twopkp1000, 0x3ff00000 + k + (1000 << 20), 0);
+	t = tbl[2*i0];       /* exp2t[i0] */
+	z -= tbl[2*i0 + 1];  /* eps[i0]   */
 	r = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5))));
 
-	/* Scale by 2**(k>>20). */
-	if (k < -1021 << 20)
-		return r * twopkp1000 * twom1000;
-	if (k == 1024 << 20)
-		return r * 2.0 * 0x1p1023;
-	return r * twopk;
+	return scalbn(r, k.i);
 }
diff --git a/src/math/exp2f.c b/src/math/exp2f.c
index 55c22eac..279f32de 100644
--- a/src/math/exp2f.c
+++ b/src/math/exp2f.c
@@ -27,19 +27,15 @@
 
 #include "libm.h"
 
-#define TBLBITS 4
-#define TBLSIZE (1 << TBLBITS)
+#define TBLSIZE 16
 
 static const float
-huge  = 0x1p100f,
 redux = 0x1.8p23f / TBLSIZE,
 P1    = 0x1.62e430p-1f,
 P2    = 0x1.ebfbe0p-3f,
 P3    = 0x1.c6b348p-5f,
 P4    = 0x1.3b2c9cp-7f;
 
-static const volatile float twom100 = 0x1p-100f;
-
 static const double exp2ft[TBLSIZE] = {
   0x1.6a09e667f3bcdp-1,
   0x1.7a11473eb0187p-1,
@@ -89,23 +85,25 @@ float exp2f(float x)
 {
 	double tv, twopk, u, z;
 	float t;
-	uint32_t hx, ix, i0;
-	int32_t k;
+	uint32_t hx, ix, i0, k;
 
 	/* Filter out exceptional cases. */
 	GET_FLOAT_WORD(hx, x);
 	ix = hx & 0x7fffffff;
 	if (ix >= 0x43000000) {  /* |x| >= 128 */
 		if (ix >= 0x7f800000) {
-			if ((ix & 0x7fffff) != 0 || (hx & 0x80000000) == 0)
-				return x + x; /* x is NaN or +Inf */
-			else
-				return 0.0;   /* x is -Inf */
+			if (hx == 0xff800000) /* -inf */
+				return 0;
+			return x;
+		}
+		if (x >= 128) {
+			STRICT_ASSIGN(float, x, x * 0x1p127);
+			return x;
+		}
+		if (x <= -150) {
+			STRICT_ASSIGN(float, x, 0x1p-100*0x1p-100);
+			return x;
 		}
-		if (x >= 0x1.0p7f)
-			return huge * huge;   /* overflow */
-		if (x <= -0x1.2cp7f)
-			return twom100 * twom100; /* underflow */
 	} else if (ix <= 0x33000000) {  /* |x| <= 0x1p-25 */
 		return 1.0f + x;
 	}
@@ -114,7 +112,7 @@ float exp2f(float x)
 	STRICT_ASSIGN(float, t, x + redux);
 	GET_FLOAT_WORD(i0, t);
 	i0 += TBLSIZE / 2;
-	k = (i0 >> TBLBITS) << 20;
+	k = (i0 / TBLSIZE) << 20;
 	i0 &= TBLSIZE - 1;
 	t -= redux;
 	z = x - t;
diff --git a/src/math/exp2l.c b/src/math/exp2l.c
index b587308f..145c20fe 100644
--- a/src/math/exp2l.c
+++ b/src/math/exp2l.c
@@ -30,7 +30,7 @@
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 long double exp2l(long double x)
 {
-	return exp2l(x);
+	return exp2(x);
 }
 #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
 
@@ -40,10 +40,6 @@ long double exp2l(long double x)
 #define BIAS    (LDBL_MAX_EXP - 1)
 #define EXPMASK (BIAS + LDBL_MAX_EXP)
 
-static const long double huge = 0x1p10000L;
-/* XXX Prevent gcc from erroneously constant folding this. */
-static const volatile long double twom10000 = 0x1p-10000L;
-
 static const double
 redux = 0x1.8p63 / TBLSIZE,
 P1    = 0x1.62e42fefa39efp-1,
@@ -208,27 +204,28 @@ static const double tbl[TBLSIZE * 2] = {
 long double exp2l(long double x)
 {
 	union IEEEl2bits u, v;
-	long double r, twopk, twopkp10000, z;
+	long double r, z;
 	uint32_t hx, ix, i0;
-	int k;
+	union {uint32_t u; int32_t i;} k;
 
 	/* Filter out exceptional cases. */
 	u.e = x;
 	hx = u.xbits.expsign;
 	ix = hx & EXPMASK;
 	if (ix >= BIAS + 14) {  /* |x| >= 16384 or x is NaN */
-		if (ix == BIAS + LDBL_MAX_EXP) {
-			if (u.xbits.man != 1ULL << 63 || (hx & 0x8000) == 0)
-				return x + x;  /* x is +Inf or NaN */
-			return 0.0;  /* x is -Inf */
+		if (ix == EXPMASK) {
+			if (u.xbits.man == 1ULL << 63 && hx == 0xffff) /* -inf */
+				return 0;
+			return x;
+		}
+		if (x >= 16384) {
+			x *= 0x1p16383L;
+			return x;
 		}
-		if (x >= 16384)
-			return huge * huge;  /* overflow */
 		if (x <= -16446)
-			return twom10000 * twom10000;  /* underflow */
-	} else if (ix <= BIAS - 66) {  /* |x| < 0x1p-66 */
-		return 1.0 + x;
-	}
+			return 0x1p-10000L*0x1p-10000L;
+	} else if (ix < BIAS - 64)  /* |x| < 0x1p-64 */
+		return 1 + x;
 
 	/*
 	 * Reduce x, computing z, i0, and k. The low bits of x + redux
@@ -240,38 +237,22 @@ long double exp2l(long double x)
 	 * Then the low-order word of x + redux is 0x000abc12,
 	 * We split this into k = 0xabc and i0 = 0x12 (adjusted to
 	 * index into the table), then we compute z = 0x0.003456p0.
-	 *
-	 * XXX If the exponent is negative, the computation of k depends on
-	 *     '>>' doing sign extension.
 	 */
 	u.e = x + redux;
 	i0 = u.bits.manl + TBLSIZE / 2;
-	k = (int)i0 >> TBLBITS;
-	i0 = (i0 & (TBLSIZE - 1)) << 1;
+	k.u = i0 / TBLSIZE * TBLSIZE;
+	k.i /= TBLSIZE;
+	i0 %= TBLSIZE;
 	u.e -= redux;
 	z = x - u.e;
-	v.xbits.man = 1ULL << 63;
-	if (k >= LDBL_MIN_EXP) {
-		v.xbits.expsign = LDBL_MAX_EXP - 1 + k;
-		twopk = v.e;
-	} else {
-		v.xbits.expsign = LDBL_MAX_EXP - 1 + k + 10000;
-		twopkp10000 = v.e;
-	}
 
 	/* Compute r = exp2l(y) = exp2lt[i0] * p(z). */
-	long double t_hi = tbl[i0];
-	long double t_lo = tbl[i0 + 1];
+	long double t_hi = tbl[2*i0];
+	long double t_lo = tbl[2*i0 + 1];
 	/* XXX This gives > 1 ulp errors outside of FE_TONEAREST mode */
 	r = t_lo + (t_hi + t_lo) * z * (P1 + z * (P2 + z * (P3 + z * (P4
 	     + z * (P5 + z * P6))))) + t_hi;
 
-	/* Scale by 2**k. */
-	if (k >= LDBL_MIN_EXP) {
-		if (k == LDBL_MAX_EXP)
-			return r * 2.0 * 0x1p16383L;
-		return r * twopk;
-	}
-	return r * twopkp10000 * twom10000;
+	return scalbnl(r, k.i);
 }
 #endif
-- 
cgit v1.2.1


From ab1772c597ba8fe0c26400256b12d7a4df23880e Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sun, 18 Nov 2012 03:42:09 +0100
Subject: math: expf.c cleanup

similar to exp.c cleanup: use scalbnf, don't return excess precision,
drop some optimizatoins.
exp.c was changed to be more consistent with expf.c code.
---
 src/math/exp.c  | 20 ++++++------
 src/math/expf.c | 98 ++++++++++++++++++++++++++-------------------------------
 2 files changed, 55 insertions(+), 63 deletions(-)

diff --git a/src/math/exp.c b/src/math/exp.c
index 5c0edee4..5ec8f8a7 100644
--- a/src/math/exp.c
+++ b/src/math/exp.c
@@ -80,7 +80,7 @@ P5   =  4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
 
 double exp(double x)
 {
-	double hi, lo, c, z;
+	double hi, lo, c, xx;
 	int k, sign;
 	uint32_t hx;
 
@@ -92,24 +92,26 @@ double exp(double x)
 	if (hx >= 0x40862e42) {  /* if |x| >= 709.78... */
 		if (isnan(x))
 			return x;
+		if (hx == 0x7ff00000 && sign) /* -inf */
+			return 0;
 		if (x > 709.782712893383973096) {
 			/* overflow if x!=inf */
 			STRICT_ASSIGN(double, x, 0x1p1023 * x);
 			return x;
 		}
 		if (x < -745.13321910194110842) {
-			/* underflow if x!=-inf */
-			STRICT_ASSIGN(double, x, 0x1p-1000 / -x * 0x1p-1000);
+			/* underflow */
+			STRICT_ASSIGN(double, x, 0x1p-1000 * 0x1p-1000);
 			return x;
 		}
 	}
 
 	/* argument reduction */
 	if (hx > 0x3fd62e42) {  /* if |x| > 0.5 ln2 */
-		if (hx < 0x3ff0a2b2)  /* if |x| < 1.5 ln2 */
-			k = 1 - sign - sign; /* optimization */
-		else
+		if (hx >= 0x3ff0a2b2)  /* if |x| >= 1.5 ln2 */
 			k = (int)(invln2*x + half[sign]);
+		else
+			k = 1 - sign - sign;
 		hi = x - k*ln2hi;  /* k*ln2hi is exact here */
 		lo = k*ln2lo;
 		STRICT_ASSIGN(double, x, hi - lo);
@@ -124,9 +126,9 @@ double exp(double x)
 	}
 
 	/* x is now in primary range */
-	z = x*x;
-	c = x - z*(P1+z*(P2+z*(P3+z*(P4+z*P5))));
-	x = 1 + ((x*c/(2-c) - lo) + hi);
+	xx = x*x;
+	c = x - xx*(P1+xx*(P2+xx*(P3+xx*(P4+xx*P5))));
+	x = 1 + (x*c/(2-c) - lo + hi);
 	if (k == 0)
 		return x;
 	return scalbn(x, k);
diff --git a/src/math/expf.c b/src/math/expf.c
index 3c3e2ab9..8aefc917 100644
--- a/src/math/expf.c
+++ b/src/math/expf.c
@@ -16,79 +16,69 @@
 #include "libm.h"
 
 static const float
-halF[2] = {0.5,-0.5,},
-huge    = 1.0e+30,
-o_threshold =  8.8721679688e+01,  /* 0x42b17180 */
-u_threshold = -1.0397208405e+02,  /* 0xc2cff1b5 */
-ln2HI[2]   = { 6.9314575195e-01,  /* 0x3f317200 */
-              -6.9314575195e-01,},/* 0xbf317200 */
-ln2LO[2]   = { 1.4286067653e-06,  /* 0x35bfbe8e */
-              -1.4286067653e-06,},/* 0xb5bfbe8e */
-invln2 = 1.4426950216e+00,        /* 0x3fb8aa3b */
+half[2] = {0.5,-0.5},
+ln2hi   = 6.9314575195e-1f,  /* 0x3f317200 */
+ln2lo   = 1.4286067653e-6f,  /* 0x35bfbe8e */
+invln2  = 1.4426950216e+0f,  /* 0x3fb8aa3b */
 /*
  * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]:
  * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74
  */
-P1 =  1.6666625440e-1, /*  0xaaaa8f.0p-26 */
-P2 = -2.7667332906e-3; /* -0xb55215.0p-32 */
-
-static const volatile float twom100 = 7.8886090522e-31; /* 2**-100=0x0d800000 */
+P1 =  1.6666625440e-1f, /*  0xaaaa8f.0p-26 */
+P2 = -2.7667332906e-3f; /* -0xb55215.0p-32 */
 
 float expf(float x)
 {
-	float y,hi=0.0,lo=0.0,c,t,twopk;
-	int32_t k=0,xsb;
+	float hi, lo, c, xx;
+	int k, sign;
 	uint32_t hx;
 
 	GET_FLOAT_WORD(hx, x);
-	xsb = (hx>>31)&1;  /* sign bit of x */
+	sign = hx >> 31;   /* sign bit of x */
 	hx &= 0x7fffffff;  /* high word of |x| */
 
-	/* filter out non-finite argument */
-	if (hx >= 0x42b17218) {  /* if |x|>=88.721... */
+	/* special cases */
+	if (hx >= 0x42b17218) {  /* if |x| >= 88.722839f or NaN */
 		if (hx > 0x7f800000)  /* NaN */
-			return x+x;
-		if (hx == 0x7f800000)  /* exp(+-inf)={inf,0} */
-			return xsb==0 ? x : 0.0;
-		if (x > o_threshold)
-			return huge*huge; /* overflow */
-		if (x < u_threshold)
-			return twom100*twom100; /* underflow */
+			return x;
+		if (!sign) {
+			/* overflow if x!=inf */
+			STRICT_ASSIGN(float, x, x * 0x1p127f);
+			return x;
+		}
+		if (hx == 0x7f800000)  /* -inf */
+			return 0;
+		if (hx >= 0x42cff1b5) { /* x <= -103.972084f */
+			/* underflow */
+			STRICT_ASSIGN(float, x, 0x1p-100f*0x1p-100f);
+			return x;
+		}
 	}
 
 	/* argument reduction */
-	if (hx > 0x3eb17218) {  /* if  |x| > 0.5 ln2 */
-		if (hx < 0x3F851592) {  /* and |x| < 1.5 ln2 */
-			hi = x-ln2HI[xsb];
-			lo = ln2LO[xsb];
-			k = 1 - xsb - xsb;
-		} else {
-			k  = invln2*x + halF[xsb];
-			t  = k;
-			hi = x - t*ln2HI[0];  /* t*ln2HI is exact here */
-			lo = t*ln2LO[0];
-		}
+	if (hx > 0x3eb17218) {  /* if |x| > 0.5 ln2 */
+		if (hx > 0x3f851592)  /* if |x| > 1.5 ln2 */
+			k = invln2*x + half[sign];
+		else
+			k = 1 - sign - sign;
+		hi = x - k*ln2hi;  /* k*ln2hi is exact here */
+		lo = k*ln2lo;
 		STRICT_ASSIGN(float, x, hi - lo);
-	} else if(hx < 0x39000000)  {  /* |x|<2**-14 */
-		/* raise inexact */
-		if (huge+x > 1.0f)
-			return 1.0f + x;
-	} else
+	} else if (hx > 0x39000000) {  /* |x| > 2**-14 */
 		k = 0;
+		hi = x;
+		lo = 0;
+	} else {
+		/* raise inexact */
+		FORCE_EVAL(0x1p127f + x);
+		return 1 + x;
+	}
 
 	/* x is now in primary range */
-	t = x*x;
-	if (k >= -125)
-		SET_FLOAT_WORD(twopk, 0x3f800000+(k<<23));
-	else
-		SET_FLOAT_WORD(twopk, 0x3f800000+((k+100)<<23));
-	c  = x - t*(P1+t*P2);
+	xx = x*x;
+	c = x - xx*(P1+xx*P2);
+	x = 1 + (x*c/(2-c) - lo + hi);
 	if (k == 0)
-		return 1.0f - ((x*c)/(c - 2.0f) - x);
-	y = 1.0f - ((lo - (x*c)/(2.0f - c)) - hi);
-	if (k < -125)
-		return y*twopk*twom100;
-	if (k == 128)
-		return y*2.0f*0x1p127f;
-	return y*twopk;
+		return x;
+	return scalbnf(x, k);
 }
-- 
cgit v1.2.1


From e93a0fe49dcc2ff9728faa8920df75009b9f46dc Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sun, 18 Nov 2012 03:49:16 +0100
Subject: math: expl.c cleanup

raise overflow and underflow when necessary, fix various comments.
---
 src/math/expl.c | 43 +++++++++++++++++++------------------------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/src/math/expl.c b/src/math/expl.c
index b289ffec..50a04297 100644
--- a/src/math/expl.c
+++ b/src/math/expl.c
@@ -35,7 +35,7 @@
  *     x    k  f
  *    e  = 2  e.
  *
- * A Pade' form of degree 2/3 is used to approximate exp(f) - 1
+ * A Pade' form of degree 5/6 is used to approximate exp(f) - 1
  * in the basic range [-0.5 ln 2, 0.5 ln 2].
  *
  *
@@ -86,42 +86,37 @@ static const long double Q[4] = {
  2.0000000000000000000897E0L,
 };
 static const long double
-C1 = 6.9314575195312500000000E-1L,
-C2 = 1.4286068203094172321215E-6L,
-MAXLOGL = 1.1356523406294143949492E4L,
-MINLOGL = -1.13994985314888605586758E4L,
-LOG2EL = 1.4426950408889634073599E0L;
+LN2HI = 6.9314575195312500000000E-1L,
+LN2LO = 1.4286068203094172321215E-6L,
+LOG2E = 1.4426950408889634073599E0L;
 
 long double expl(long double x)
 {
 	long double px, xx;
-	int n;
+	int k;
 
 	if (isnan(x))
 		return x;
-	if (x > MAXLOGL)
-		return INFINITY;
-	if (x < MINLOGL)
-		return 0.0;
+	if (x > 11356.5234062941439488L) /* x > ln(2^16384 - 0.5) */
+		return x * 0x1p16383L;
+	if (x < -11399.4985314888605581L) /* x < ln(2^-16446) */
+		return 0x1p-10000L * 0x1p-10000L;
 
-	/* Express e**x = e**g 2**n
-	 *   = e**g e**(n loge(2))
-	 *   = e**(g + n loge(2))
+	/* Express e**x = e**f 2**k
+	 *   = e**(f + k ln(2))
 	 */
-	px = floorl(LOG2EL * x + 0.5); /* floor() truncates toward -infinity. */
-	n = px;
-	x -= px * C1;
-	x -= px * C2;
+	px = floorl(LOG2E * x + 0.5);
+	k = px;
+	x -= px * LN2HI;
+	x -= px * LN2LO;
 
-	/* rational approximation for exponential
-	 * of the fractional part:
-	 * e**x =  1 + 2x P(x**2)/(Q(x**2) - P(x**2))
+	/* rational approximation of the fractional part:
+	 * e**x =  1 + 2x P(x**2)/(Q(x**2) - x P(x**2))
 	 */
 	xx = x * x;
 	px = x * __polevll(xx, P, 2);
-	x =  px/(__polevll(xx, Q, 3) - px);
+	x = px/(__polevll(xx, Q, 3) - px);
 	x = 1.0 + 2.0 * x;
-	x = scalbnl(x, n);
-	return x;
+	return scalbnl(x, k);
 }
 #endif
-- 
cgit v1.2.1


From a764db9a086ca036d4fc9181f96d19ab312a6560 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sun, 18 Nov 2012 03:52:29 +0100
Subject: math: use float constants in exp10f.c

use the 'f' suffix when a float constant is not representable
---
 src/math/exp10f.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/math/exp10f.c b/src/math/exp10f.c
index b697eb9c..5fd1af9c 100644
--- a/src/math/exp10f.c
+++ b/src/math/exp10f.c
@@ -5,7 +5,7 @@
 float exp10f(float x)
 {
 	static const float p10[] = {
-		1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
+		1e-7f, 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f,
 		1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7
 	};
 	float n, y = modff(x, &n);
-- 
cgit v1.2.1