From faea4c9937d36b17e53fdc7d5a254d7e936e1755 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Tue, 11 Dec 2012 22:44:36 +0100
Subject: make CMPLX macros available in complex.h in non-c11 mode as well

---
 include/complex.h   | 2 --
 src/internal/libm.h | 8 --------
 2 files changed, 10 deletions(-)

diff --git a/include/complex.h b/include/complex.h
index 8206e026..13a45c57 100644
--- a/include/complex.h
+++ b/include/complex.h
@@ -115,11 +115,9 @@ long double creall(long double complex);
 #define __CMPLX(x, y, t) \
 	((union { _Complex t __z; t __xy[2]; }){.__xy = {(x),(y)}}.__z)
 
-#if __STDC_VERSION__ >= 201112L
 #define CMPLX(x, y) __CMPLX(x, y, double)
 #define CMPLXF(x, y) __CMPLX(x, y, float)
 #define CMPLXL(x, y) __CMPLX(x, y, long double)
-#endif
 
 #ifdef __cplusplus
 }
diff --git a/src/internal/libm.h b/src/internal/libm.h
index 46c4b564..64cc8388 100644
--- a/src/internal/libm.h
+++ b/src/internal/libm.h
@@ -168,12 +168,4 @@ long double __p1evll(long double, const long double *, int);
 #define STRICT_ASSIGN(type, lval, rval) ((lval) = (type)(rval))
 #endif
 
-/* complex */
-
-#ifndef CMPLX
-#define CMPLX(x, y) __CMPLX(x, y, double)
-#define CMPLXF(x, y) __CMPLX(x, y, float)
-#define CMPLXL(x, y) __CMPLX(x, y, long double)
-#endif
-
 #endif
-- 
cgit v1.2.1


From 64623cd59a5e72c6322548bca3827a75d5d11918 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Tue, 11 Dec 2012 22:57:39 +0100
Subject: math: remove long double version of bessel functions from math.h

j0l,j1l,jnl,y0l,j1l,jnl are gnu extensions, bsd and posix do not
have them.
noone seems to use them and there is no plan to implement them any
time soon so we shouldn't declare them in math.h.
---
 include/math.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/include/math.h b/include/math.h
index b44738d7..19108795 100644
--- a/include/math.h
+++ b/include/math.h
@@ -399,14 +399,6 @@ float       ynf(int, float);
 #ifdef _GNU_SOURCE
 long double lgammal_r(long double, int*);
 
-long double j0l(long double);
-long double j1l(long double);
-long double jnl(int, long double);
-
-long double y0l(long double);
-long double y1l(long double);
-long double ynl(int, long double);
-
 void        sincos(double, double*, double*);
 void        sincosf(float, float*, float*);
 void        sincosl(long double, long double*, long double*);
-- 
cgit v1.2.1


From 482ccd2f7497a79ca83e998f54e823e7cedaaa6e Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Tue, 11 Dec 2012 23:06:20 +0100
Subject: math: rewrite inverse hyperbolic functions to be simpler/smaller

modifications:
* avoid unsigned->signed integer conversion
* do not handle special cases when they work correctly anyway
* more strict threshold values (0x1p26 instead of 0x1p28 etc)
* smaller code, cleaner branching logic
* same precision as the old code:
    acosh(x) has up to 2ulp error in [1,1.125]
    asinh(x) has up to 1.6ulp error in [0.125,0.5], [-0.5,-0.125]
    atanh(x) has up to 1.7ulp error in [0.125,0.5], [-0.5,-0.125]
---
 src/math/acosh.c  | 61 ++++++++++------------------------------------
 src/math/acoshf.c | 47 +++++++++---------------------------
 src/math/acoshl.c | 58 ++++++++++----------------------------------
 src/math/asinh.c  | 69 ++++++++++++++++------------------------------------
 src/math/asinhf.c | 62 ++++++++++++++++-------------------------------
 src/math/asinhl.c | 72 +++++++++++++++++++------------------------------------
 src/math/atanh.c  | 67 ++++++++++++---------------------------------------
 src/math/atanhf.c | 50 +++++++++++---------------------------
 src/math/atanhl.c | 69 ++++++++++++++--------------------------------------
 9 files changed, 149 insertions(+), 406 deletions(-)

diff --git a/src/math/acosh.c b/src/math/acosh.c
index 15f51c6e..4ce9b3d1 100644
--- a/src/math/acosh.c
+++ b/src/math/acosh.c
@@ -1,54 +1,19 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_acosh.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- *
- */
-/* acosh(x)
- * Method :
- *      Based on
- *              acosh(x) = log [ x + sqrt(x*x-1) ]
- *      we have
- *              acosh(x) := log(x)+ln2, if x is large; else
- *              acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
- *              acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
- *
- * Special cases:
- *      acosh(x) is NaN with signal if x<1.
- *      acosh(NaN) is NaN without signal.
- */
-
 #include "libm.h"
 
-static const double
-ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
-
+/* acosh(x) = log(x + sqrt(x*x-1)) */
 double acosh(double x)
 {
-	double t;
-	int32_t hx;
-	uint32_t lx;
+	union {double f; uint64_t i;} u = {.f = x};
+	unsigned e = u.i >> 52 & 0x7ff;
+
+	/* x < 1 domain error is handled in the called functions */
 
-	EXTRACT_WORDS(hx, lx, x);
-	if (hx < 0x3ff00000) {  /* x < 1 */
-		return (x-x)/(x-x);
-	} else if (hx >= 0x41b00000) {  /* x > 2**28 */
-		if (hx >= 0x7ff00000)  /* x is inf of NaN */
-			return x+x;
-		return log(x) + ln2;   /* acosh(huge) = log(2x) */
-	} else if ((hx-0x3ff00000 | lx) == 0) {
-		return 0.0;            /* acosh(1) = 0 */
-	} else if (hx > 0x40000000) {  /* 2**28 > x > 2 */
-		t = x*x;
-		return log(2.0*x - 1.0/(x+sqrt(t-1.0)));
-	} else {                /* 1 < x < 2 */
-		t = x-1.0;
-		return log1p(t + sqrt(2.0*t+t*t));
-	}
+	if (e < 0x3ff + 1)
+		/* |x| < 2, up to 2ulp error in [1,1.125] */
+		return log1p(x-1 + sqrt((x-1)*(x-1)+2*(x-1)));
+	if (e < 0x3ff + 26)
+		/* |x| < 0x1p26 */
+		return log(2*x - 1/(x+sqrt(x*x-1)));
+	/* |x| >= 0x1p26 or nan */
+	return log(x) + 0.693147180559945309417232121458176568;
 }
diff --git a/src/math/acoshf.c b/src/math/acoshf.c
index 0f7aae2a..4596085e 100644
--- a/src/math/acoshf.c
+++ b/src/math/acoshf.c
@@ -1,42 +1,17 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_acoshf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #include "libm.h"
 
-static const float
-ln2 = 6.9314718246e-01; /* 0x3f317218 */
-
+/* acosh(x) = log(x + sqrt(x*x-1)) */
 float acoshf(float x)
 {
-	float t;
-	int32_t hx;
+	union {float f; int32_t i;} u = {.f = x};
 
-	GET_FLOAT_WORD(hx, x);
-	if (hx < 0x3f800000) {  /* x < 1 */
-		return (x-x)/(x-x);
-	} else if (hx >= 0x4d800000) {  /* x > 2**28 */
-		if (hx >= 0x7f800000)  /* x is inf of NaN */
-			return x + x;
-		return logf(x) + ln2;  /* acosh(huge)=log(2x) */
-	} else if (hx == 0x3f800000) {
-		return 0.0f;  /* acosh(1) = 0 */
-	} else if (hx > 0x40000000) {  /* 2**28 > x > 2 */
-		t = x*x;
-		return logf(2.0f*x - 1.0f/(x+sqrtf(t-1.0f)));
-	} else {                /* 1 < x < 2 */
-		t = x-1.0f;
-		return log1pf(t + sqrtf(2.0f*t+t*t));
-	}
+	if (u.i < 0x3f800000+(1<<23))
+		/* x < 2, invalid if x < 1 or nan */
+		/* up to 2ulp error in [1,1.125] */
+		return log1pf(x-1 + sqrtf((x-1)*(x-1)+2*(x-1)));
+	if (u.i < 0x3f800000+(12<<23))
+		/* x < 0x1p12 */
+		return logf(2*x - 1/(x+sqrtf(x*x-1)));
+	/* x >= 0x1p12 */
+	return logf(x) + 0.693147180559945309417232121458176568f;
 }
diff --git a/src/math/acoshl.c b/src/math/acoshl.c
index a4024516..472c71cb 100644
--- a/src/math/acoshl.c
+++ b/src/math/acoshl.c
@@ -1,28 +1,3 @@
-/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_acoshl.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* acoshl(x)
- * Method :
- *      Based on
- *              acoshl(x) = logl [ x + sqrtl(x*x-1) ]
- *      we have
- *              acoshl(x) := logl(x)+ln2,       if x is large; else
- *              acoshl(x) := logl(2x-1/(sqrtl(x*x-1)+x)) if x>2; else
- *              acoshl(x) := log1pl(t+sqrtl(2.0*t+t*t)); where t=x-1.
- *
- * Special cases:
- *      acoshl(x) is NaN with signal if x<1.
- *      acoshl(NaN) is NaN without signal.
- */
-
 #include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -31,29 +6,20 @@ long double acoshl(long double x)
 	return acosh(x);
 }
 #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-static const long double
-ln2 = 6.931471805599453094287e-01L; /* 0x3FFE, 0xB17217F7, 0xD1CF79AC */
-
+/* acosh(x) = log(x + sqrt(x*x-1)) */
 long double acoshl(long double x)
 {
-	long double t;
-	uint32_t se,i0,i1;
+	union {
+		long double f;
+		struct{uint64_t m; int16_t se; uint16_t pad;} i;
+	} u = {.f = x};
 
-	GET_LDOUBLE_WORDS(se, i0, i1, x);
-	if (se < 0x3fff || se & 0x8000) {  /* x < 1 */
-		return (x-x)/(x-x);
-	} else if (se >= 0x401d) {  /* x > 2**30 */
-		if (se >= 0x7fff)  /* x is inf or NaN */
-			return x+x;
-		return logl(x) + ln2;  /* acoshl(huge) = logl(2x) */
-	} else if (((se-0x3fff)|i0|i1) == 0) {
-		return 0.0;            /* acosh(1) = 0 */
-	} else if (se > 0x4000) {  /* x > 2 */
-		t = x*x;
-		return logl(2.0*x - 1.0/(x + sqrtl(t - 1.0)));
-	}
-	/* 1 < x <= 2 */
-	t = x - 1.0;
-	return log1pl(t + sqrtl(2.0*t + t*t));
+	if (u.i.se < 0x3fff + 1)
+		/* x < 2, invalid if x < 1 or nan */
+		return log1pl(x-1 + sqrtl((x-1)*(x-1)+2*(x-1)));
+	if (u.i.se < 0x3fff + 32)
+		/* x < 0x1p32 */
+		return logl(2*x - 1/(x+sqrtl(x*x-1)));
+	return logl(x) + 0.693147180559945309417232121458176568L;
 }
 #endif
diff --git a/src/math/asinh.c b/src/math/asinh.c
index 11bbd71a..4152dc39 100644
--- a/src/math/asinh.c
+++ b/src/math/asinh.c
@@ -1,55 +1,28 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_asinh.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* asinh(x)
- * Method :
- *      Based on
- *              asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
- *      we have
- *      asinh(x) := x  if  1+x*x=1,
- *               := sign(x)*(log(x)+ln2)) for large |x|, else
- *               := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
- *               := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2)))
- */
-
 #include "libm.h"
 
-static const double
-ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
-huge= 1.00000000000000000000e+300;
-
+/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */
 double asinh(double x)
 {
-	double t,w;
-	int32_t hx,ix;
+	union {double f; uint64_t i;} u = {.f = x};
+	unsigned e = u.i >> 52 & 0x7ff;
+	unsigned s = u.i >> 63;
 
-	GET_HIGH_WORD(hx, x);
-	ix = hx & 0x7fffffff;
-	if (ix >= 0x7ff00000)   /* x is inf or NaN */
-		return x+x;
-	if (ix < 0x3e300000) {  /* |x| < 2**-28 */
-		/* return x inexact except 0 */
-		if (huge+x > 1.0)
-			return x;
-	}
-	if (ix > 0x41b00000) {  /* |x| > 2**28 */
-		w = log(fabs(x)) + ln2;
-	} else if (ix > 0x40000000) {  /* 2**28 > |x| > 2.0 */
-		t = fabs(x);
-		w = log(2.0*t + 1.0/(sqrt(x*x+1.0)+t));
-	} else {                /* 2.0 > |x| > 2**-28 */
-		t = x*x;
-		w =log1p(fabs(x) + t/(1.0+sqrt(1.0+t)));
+	/* |x| */
+	u.i &= (uint64_t)-1/2;
+	x = u.f;
+
+	if (e >= 0x3ff + 26) {
+		/* |x| >= 0x1p26 or inf or nan */
+		x = log(x) + 0.693147180559945309417232121458176568;
+	} else if (e >= 0x3ff + 1) {
+		/* |x| >= 2 */
+		x = log(2*x + 1/(sqrt(x*x+1)+x));
+	} else if (e >= 0x3ff - 26) {
+		/* |x| >= 0x1p-26, up to 1.6ulp error in [0.125,0.5] */
+		x = log1p(x + x*x/(sqrt(x*x+1)+1));
+	} else {
+		/* |x| < 0x1p-26, raise inexact if x != 0 */
+		FORCE_EVAL(x + 0x1p1000);
 	}
-	if (hx > 0)
-		return w;
-	return -w;
+	return s ? -x : x;
 }
diff --git a/src/math/asinhf.c b/src/math/asinhf.c
index efe3af94..fc9f0911 100644
--- a/src/math/asinhf.c
+++ b/src/math/asinhf.c
@@ -1,48 +1,28 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_asinhf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #include "libm.h"
 
-static const float
-ln2 = 6.9314718246e-01, /* 0x3f317218 */
-huge= 1.0000000000e+30;
-
+/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */
 float asinhf(float x)
 {
-	float t,w;
-	int32_t hx,ix;
+	union {float f; uint32_t i;} u = {.f = x};
+	uint32_t i = u.i & 0x7fffffff;
+	unsigned s = u.i >> 31;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
-	if (ix >= 0x7f800000)   /* x is inf or NaN */
-		return x+x;
-	if (ix < 0x31800000) {  /* |x| < 2**-28 */
-		/* return x inexact except 0 */
-		if (huge+x > 1.0f)
-			return x;
-	}
-	if (ix > 0x4d800000) {  /* |x| > 2**28 */
-		w = logf(fabsf(x)) + ln2;
-	} else if (ix > 0x40000000) {  /* 2**28 > |x| > 2.0 */
-		t = fabsf(x);
-		w = logf(2.0f*t + 1.0f/(sqrtf(x*x+1.0f)+t));
-	} else {                /* 2.0 > |x| > 2**-28 */
-		t = x*x;
-		w =log1pf(fabsf(x) + t/(1.0f+sqrtf(1.0f+t)));
+	/* |x| */
+	u.i = i;
+	x = u.f;
+
+	if (i >= 0x3f800000 + (12<<23)) {
+		/* |x| >= 0x1p12 or inf or nan */
+		x = logf(x) + 0.693147180559945309417232121458176568f;
+	} else if (i >= 0x3f800000 + (1<<23)) {
+		/* |x| >= 2 */
+		x = logf(2*x + 1/(sqrtf(x*x+1)+x));
+	} else if (i >= 0x3f800000 - (12<<23)) {
+		/* |x| >= 0x1p-12, up to 1.6ulp error in [0.125,0.5] */
+		x = log1pf(x + x*x/(sqrtf(x*x+1)+1));
+	} else {
+		/* |x| < 0x1p-12, raise inexact if x!=0 */
+		FORCE_EVAL(x + 0x1p120f);
 	}
-	if (hx > 0)
-		return w;
-	return -w;
+	return s ? -x : x;
 }
diff --git a/src/math/asinhl.c b/src/math/asinhl.c
index dc5dd71f..db966246 100644
--- a/src/math/asinhl.c
+++ b/src/math/asinhl.c
@@ -1,25 +1,3 @@
-/* origin: OpenBSD /usr/src/lib/libm/src/ld80/s_asinhl.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* asinhl(x)
- * Method :
- *      Based on
- *              asinhl(x) = signl(x) * logl [ |x| + sqrtl(x*x+1) ]
- *      we have
- *      asinhl(x) := x  if  1+x*x=1,
- *                := signl(x)*(logl(x)+ln2)) for large |x|, else
- *                := signl(x)*logl(2|x|+1/(|x|+sqrtl(x*x+1))) if|x|>2, else
- *                := signl(x)*log1pl(|x| + x^2/(1 + sqrtl(1+x^2)))
- */
-
 #include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -28,35 +6,33 @@ long double asinhl(long double x)
 	return asinh(x);
 }
 #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-static const long double
-ln2  = 6.931471805599453094287e-01L, /* 0x3FFE, 0xB17217F7, 0xD1CF79AC */
-huge = 1.000000000000000000e+4900L;
-
+/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */
 long double asinhl(long double x)
 {
-	long double t,w;
-	int32_t hx,ix;
+	union {
+		long double f;
+		struct{uint64_t m; uint16_t se; uint16_t pad;} i;
+	} u = {.f = x};
+	unsigned e = u.i.se & 0x7fff;
+	unsigned s = u.i.se >> 15;
 
-	GET_LDOUBLE_EXP(hx, x);
-	ix = hx & 0x7fff;
-	if (ix == 0x7fff)
-		return x + x;   /* x is inf or NaN */
-	if (ix < 0x3fde) {      /* |x| < 2**-34 */
-		/* return x, raise inexact if x != 0 */
-		if (huge+x > 1.0)
-			return x;
-	}
-	if (ix > 0x4020) {      /* |x| > 2**34 */
-		w = logl(fabsl(x)) + ln2;
-	} else if (ix > 0x4000) { /* 2**34 > |x| > 2.0 */
-		t = fabsl(x);
-		w = logl(2.0*t + 1.0/(sqrtl(x*x + 1.0) + t));
-	} else {                /* 2.0 > |x| > 2**-28 */
-		t = x*x;
-		w =log1pl(fabsl(x) + t/(1.0 + sqrtl(1.0 + t)));
+	/* |x| */
+	u.i.se = e;
+	x = u.f;
+
+	if (e >= 0x3fff + 32) {
+		/* |x| >= 0x1p32 or inf or nan */
+		x = logl(x) + 0.693147180559945309417232121458176568L;
+	} else if (e >= 0x3fff + 1) {
+		/* |x| >= 2 */
+		x = logl(2*x + 1/(sqrtl(x*x+1)+x));
+	} else if (e >= 0x3fff - 32) {
+		/* |x| >= 0x1p-32 */
+		x = log1pl(x + x*x/(sqrtl(x*x+1)+1));
+	} else {
+		/* |x| < 0x1p-32, raise inexact if x!=0 */
+		FORCE_EVAL(x + 0x1p1000);
 	}
-	if (hx & 0x8000)
-		return -w;
-	return w;
+	return s ? -x : x;
 }
 #endif
diff --git a/src/math/atanh.c b/src/math/atanh.c
index dbe241d1..84a84c69 100644
--- a/src/math/atanh.c
+++ b/src/math/atanh.c
@@ -1,58 +1,21 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_atanh.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- *
- */
-/* atanh(x)
- * Method :
- *    1.Reduced x to positive by atanh(-x) = -atanh(x)
- *    2.For x>=0.5
- *                  1              2x                          x
- *      atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
- *                  2             1 - x                      1 - x
- *
- *      For x<0.5
- *      atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
- *
- * Special cases:
- *      atanh(x) is NaN if |x| > 1 with signal;
- *      atanh(NaN) is that NaN with no signal;
- *      atanh(+-1) is +-INF with signal.
- *
- */
-
 #include "libm.h"
 
-static const double huge = 1e300;
-
+/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */
 double atanh(double x)
 {
-	double t;
-	int32_t hx,ix;
-	uint32_t lx;
+	union {double f; uint64_t i;} u = {.f = x};
+	unsigned e = u.i >> 52 & 0x7ff;
+	unsigned s = u.i >> 63;
+
+	/* |x| */
+	u.i &= (uint64_t)-1/2;
+	x = u.f;
 
-	EXTRACT_WORDS(hx, lx, x);
-	ix = hx & 0x7fffffff;
-	if ((ix | ((lx|-lx)>>31)) > 0x3ff00000)  /* |x| > 1 */
-		return (x-x)/(x-x);
-	if (ix == 0x3ff00000)
-		return x/0.0;
-	if (ix < 0x3e300000 && (huge+x) > 0.0)   /* x < 2**-28 */
-		return x;
-	SET_HIGH_WORD(x, ix);
-	if (ix < 0x3fe00000) {                   /* x < 0.5 */
-		t = x+x;
-		t = 0.5*log1p(t + t*x/(1.0-x));
-	} else
-		t = 0.5*log1p((x+x)/(1.0-x));
-	if (hx >= 0)
-		return t;
-	return -t;
+	if (e < 0x3ff - 1) {
+		/* |x| < 0.5, up to 1.7ulp error */
+		x = 0.5*log1p(2*x + 2*x*x/(1-x));
+	} else {
+		x = 0.5*log1p(2*x/(1-x));
+	}
+	return s ? -x : x;
 }
diff --git a/src/math/atanhf.c b/src/math/atanhf.c
index 2be780bb..ca106efc 100644
--- a/src/math/atanhf.c
+++ b/src/math/atanhf.c
@@ -1,42 +1,20 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_atanhf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
 #include "libm.h"
 
-static const float huge = 1e30;
-
+/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */
 float atanhf(float x)
 {
-	float t;
-	int32_t hx,ix;
+	union {float f; uint32_t i;} u = {.f = x};
+	unsigned s = u.i >> 31;
+
+	/* |x| */
+	u.i &= 0x7fffffff;
+	x = u.f;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
-	if (ix > 0x3f800000)                   /* |x| > 1 */
-		return (x-x)/(x-x);
-	if (ix == 0x3f800000)
-		return x/0.0f;
-	if (ix < 0x31800000 && huge+x > 0.0f)  /* x < 2**-28 */
-		return x;
-	SET_FLOAT_WORD(x, ix);
-	if (ix < 0x3f000000) {                 /* x < 0.5 */
-		t = x+x;
-		t = 0.5f*log1pf(t + t*x/(1.0f-x));
-	} else
-		t = 0.5f*log1pf((x+x)/(1.0f-x));
-	if (hx >= 0)
-		return t;
-	return -t;
+	if (u.i < 0x3f800000 - (1<<23)) {
+		/* |x| < 0.5, up to 1.7ulp error */
+		x = 0.5f*log1pf(2*x + 2*x*x/(1-x));
+	} else {
+		x = 0.5f*log1pf(2*x/(1-x));
+	}
+	return s ? -x : x;
 }
diff --git a/src/math/atanhl.c b/src/math/atanhl.c
index 931bae32..b4c5e58b 100644
--- a/src/math/atanhl.c
+++ b/src/math/atanhl.c
@@ -1,31 +1,3 @@
-/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_atanh.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* atanhl(x)
- * Method :
- *    1.Reduced x to positive by atanh(-x) = -atanh(x)
- *    2.For x>=0.5
- *                   1              2x                          x
- *      atanhl(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
- *                   2             1 - x                      1 - x
- *
- *      For x<0.5
- *      atanhl(x) = 0.5*log1pl(2x+2x*x/(1-x))
- *
- * Special cases:
- *      atanhl(x) is NaN if |x| > 1 with signal;
- *      atanhl(NaN) is that NaN with no signal;
- *      atanhl(+-1) is +-INF with signal.
- */
-
 #include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -34,31 +6,26 @@ long double atanhl(long double x)
 	return atanh(x);
 }
 #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-static const long double huge = 1e4900L;
-
+/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */
 long double atanhl(long double x)
 {
-	long double t;
-	int32_t ix;
-	uint32_t se,i0,i1;
+	union {
+		long double f;
+		struct{uint64_t m; uint16_t se; uint16_t pad;} i;
+	} u = {.f = x};
+	unsigned e = u.i.se & 0x7fff;
+	unsigned s = u.i.se >> 15;
+
+	/* |x| */
+	u.i.se = e;
+	x = u.f;
 
-	GET_LDOUBLE_WORDS(se, i0, i1, x);
-	ix = se & 0x7fff;
-	if ((ix+((((i0&0x7fffffff)|i1)|(-((i0&0x7fffffff)|i1)))>>31)) > 0x3fff)
-		/* |x| > 1 */
-		return (x-x)/(x-x);
-	if (ix == 0x3fff)
-		return x/0.0;
-	if (ix < 0x3fe3 && huge+x > 0.0)  /* x < 2**-28 */
-		return x;
-	SET_LDOUBLE_EXP(x, ix);
-	if (ix < 0x3ffe) {  /* x < 0.5 */
-		t = x + x;
-		t = 0.5*log1pl(t + t*x/(1.0 - x));
-	} else
-		t = 0.5*log1pl((x + x)/(1.0 - x));
-	if (se <= 0x7fff)
-		return t;
-	return -t;
+	if (e < 0x3fff - 1) {
+		/* |x| < 0.5, up to 1.7ulp error */
+		x = 0.5*log1pl(2*x + 2*x*x/(1-x));
+	} else {
+		x = 0.5*log1pl(2*x/(1-x));
+	}
+	return s ? -x : x;
 }
 #endif
-- 
cgit v1.2.1


From b12a73d5bf595b7fbb73db30c6bb144078e86ef5 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Tue, 11 Dec 2012 23:56:59 +0100
Subject: math: clean up inverse trigonometric functions

modifications:
* avoid unsigned->signed conversions
* removed various volatile hacks
* use FORCE_EVAL when evaluating only for side-effects
* factor out R() rational approximation instead of manual inline
* __invtrigl.h now only provides __invtrigl_R, __pio2_hi and __pio2_lo
* use 2*pio2_hi, 2*pio2_lo instead of pi_hi, pi_lo

otherwise the logic is not changed, long double versions will
need a revisit when a genaral long double cleanup happens
---
 src/math/__invtrigl.c | 41 +++++++--------------------
 src/math/__invtrigl.h | 66 +++----------------------------------------
 src/math/acos.c       | 74 ++++++++++++++++++++++++-------------------------
 src/math/acosf.c      | 77 +++++++++++++++++++++++++--------------------------
 src/math/acosl.c      | 61 +++++++++++++++++-----------------------
 src/math/asin.c       | 71 ++++++++++++++++++++++++-----------------------
 src/math/asinf.c      | 51 +++++++++++++++++-----------------
 src/math/asinl.c      | 61 +++++++++++++++++-----------------------
 src/math/atan.c       | 32 +++++++++------------
 src/math/atan2l.c     | 40 +++++++++++++-------------
 src/math/atanf.c      | 28 +++++++++----------
 src/math/atanl.c      | 33 +++++++++-------------
 12 files changed, 258 insertions(+), 377 deletions(-)

diff --git a/src/math/__invtrigl.c b/src/math/__invtrigl.c
index a51330e2..f2d33d3e 100644
--- a/src/math/__invtrigl.c
+++ b/src/math/__invtrigl.c
@@ -1,36 +1,8 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/ld80/invtrig.c */
-/*-
- * Copyright (c) 2008 David Schultz <das@FreeBSD.ORG>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
 #include "__invtrigl.h"
 
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
 
-/* coefficients used in asinl() and acosl() */
-const long double
+static const long double
 pS0 =  1.66666666666666666631e-01L,
 pS1 = -4.16313987993683104320e-01L,
 pS2 =  3.69068046323246813704e-01L,
@@ -44,9 +16,16 @@ qS3 = -1.68285799854822427013e+00L,
 qS4 =  3.90699412641738801874e-01L,
 qS5 = -3.14365703596053263322e-02L;
 
-const long double pi_hi = 3.1415926535897932384626433832795L;
-const long double pi_lo = -5.01655761266833202345e-20L;
 const long double pio2_hi = 1.57079632679489661926L;
 const long double pio2_lo = -2.50827880633416601173e-20L;
 
+/* used in asinl() and acosl() */
+/* R(x^2) is a rational approximation of (asin(x)-x)/x^3 with Remez algorithm */
+long double __invtrigl_R(long double z)
+{
+	long double p, q;
+	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*(pS5+z*pS6))))));
+	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*(qS4+z*qS5))));
+	return p/q;
+}
 #endif
diff --git a/src/math/__invtrigl.h b/src/math/__invtrigl.h
index 22748b68..cac465c2 100644
--- a/src/math/__invtrigl.h
+++ b/src/math/__invtrigl.h
@@ -1,68 +1,10 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/ld80/invtrig.h */
-/*-
- * Copyright (c) 2008 David Schultz <das@FreeBSD.ORG>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "libm.h"
+#include <float.h>
 
 #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-
-#define BIAS            (LDBL_MAX_EXP - 1)
-#define MANH_SIZE       LDBL_MANH_SIZE
-
-/* Constants shared by the long double inverse trig functions. */
-#define pS0     __pS0
-#define pS1     __pS1
-#define pS2     __pS2
-#define pS3     __pS3
-#define pS4     __pS4
-#define pS5     __pS5
-#define pS6     __pS6
-#define qS1     __qS1
-#define qS2     __qS2
-#define qS3     __qS3
-#define qS4     __qS4
-#define qS5     __qS5
-#define pi_hi   __pi_hi
-#define pi_lo   __pi_lo
+/* shared by acosl, asinl and atan2l */
 #define pio2_hi __pio2_hi
 #define pio2_lo __pio2_lo
+extern const long double pio2_hi, pio2_lo;
 
-extern const long double pS0, pS1, pS2, pS3, pS4, pS5, pS6;
-extern const long double qS1, qS2, qS3, qS4, qS5;
-extern const long double pi_hi, pi_lo, pio2_hi, pio2_lo;
-
-static long double P(long double x)
-{
-	return x * (pS0 + x * (pS1 + x * (pS2 + x * (pS3 +
-		x * (pS4 + x * (pS5 + x * pS6))))));
-}
-
-static long double Q(long double x)
-{
-	return 1.0 + x * (qS1 + x * (qS2 + x * (qS3 + x * (qS4 + x * qS5))));
-}
-
+long double __invtrigl_R(long double z);
 #endif
diff --git a/src/math/acos.c b/src/math/acos.c
index 0eb15bed..be95d25e 100644
--- a/src/math/acos.c
+++ b/src/math/acos.c
@@ -36,12 +36,8 @@
 #include "libm.h"
 
 static const double
-pi      = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
-pio2_hi = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */
-// FIXME
-static const volatile double
-pio2_lo = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */
-static const double
+pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
+pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
 pS0 =  1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
 pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
 pS2 =  2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
@@ -53,49 +49,53 @@ qS2 =  2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
 qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
 qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
 
+static double R(double z)
+{
+	double p, q;
+	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
+	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+	return p/q;
+}
+
 double acos(double x)
 {
-	double z,p,q,r,w,s,c,df;
-	int32_t hx,ix;
+	double z,w,s,c,df;
+	uint32_t hx,ix;
 
 	GET_HIGH_WORD(hx, x);
 	ix = hx & 0x7fffffff;
-	if (ix >= 0x3ff00000) {  /* |x| >= 1 */
+	/* |x| >= 1 or nan */
+	if (ix >= 0x3ff00000) {
 		uint32_t lx;
 
 		GET_LOW_WORD(lx,x);
-		if ((ix-0x3ff00000 | lx) == 0) {  /* |x|==1 */
-			if (hx > 0) return 0.0;  /* acos(1) = 0  */
-			return pi + 2.0*pio2_lo; /* acos(-1)= pi */
+		if ((ix-0x3ff00000 | lx) == 0) {
+			/* acos(1)=0, acos(-1)=pi */
+			if (hx >> 31)
+				return 2*pio2_hi + 0x1p-1000;
+			return 0;
 		}
-		return (x-x)/(x-x);  /* acos(|x|>1) is NaN */
+		return 0/(x-x);
 	}
-	if (ix < 0x3fe00000) {   /* |x| < 0.5 */
+	/* |x| < 0.5 */
+	if (ix < 0x3fe00000) {
 		if (ix <= 0x3c600000)  /* |x| < 2**-57 */
-			return pio2_hi + pio2_lo;
-		z = x*x;
-		p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
-		q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
-		r = p/q;
-		return pio2_hi - (x - (pio2_lo-x*r));
-	} else if (hx < 0) {     /* x < -0.5 */
+			return pio2_hi + 0x1p-1000;
+		return pio2_hi - (x - (pio2_lo-x*R(x*x)));
+	}
+	/* x < -0.5 */
+	if (hx >> 31) {
 		z = (1.0+x)*0.5;
-		p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
-		q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
-		s = sqrt(z);
-		r = p/q;
-		w = r*s-pio2_lo;
-		return pi - 2.0*(s+w);
-	} else {                 /* x > 0.5 */
-		z = (1.0-x)*0.5;
 		s = sqrt(z);
-		df = s;
-		SET_LOW_WORD(df,0);
-		c  = (z-df*df)/(s+df);
-		p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
-		q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
-		r = p/q;
-		w = r*s+c;
-		return 2.0*(df+w);
+		w = R(z)*s-pio2_lo;
+		return 2*(pio2_hi - (s+w));
 	}
+	/* x > 0.5 */
+	z = (1.0-x)*0.5;
+	s = sqrt(z);
+	df = s;
+	SET_LOW_WORD(df,0);
+	c = (z-df*df)/(s+df);
+	w = R(z)*s+c;
+	return 2*(df+w);
 }
diff --git a/src/math/acosf.c b/src/math/acosf.c
index d875f3ef..5d7c0270 100644
--- a/src/math/acosf.c
+++ b/src/math/acosf.c
@@ -16,59 +16,56 @@
 #include "libm.h"
 
 static const float
-pi      = 3.1415925026e+00, /* 0x40490fda */
-pio2_hi = 1.5707962513e+00; /* 0x3fc90fda */
-static const volatile float
-pio2_lo = 7.5497894159e-08; /* 0x33a22168 */
-static const float
+pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */
+pio2_lo = 7.5497894159e-08, /* 0x33a22168 */
 pS0 =  1.6666586697e-01,
 pS1 = -4.2743422091e-02,
 pS2 = -8.6563630030e-03,
 qS1 = -7.0662963390e-01;
 
+static float R(float z)
+{
+	float p, q;
+	p = z*(pS0+z*(pS1+z*pS2));
+	q = 1.0f+z*qS1;
+	return p/q;
+}
+
 float acosf(float x)
 {
-	float z,p,q,r,w,s,c,df;
-	int32_t hx,ix;
+	float z,w,s,c,df;
+	uint32_t hx,ix;
 
 	GET_FLOAT_WORD(hx, x);
 	ix = hx & 0x7fffffff;
-	if (ix >= 0x3f800000) {  /* |x| >= 1 */
-		if (ix == 0x3f800000) {  /* |x| == 1 */
-			if (hx > 0) return 0.0f;  /* acos(1) = 0 */
-			return pi + 2.0f*pio2_lo;  /* acos(-1)= pi */
+	/* |x| >= 1 or nan */
+	if (ix >= 0x3f800000) {
+		if (ix == 0x3f800000) {
+			if (hx >> 31)
+				return 2*pio2_hi + 0x1p-120f;
+			return 0;
 		}
-		return (x-x)/(x-x);  /* acos(|x|>1) is NaN */
+		return 0/(x-x);
 	}
-	if (ix < 0x3f000000) {   /* |x| < 0.5 */
+	/* |x| < 0.5 */
+	if (ix < 0x3f000000) {
 		if (ix <= 0x32800000) /* |x| < 2**-26 */
-			return pio2_hi + pio2_lo;
-		z = x*x;
-		p = z*(pS0+z*(pS1+z*pS2));
-		q = 1.0f+z*qS1;
-		r = p/q;
-		return pio2_hi - (x - (pio2_lo-x*r));
-	} else if (hx < 0) {     /* x < -0.5 */
-		z = (1.0f+x)*0.5f;
-		p = z*(pS0+z*(pS1+z*pS2));
-		q = 1.0f+z*qS1;
-		s = sqrtf(z);
-		r = p/q;
-		w = r*s-pio2_lo;
-		return pi - 2.0f*(s+w);
-	} else {                 /* x > 0.5 */
-		int32_t idf;
-
-		z = (1.0f-x)*0.5f;
+			return pio2_hi + 0x1p-120f;
+		return pio2_hi - (x - (pio2_lo-x*R(x*x)));
+	}
+	/* x < -0.5 */
+	if (hx >> 31) {
+		z = (1+x)*0.5f;
 		s = sqrtf(z);
-		df = s;
-		GET_FLOAT_WORD(idf,df);
-		SET_FLOAT_WORD(df,idf&0xfffff000);
-		c  = (z-df*df)/(s+df);
-		p = z*(pS0+z*(pS1+z*pS2));
-		q = 1.0f+z*qS1;
-		r = p/q;
-		w = r*s+c;
-		return 2.0f*(df+w);
+		w = R(z)*s-pio2_lo;
+		return 2*(pio2_hi - (s+w));
 	}
+	/* x > 0.5 */
+	z = (1-x)*0.5f;
+	s = sqrtf(z);
+	GET_FLOAT_WORD(hx,s);
+	SET_FLOAT_WORD(df,hx&0xfffff000);
+	c = (z-df*df)/(s+df);
+	w = R(z)*s+c;
+	return 2*(df+w);
 }
diff --git a/src/math/acosl.c b/src/math/acosl.c
index 83857d49..9e9b01e4 100644
--- a/src/math/acosl.c
+++ b/src/math/acosl.c
@@ -23,55 +23,46 @@ long double acosl(long double x)
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 #include "__invtrigl.h"
-#define ACOS_CONST      (BIAS - 65)     /* 2**-65 */
 
 long double acosl(long double x)
 {
 	union IEEEl2bits u;
-	long double z, p, q, r, w, s, c, df;
+	long double z, w, s, c, df;
 	int16_t expsign, expt;
 	u.e = x;
 	expsign = u.xbits.expsign;
 	expt = expsign & 0x7fff;
-	if (expt >= BIAS) {        /* |x| >= 1 */
-		if (expt == BIAS &&
+	/* |x| >= 1 or nan */
+	if (expt >= 0x3fff) {
+		if (expt == 0x3fff &&
 			((u.bits.manh & ~LDBL_NBIT) | u.bits.manl) == 0) {
 			if (expsign > 0)
-				return 0.0;  /* acos(1) = 0 */
-			else
-				// FIXME
-				return pi_hi + 2.0 * pio2_lo;  /* acos(-1)= pi */
+				return 0;  /* acos(1) = 0 */
+			return 2*pio2_hi + 0x1p-1000;  /* acos(-1)= pi */
 		}
-		return (x - x) / (x - x);  /* acos(|x|>1) is NaN */
+		return 0/(x-x);  /* acos(|x|>1) is NaN */
 	}
-	if (expt < BIAS - 1) {     /* |x| < 0.5 */
-		if (expt < ACOS_CONST)
-			return pio2_hi + pio2_lo;  /* x tiny: acosl=pi/2 */
-		z = x * x;
-		p = P(z);
-		q = Q(z);
-		r = p / q;
-		return pio2_hi - (x - (pio2_lo - x * r));
-	} else if (expsign < 0) {  /* x < -0.5 */
+	/* |x| < 0.5 */
+	if (expt < 0x3fff - 1) {
+		if (expt < 0x3fff - 65)
+			return pio2_hi + 0x1p-1000;  /* x < 0x1p-65: acosl(x)=pi/2 */
+		return pio2_hi - (x - (pio2_lo - x * __invtrigl_R(x*x)));
+	}
+	/* x < -0.5 */
+	if (expsign < 0) {
 		z = (1.0 + x) * 0.5;
-		p = P(z);
-		q = Q(z);
-		s = sqrtl(z);
-		r = p / q;
-		w = r * s - pio2_lo;
-		return pi_hi - 2.0 * (s + w);
-	} else {                   /* x > 0.5 */
-		z = (1.0 - x) * 0.5;
 		s = sqrtl(z);
-		u.e = s;
-		u.bits.manl = 0;
-		df = u.e;
-		c = (z - df * df) / (s + df);
-		p = P(z);
-		q = Q(z);
-		r = p / q;
-		w = r * s + c;
-		return 2.0 * (df + w);
+		w = __invtrigl_R(z) * s - pio2_lo;
+		return 2*(pio2_hi - (s + w));
 	}
+	/* x > 0.5 */
+	z = (1.0 - x) * 0.5;
+	s = sqrtl(z);
+	u.e = s;
+	u.bits.manl = 0;
+	df = u.e;
+	c = (z - df * df) / (s + df);
+	w = __invtrigl_R(z) * s + c;
+	return 2*(df + w);
 }
 #endif
diff --git a/src/math/asin.c b/src/math/asin.c
index e3d8b250..a1906b08 100644
--- a/src/math/asin.c
+++ b/src/math/asin.c
@@ -42,10 +42,8 @@
 #include "libm.h"
 
 static const double
-huge = 1.000e+300,
 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
 pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
-pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
 /* coefficients for R(x^2) */
 pS0 =  1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
 pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
@@ -58,51 +56,54 @@ qS2 =  2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
 qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
 qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
 
+static double R(double z)
+{
+	double p, q;
+	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
+	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+	return p/q;
+}
+
 double asin(double x)
 {
-	double t=0.0,w,p,q,c,r,s;
-	int32_t hx,ix;
+	double z,r,s;
+	uint32_t hx,ix;
 
 	GET_HIGH_WORD(hx, x);
 	ix = hx & 0x7fffffff;
-	if (ix >= 0x3ff00000) {           /* |x|>= 1 */
+	/* |x| >= 1 or nan */
+	if (ix >= 0x3ff00000) {
 		uint32_t lx;
-
 		GET_LOW_WORD(lx, x);
 		if ((ix-0x3ff00000 | lx) == 0)
 			/* asin(1) = +-pi/2 with inexact */
-			return x*pio2_hi + x*pio2_lo;
-		return (x-x)/(x-x);  /* asin(|x|>1) is NaN */
-	} else if (ix < 0x3fe00000) {  /* |x|<0.5 */
-		if (ix < 0x3e500000) {  /* if |x| < 2**-26 */
-			if (huge+x > 1.0)
-				return x; /* return x with inexact if x!=0*/
+			return x*pio2_hi + 0x1p-1000;
+		return 0/(x-x);
+	}
+	/* |x| < 0.5 */
+	if (ix < 0x3fe00000) {
+		if (ix < 0x3e500000) {
+			/* |x|<0x1p-26, return x with inexact if x!=0*/
+			FORCE_EVAL(x + 0x1p1000);
+			return x;
 		}
-		t = x*x;
-		p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
-		q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
-		w = p/q;
-		return x + x*w;
+		return x + x*R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */
-	w = 1.0 - fabs(x);
-	t = w*0.5;
-	p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
-	q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
-	s = sqrt(t);
-	if (ix >= 0x3FEF3333) {  /* if |x| > 0.975 */
-		w = p/q;
-		t = pio2_hi-(2.0*(s+s*w)-pio2_lo);
+	z = (1 - fabs(x))*0.5;
+	s = sqrt(z);
+	r = R(z);
+	if (ix >= 0x3fef3333) {  /* if |x| > 0.975 */
+		x = pio2_hi-(2*(s+s*r)-pio2_lo);
 	} else {
-		w = s;
-		SET_LOW_WORD(w,0);
-		c = (t-w*w)/(s+w);
-		r = p/q;
-		p = 2.0*s*r-(pio2_lo-2.0*c);
-		q = pio4_hi - 2.0*w;
-		t = pio4_hi - (p-q);
+		double f,c;
+		/* f+c = sqrt(z) */
+		f = s;
+		SET_LOW_WORD(f,0);
+		c = (z-f*f)/(s+f);
+		x = 0.5*pio2_hi - (2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f));
 	}
-	if (hx > 0)
-		return t;
-	return -t;
+	if (hx >> 31)
+		return -x;
+	return x;
 }
diff --git a/src/math/asinf.c b/src/math/asinf.c
index c1c42c7b..462bf043 100644
--- a/src/math/asinf.c
+++ b/src/math/asinf.c
@@ -12,52 +12,51 @@
  * is preserved.
  * ====================================================
  */
-
 #include "libm.h"
 
+static const double
+pio2 = 1.570796326794896558e+00;
+
 static const float
-huge = 1.000e+30,
 /* coefficients for R(x^2) */
 pS0 =  1.6666586697e-01,
 pS1 = -4.2743422091e-02,
 pS2 = -8.6563630030e-03,
 qS1 = -7.0662963390e-01;
 
-static const double
-pio2 = 1.570796326794896558e+00;
+static float R(float z)
+{
+	float p, q;
+	p = z*(pS0+z*(pS1+z*pS2));
+	q = 1.0f+z*qS1;
+	return p/q;
+}
 
 float asinf(float x)
 {
 	double s;
-	float t,w,p,q;
-	int32_t hx,ix;
+	float z;
+	uint32_t hx,ix;
 
 	GET_FLOAT_WORD(hx, x);
 	ix = hx & 0x7fffffff;
 	if (ix >= 0x3f800000) {  /* |x| >= 1 */
 		if (ix == 0x3f800000)  /* |x| == 1 */
-			return x*pio2;  /* asin(+-1) = +-pi/2 with inexact */
-		return (x-x)/(x-x);  /* asin(|x|>1) is NaN */
-	} else if (ix < 0x3f000000) {  /* |x|<0.5 */
+			return x*pio2 + 0x1p-120f;  /* asin(+-1) = +-pi/2 with inexact */
+		return 0/(x-x);  /* asin(|x|>1) is NaN */
+	}
+	if (ix < 0x3f000000) {  /* |x| < 0.5 */
 		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			if (huge+x > 1.0f)
-				return x; /* return x with inexact if x!=0 */
+			FORCE_EVAL(x + 0x1p120f);
+			return x; /* return x with inexact if x!=0 */
 		}
-		t = x*x;
-		p = t*(pS0+t*(pS1+t*pS2));
-		q = 1.0f+t*qS1;
-		w = p/q;
-		return x + x*w;
+		return x + x*R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */
-	w = 1.0f - fabsf(x);
-	t = w*0.5f;
-	p = t*(pS0+t*(pS1+t*pS2));
-	q = 1.0f+t*qS1;
-	s = sqrt(t);
-	w = p/q;
-	t = pio2-2.0*(s+s*w);
-	if (hx > 0)
-		return t;
-	return -t;
+	z = (1 - fabsf(x))*0.5f;
+	s = sqrt(z);
+	x = pio2 - 2*(s+s*R(z));
+	if (hx >> 31)
+		return -x;
+	return x;
 }
diff --git a/src/math/asinl.c b/src/math/asinl.c
index 7572767c..0ef9853b 100644
--- a/src/math/asinl.c
+++ b/src/math/asinl.c
@@ -23,60 +23,49 @@ long double asinl(long double x)
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 #include "__invtrigl.h"
-static const long double huge = 1.000e+300;
-static const long double pio4_hi = 7.85398163397448309628e-01L;
-#define ASIN_LINEAR     (BIAS - 32)     /* 2**-32 */
 /* 0.95 */
-#define THRESH  ((0xe666666666666666ULL>>(64-(MANH_SIZE-1)))|LDBL_NBIT)
+#define THRESH  ((0xe666666666666666ULL>>(64-(LDBL_MANH_SIZE-1)))|LDBL_NBIT)
 
 long double asinl(long double x)
 {
 	union IEEEl2bits u;
-	long double t=0.0,w,p,q,c,r,s;
-	int16_t expsign, expt;
+	long double z,r,s;
+	uint16_t expsign, expt;
 
 	u.e = x;
 	expsign = u.xbits.expsign;
 	expt = expsign & 0x7fff;
-	if (expt >= BIAS) {          /* |x|>= 1 */
-		if (expt == BIAS &&
+	if (expt >= 0x3fff) {   /* |x| >= 1 or nan */
+		if (expt == 0x3fff &&
 		    ((u.bits.manh&~LDBL_NBIT)|u.bits.manl) == 0)
-			/* asin(1)=+-pi/2 with inexact */
-			return x*pio2_hi + x*pio2_lo;
-		return (x-x)/(x-x);  /* asin(|x|>1) is NaN */
-	} else if (expt < BIAS-1) {  /* |x|<0.5 */
-		if (expt < ASIN_LINEAR) {  /* if |x| is small, asinl(x)=x */
+			/* asin(+-1)=+-pi/2 with inexact */
+			return x*pio2_hi + 0x1p-1000;
+		return 0/(x-x);
+	}
+	if (expt < 0x3fff - 1) {  /* |x| < 0.5 */
+		if (expt < 0x3fff - 32) {  /* |x|<0x1p-32, asinl(x)=x */
 			/* return x with inexact if x!=0 */
-			if (huge+x > 1.0)
-				return x;
+			FORCE_EVAL(x + 0x1p1000);
+			return x;
 		}
-		t = x*x;
-		p = P(t);
-		q = Q(t);
-		w = p/q;
-		return x + x*w;
+		return x + x*__invtrigl_R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */
-	w = 1.0 - fabsl(x);
-	t = w*0.5;
-	p = P(t);
-	q = Q(t);
-	s = sqrtl(t);
+	z = (1.0 - fabsl(x))*0.5;
+	s = sqrtl(z);
+	r = __invtrigl_R(z);
 	if (u.bits.manh >= THRESH) { /* if |x| is close to 1 */
-		w = p/q;
-		t = pio2_hi-(2.0*(s+s*w)-pio2_lo);
+		x = pio2_hi - (2*(s+s*r)-pio2_lo);
 	} else {
+		long double f, c;
 		u.e = s;
 		u.bits.manl = 0;
-		w = u.e;
-		c = (t-w*w)/(s+w);
-		r = p/q;
-		p = 2.0*s*r-(pio2_lo-2.0*c);
-		q = pio4_hi-2.0*w;
-		t = pio4_hi-(p-q);
+		f = u.e;
+		c = (z-f*f)/(s+f);
+		x = 0.5*pio2_hi-(2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f));
 	}
-	if (expsign > 0)
-		return t;
-	return -t;
+	if (expsign>>15)
+		return -x;
+	return x;
 }
 #endif
diff --git a/src/math/atan.c b/src/math/atan.c
index f34551c7..7fd8a3b8 100644
--- a/src/math/atan.c
+++ b/src/math/atan.c
@@ -60,32 +60,26 @@ static const double aT[] = {
   1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */
 };
 
-static const double huge = 1.0e300;
-
 double atan(double x)
 {
 	double w,s1,s2,z;
-	int32_t ix,hx,id;
+	uint32_t ix,sign;
+	int id;
 
-	GET_HIGH_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	GET_HIGH_WORD(ix, x);
+	sign = ix >> 31;
+	ix &= 0x7fffffff;
 	if (ix >= 0x44100000) {   /* if |x| >= 2^66 */
-		uint32_t low;
-
-		GET_LOW_WORD(low, x);
-		if (ix > 0x7ff00000 ||
-		    (ix == 0x7ff00000 && low != 0))  /* NaN */
-			return x+x;
-		if (hx > 0)
-			return  atanhi[3] + *(volatile double *)&atanlo[3];
-		else
-			return -atanhi[3] - *(volatile double *)&atanlo[3];
+		if (isnan(x))
+			return x;
+		z = atanhi[3] + 0x1p-1000;
+		return sign ? -z : z;
 	}
 	if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
 		if (ix < 0x3e400000) {  /* |x| < 2^-27 */
-			/* raise inexact */
-			if (huge+x > 1.0)
-				return x;
+			/* raise inexact if x!=0 */
+			FORCE_EVAL(x + 0x1p1000);
+			return x;
 		}
 		id = -1;
 	} else {
@@ -117,5 +111,5 @@ double atan(double x)
 	if (id < 0)
 		return x - x*(s1+s2);
 	z = atanhi[id] - (x*(s1+s2) - atanlo[id] - x);
-	return hx < 0 ? -z : z;
+	return sign ? -z : z;
 }
diff --git a/src/math/atan2l.c b/src/math/atan2l.c
index 45cbfcc1..e86dbffb 100644
--- a/src/math/atan2l.c
+++ b/src/math/atan2l.c
@@ -24,8 +24,6 @@ long double atan2l(long double y, long double x)
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 #include "__invtrigl.h"
-// FIXME:
-static const volatile long double tiny = 1.0e-300;
 
 long double atan2l(long double y, long double x)
 {
@@ -40,12 +38,12 @@ long double atan2l(long double y, long double x)
 	ux.e = x;
 	expsignx = ux.xbits.expsign;
 	exptx = expsignx & 0x7fff;
-	if ((exptx==BIAS+LDBL_MAX_EXP &&
+	if ((exptx==0x7fff &&
 	     ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)!=0) || /* x is NaN */
-	    (expty==BIAS+LDBL_MAX_EXP &&
+	    (expty==0x7fff &&
 	     ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl)!=0))   /* y is NaN */
 		return x+y;
-	if (expsignx==BIAS && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) /* x=1.0 */
+	if (expsignx==0x3fff && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) /* x=1.0 */
 		return atanl(y);
 	m = ((expsigny>>15)&1) | ((expsignx>>14)&2);  /* 2*sign(x)+sign(y) */
 
@@ -54,39 +52,39 @@ long double atan2l(long double y, long double x)
 		switch(m) {
 		case 0:
 		case 1: return y;           /* atan(+-0,+anything)=+-0 */
-		case 2: return  pi_hi+tiny; /* atan(+0,-anything) = pi */
-		case 3: return -pi_hi-tiny; /* atan(-0,-anything) =-pi */
+		case 2: return  2*pio2_hi+0x1p-1000; /* atan(+0,-anything) = pi */
+		case 3: return -2*pio2_hi-0x1p-1000; /* atan(-0,-anything) =-pi */
 		}
 	}
 	/* when x = 0 */
 	if (exptx==0 && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0)
-		return expsigny < 0 ? -pio2_hi-tiny : pio2_hi+tiny;
+		return expsigny < 0 ? -pio2_hi-0x1p-1000 : pio2_hi+0x1p-1000;
 	/* when x is INF */
-	if (exptx == BIAS+LDBL_MAX_EXP) {
-		if (expty == BIAS+LDBL_MAX_EXP) {
+	if (exptx == 0x7fff) {
+		if (expty == 0x7fff) {
 			switch(m) {
-			case 0: return  pio2_hi*0.5+tiny; /* atan(+INF,+INF) */
-			case 1: return -pio2_hi*0.5-tiny; /* atan(-INF,+INF) */
-			case 2: return  1.5*pio2_hi+tiny; /* atan(+INF,-INF) */
-			case 3: return -1.5*pio2_hi-tiny; /* atan(-INF,-INF) */
+			case 0: return  pio2_hi*0.5+0x1p-1000; /* atan(+INF,+INF) */
+			case 1: return -pio2_hi*0.5-0x1p-1000; /* atan(-INF,+INF) */
+			case 2: return  1.5*pio2_hi+0x1p-1000; /* atan(+INF,-INF) */
+			case 3: return -1.5*pio2_hi-0x1p-1000; /* atan(-INF,-INF) */
 			}
 		} else {
 			switch(m) {
 			case 0: return  0.0;        /* atan(+...,+INF) */
 			case 1: return -0.0;        /* atan(-...,+INF) */
-			case 2: return  pi_hi+tiny; /* atan(+...,-INF) */
-			case 3: return -pi_hi-tiny; /* atan(-...,-INF) */
+			case 2: return  2*pio2_hi+0x1p-1000; /* atan(+...,-INF) */
+			case 3: return -2*pio2_hi-0x1p-1000; /* atan(-...,-INF) */
 			}
 		}
 	}
 	/* when y is INF */
-	if (expty == BIAS+LDBL_MAX_EXP)
-		return expsigny < 0 ? -pio2_hi-tiny : pio2_hi+tiny;
+	if (expty == 0x7fff)
+		return expsigny < 0 ? -pio2_hi-0x1p-1000 : pio2_hi+0x1p-1000;
 
 	/* compute y/x */
 	k = expty-exptx;
 	if(k > LDBL_MANT_DIG+2) { /* |y/x| huge */
-		z = pio2_hi+pio2_lo;
+		z = pio2_hi+0x1p-1000;
 		m &= 1;
 	} else if (expsignx < 0 && k < -LDBL_MANT_DIG-2) /* |y/x| tiny, x<0 */
 		z = 0.0;
@@ -95,9 +93,9 @@ long double atan2l(long double y, long double x)
 	switch (m) {
 	case 0: return z;               /* atan(+,+) */
 	case 1: return -z;              /* atan(-,+) */
-	case 2: return pi_hi-(z-pi_lo); /* atan(+,-) */
+	case 2: return 2*pio2_hi-(z-2*pio2_lo); /* atan(+,-) */
 	default: /* case 3 */
-		return (z-pi_lo)-pi_hi; /* atan(-,-) */
+		return (z-2*pio2_lo)-2*pio2_hi; /* atan(-,-) */
 	}
 }
 #endif
diff --git a/src/math/atanf.c b/src/math/atanf.c
index 4e31b902..4b59509a 100644
--- a/src/math/atanf.c
+++ b/src/math/atanf.c
@@ -38,28 +38,26 @@ static const float aT[] = {
   6.1687607318e-02,
 };
 
-static const float huge = 1.0e30;
-
 float atanf(float x)
 {
 	float w,s1,s2,z;
-	int32_t ix,hx,id;
+	uint32_t ix,sign;
+	int id;
 
-	GET_FLOAT_WORD(hx, x);
-	ix = hx & 0x7fffffff;
+	GET_FLOAT_WORD(ix, x);
+	sign = ix>>31;
+	ix &= 0x7fffffff;
 	if (ix >= 0x4c800000) {  /* if |x| >= 2**26 */
-		if (ix > 0x7f800000)  /* NaN */
-			return x+x;
-		if (hx > 0)
-			return  atanhi[3] + *(volatile float *)&atanlo[3];
-		else
-			return -atanhi[3] - *(volatile float *)&atanlo[3];
+		if (isnan(x))
+			return x;
+		z = atanhi[3] + 0x1p-120f;
+		return sign ? -z : z;
 	}
 	if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			/* raise inexact */
-			if(huge+x>1.0f)
-				return x;
+			/* raise inexact if x!=0 */
+			FORCE_EVAL(x + 0x1p120f);
+			return x;
 		}
 		id = -1;
 	} else {
@@ -91,5 +89,5 @@ float atanf(float x)
 	if (id < 0)
 		return x - x*(s1+s2);
 	z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
-	return hx < 0 ? -z : z;
+	return sign ? -z : z;
 }
diff --git a/src/math/atanl.c b/src/math/atanl.c
index 33ecff0d..6a480a7a 100644
--- a/src/math/atanl.c
+++ b/src/math/atanl.c
@@ -22,11 +22,6 @@ long double atanl(long double x)
 	return atan(x);
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-#include "__invtrigl.h"
-
-#define ATAN_CONST      (BIAS + 65)     /* 2**65 */
-#define ATAN_LINEAR     (BIAS - 32)     /* 2**-32 */
-static const long double huge = 1.0e300;
 
 static const long double atanhi[] = {
 	 4.63647609000806116202e-01L,
@@ -81,29 +76,27 @@ long double atanl(long double x)
 	u.e = x;
 	expsign = u.xbits.expsign;
 	expt = expsign & 0x7fff;
-	if (expt >= ATAN_CONST) { /* if |x| is large, atan(x)~=pi/2 */
-		if (expt == BIAS + LDBL_MAX_EXP &&
+	if (expt >= 0x3fff + 65) { /* if |x| is large, atan(x)~=pi/2 */
+		if (expt == 0x7fff &&
 		    ((u.bits.manh&~LDBL_NBIT)|u.bits.manl)!=0)  /* NaN */
 			return x+x;
-		if (expsign > 0)
-			return  atanhi[3]+atanlo[3];
-		else
-			return -atanhi[3]-atanlo[3];
+		z = atanhi[3] + 0x1p-1000;
+		return expsign < 0 ? -z : z;
 	}
 	/* Extract the exponent and the first few bits of the mantissa. */
 	/* XXX There should be a more convenient way to do this. */
-	expman = (expt << 8) | ((u.bits.manh >> (MANH_SIZE - 9)) & 0xff);
-	if (expman < ((BIAS - 2) << 8) + 0xc0) {  /* |x| < 0.4375 */
-		if (expt < ATAN_LINEAR) {   /* if |x| is small, atanl(x)~=x */
-			/* raise inexact */
-			if (huge+x > 1.0)
-				return x;
+	expman = (expt << 8) | ((u.bits.manh >> (LDBL_MANH_SIZE - 9)) & 0xff);
+	if (expman < ((0x3fff - 2) << 8) + 0xc0) {  /* |x| < 0.4375 */
+		if (expt < 0x3fff - 32) {   /* if |x| is small, atanl(x)~=x */
+			/* raise inexact if x!=0 */
+			FORCE_EVAL(x + 0x1p1000);
+			return x;
 		}
 		id = -1;
 	} else {
 		x = fabsl(x);
-		if (expman < (BIAS << 8) + 0x30) {  /* |x| < 1.1875 */
-			if (expman < ((BIAS - 1) << 8) + 0x60) { /*  7/16 <= |x| < 11/16 */
+		if (expman < (0x3fff << 8) + 0x30) {  /* |x| < 1.1875 */
+			if (expman < ((0x3fff - 1) << 8) + 0x60) { /*  7/16 <= |x| < 11/16 */
 				id = 0;
 				x = (2.0*x-1.0)/(2.0+x);
 			} else {                                 /* 11/16 <= |x| < 19/16 */
@@ -111,7 +104,7 @@ long double atanl(long double x)
 				x = (x-1.0)/(x+1.0);
 			}
 		} else {
-			if (expman < ((BIAS + 1) << 8) + 0x38) { /* |x| < 2.4375 */
+			if (expman < ((0x3fff + 1) << 8) + 0x38) { /* |x| < 2.4375 */
 				id = 2;
 				x = (x-1.5)/(1.0+1.5*x);
 			} else {                                 /* 2.4375 <= |x| < 2^ATAN_CONST */
-- 
cgit v1.2.1


From 1384ad5f33ff0a4bb3e52eb3aa7e6f2536148e04 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Wed, 12 Dec 2012 00:16:32 +0100
Subject: math: add empty __invtrigl.s to i386 and x86_64

__invtrigl is not needed when acosl, asinl, atanl have asm
implementations
---
 src/math/i386/__invtrigl.s   | 0
 src/math/x86_64/__invtrigl.s | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/math/i386/__invtrigl.s
 create mode 100644 src/math/x86_64/__invtrigl.s

diff --git a/src/math/i386/__invtrigl.s b/src/math/i386/__invtrigl.s
new file mode 100644
index 00000000..e69de29b
diff --git a/src/math/x86_64/__invtrigl.s b/src/math/x86_64/__invtrigl.s
new file mode 100644
index 00000000..e69de29b
-- 
cgit v1.2.1


From 9c6b1de0fb11d6e2927a19bbaf6345aedcc84297 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Wed, 12 Dec 2012 01:28:22 +0100
Subject: math: fix comment in __rem_pio2f.c

---
 src/math/__rem_pio2f.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/math/__rem_pio2f.c b/src/math/__rem_pio2f.c
index 965dc46a..198ee91b 100644
--- a/src/math/__rem_pio2f.c
+++ b/src/math/__rem_pio2f.c
@@ -24,7 +24,7 @@
 
 /*
  * invpio2:  53 bits of 2/pi
- * pio2_1:   first  33 bit of pi/2
+ * pio2_1:   first 25 bits of pi/2
  * pio2_1t:  pi/2 - pio2_1
  */
 static const double
@@ -41,7 +41,7 @@ int __rem_pio2f(float x, double *y)
 
 	GET_FLOAT_WORD(hx, x);
 	ix = hx & 0x7fffffff;
-	/* 33+53 bit pi is good enough for medium size */
+	/* 25+53 bit pi is good enough for medium size */
 	if (ix < 0x4dc90fdb) {  /* |x| ~< 2^28*(pi/2), medium size */
 		/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
 		STRICT_ASSIGN(double, fn, x*invpio2 + 0x1.8p52);
-- 
cgit v1.2.1


From 14cc9c7f38c80094c05353fcb11fe9e441340583 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Wed, 12 Dec 2012 01:39:23 +0100
Subject: math: cosh cleanup

do fabs by hand, don't check for nan and inf separately
---
 src/math/cosh.c  | 41 +++++++++++++++++++----------------------
 src/math/coshf.c | 37 +++++++++++++++++--------------------
 src/math/coshl.c | 55 +++++++++++++++++++++++++++----------------------------
 3 files changed, 63 insertions(+), 70 deletions(-)

diff --git a/src/math/cosh.c b/src/math/cosh.c
index a1f7dbc7..2fcdea8f 100644
--- a/src/math/cosh.c
+++ b/src/math/cosh.c
@@ -23,7 +23,7 @@
  *                                                        2
  *          22       <= x <= lnovft :  cosh(x) := exp(x)/2
  *          lnovft   <= x <= ln2ovft:  cosh(x) := exp(x/2)/2 * exp(x/2)
- *          ln2ovft  <  x           :  cosh(x) := huge*huge (overflow)
+ *          ln2ovft  <  x           :  cosh(x) := inf (overflow)
  *
  * Special cases:
  *      cosh(x) is |x| if x is +INF, -INF, or NaN.
@@ -32,43 +32,40 @@
 
 #include "libm.h"
 
-static const double huge = 1.0e300;
-
 double cosh(double x)
 {
-	double t, w;
-	int32_t ix;
-
-	GET_HIGH_WORD(ix, x);
-	ix &= 0x7fffffff;
+	union {double f; uint64_t i;} u = {.f = x};
+	uint32_t ix;
+	double t;
 
-	/* x is INF or NaN */
-	if (ix >= 0x7ff00000)
-		return x*x;
+	/* |x| */
+	u.i &= (uint64_t)-1/2;
+	x = u.f;
+	ix = u.i >> 32;
 
 	/* |x| in [0,0.5*ln2], return 1+expm1(|x|)^2/(2*exp(|x|)) */
 	if (ix < 0x3fd62e43) {
-		t = expm1(fabs(x));
-		w = 1.0+t;
+		t = expm1(x);
 		if (ix < 0x3c800000)
-			return w;  /* cosh(tiny) = 1 */
-		return 1.0 + (t*t)/(w+w);
+			return 1;
+		return 1 + t*t/(2*(1+t));
 	}
 
 	/* |x| in [0.5*ln2,22], return (exp(|x|)+1/exp(|x|))/2; */
 	if (ix < 0x40360000) {
-		t = exp(fabs(x));
+		t = exp(x);
 		return 0.5*t + 0.5/t;
 	}
 
 	/* |x| in [22, log(maxdouble)] return 0.5*exp(|x|) */
-	if (ix < 0x40862E42)
-		return 0.5*exp(fabs(x));
+	if (ix < 0x40862e42)
+		return 0.5*exp(x);
 
 	/* |x| in [log(maxdouble), overflowthresold] */
-	if (ix <= 0x408633CE)
-		return __expo2(fabs(x));
+	if (ix <= 0x408633ce)
+		return __expo2(x);
 
-	/* |x| > overflowthresold, cosh(x) overflow */
-	return huge*huge;
+	/* overflow (or nan) */
+	x *= 0x1p1023;
+	return x;
 }
diff --git a/src/math/coshf.c b/src/math/coshf.c
index 97318f10..2bed1258 100644
--- a/src/math/coshf.c
+++ b/src/math/coshf.c
@@ -15,43 +15,40 @@
 
 #include "libm.h"
 
-static const float huge = 1.0e30;
-
 float coshf(float x)
 {
-	float t, w;
-	int32_t ix;
-
-	GET_FLOAT_WORD(ix, x);
-	ix &= 0x7fffffff;
+	union {float f; uint32_t i;} u = {.f = x};
+	uint32_t ix;
+	float t;
 
-	/* x is INF or NaN */
-	if (ix >= 0x7f800000)
-		return x*x;
+	/* |x| */
+	u.i &= 0x7fffffff;
+	x = u.f;
+	ix = u.i;
 
 	/* |x| in [0,0.5*ln2], return 1+expm1(|x|)^2/(2*exp(|x|)) */
 	if (ix < 0x3eb17218) {
-		t = expm1f(fabsf(x));
-		w = 1.0f+t;
-		if (ix<0x39800000)
-			return 1.0f;  /* cosh(tiny) = 1 */
-		return 1.0f + (t*t)/(w+w);
+		t = expm1f(x);
+		if (ix < 0x39800000)
+			return 1;
+		return 1 + t*t/(2*(1+t));
 	}
 
 	/* |x| in [0.5*ln2,9], return (exp(|x|)+1/exp(|x|))/2; */
 	if (ix < 0x41100000) {
-		t = expf(fabsf(x));
+		t = expf(x);
 		return 0.5f*t + 0.5f/t;
 	}
 
 	/* |x| in [9, log(maxfloat)] return 0.5f*exp(|x|) */
 	if (ix < 0x42b17217)
-		return 0.5f*expf(fabsf(x));
+		return 0.5f*expf(x);
 
 	/* |x| in [log(maxfloat), overflowthresold] */
 	if (ix <= 0x42b2d4fc)
-		return __expo2f(fabsf(x));
+		return __expo2f(x);
 
-	/* |x| > overflowthresold, cosh(x) overflow */
-	return huge*huge;
+	/* |x| > overflowthresold or nan */
+	x *= 0x1p127f;
+	return x;
 }
diff --git a/src/math/coshl.c b/src/math/coshl.c
index 36b52c02..3ea56e00 100644
--- a/src/math/coshl.c
+++ b/src/math/coshl.c
@@ -23,7 +23,7 @@
  *                                                         2
  *          22       <= x <= lnovft :  coshl(x) := expl(x)/2
  *          lnovft   <= x <= ln2ovft:  coshl(x) := expl(x/2)/2 * expl(x/2)
- *          ln2ovft  <  x           :  coshl(x) := huge*huge (overflow)
+ *          ln2ovft  <  x           :  coshl(x) := inf (overflow)
  *
  * Special cases:
  *      coshl(x) is |x| if x is +INF, -INF, or NaN.
@@ -38,49 +38,48 @@ long double coshl(long double x)
 	return cosh(x);
 }
 #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
-static const long double huge = 1.0e4900L;
-
 long double coshl(long double x)
 {
-	long double t,w;
-	int32_t ex;
+	union {
+		long double f;
+		struct{uint64_t m; uint16_t se; uint16_t pad;} i;
+	} u = {.f = x};
+	unsigned ex = u.i.se & 0x7fff;
+	long double t;
 	uint32_t mx,lx;
 
-	/* High word of |x|. */
-	GET_LDOUBLE_WORDS(ex, mx, lx, x);
-	ex &= 0x7fff;
-
-	/* x is INF or NaN */
-	if (ex == 0x7fff) return x*x;
+	/* |x| */
+	u.i.se = ex;
+	x = u.f;
+	mx = u.i.m >> 32;
+	lx = u.i.m;
 
 	/* |x| in [0,0.5*ln2], return 1+expm1l(|x|)^2/(2*expl(|x|)) */
-	if (ex < 0x3ffd || (ex == 0x3ffd && mx < 0xb17217f7u)) {
-		t = expm1l(fabsl(x));
-		w = 1.0 + t;
-		if (ex < 0x3fbc) return w;    /* cosh(tiny) = 1 */
-		return 1.0+(t*t)/(w+w);
+	if (ex < 0x3fff-2 || (ex == 0x3fff-2 && mx < 0xb17217f7)) {
+		t = expm1l(x);
+		if (ex < 0x3fff-64)
+			return 1;
+		return 1 + t*t/(2*(1+t));
 	}
 
 	/* |x| in [0.5*ln2,22], return (exp(|x|)+1/exp(|x|)/2; */
-	if (ex < 0x4003 || (ex == 0x4003 && mx < 0xb0000000u)) {
-		t = expl(fabsl(x));
+	if (ex < 0x3fff+4 || (ex == 0x3fff+4 && mx < 0xb0000000)) {
+		t = expl(x);
 		return 0.5*t + 0.5/t;
 	}
 
 	/* |x| in [22, ln(maxdouble)] return 0.5*exp(|x|) */
-	if (ex < 0x400c || (ex == 0x400c && mx < 0xb1700000u))
-		return 0.5*expl(fabsl(x));
+	if (ex < 0x3fff+13 || (ex == 0x3fff+13 && mx < 0xb1700000))
+		return 0.5*expl(x);
 
 	/* |x| in [log(maxdouble), log(2*maxdouble)) */
-	if (ex == 0x400c && (mx < 0xb174ddc0u ||
-	     (mx == 0xb174ddc0u && lx < 0x31aec0ebu)))
-	{
-		w = expl(0.5*fabsl(x));
-		t = 0.5*w;
-		return t*w;
+	if (ex == 0x3fff+13 && (mx < 0xb174ddc0 ||
+	     (mx == 0xb174ddc0 && lx < 0x31aec0eb))) {
+		t = expl(0.5*x);
+		return 0.5*t*t;
 	}
 
-	/* |x| >= log(2*maxdouble), cosh(x) overflow */
-	return huge*huge;
+	/* |x| >= log(2*maxdouble) or nan */
+	return x*0x1p16383L;
 }
 #endif
-- 
cgit v1.2.1


From 0f53c1a4266ad4cca28115e2c3bcfdc86337d8ca Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Wed, 12 Dec 2012 01:43:43 +0100
Subject: math: add a non-dummy tgamma implementation

uses the lanczos approximation method with the usual tweaks.
same parameters were selected as in boost and python.
(avoides some extra work and special casing found in boost
so the precision is not that good: measured error is <5ulp for
positive x and <10ulp for negative)

an alternative lgamma_r implementation is also given in the same
file which is simpler and smaller than the current one, but less
precise so it's ifdefed out for now.
---
 src/math/tgamma.c  | 223 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 src/math/tgammaf.c |  12 +--
 2 files changed, 215 insertions(+), 20 deletions(-)

diff --git a/src/math/tgamma.c b/src/math/tgamma.c
index f3bbe370..a3f203c1 100644
--- a/src/math/tgamma.c
+++ b/src/math/tgamma.c
@@ -1,16 +1,221 @@
-#include <math.h>
+/*
+"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964)
+"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001)
+"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004)
 
-// FIXME: use lanczos approximation
+approximation method:
 
-double __lgamma_r(double, int *);
+                        (x - 0.5)         S(x)
+Gamma(x) = (x + g - 0.5)         *  ----------------
+                                    exp(x + g - 0.5)
+
+with
+                 a1      a2      a3            aN
+S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ]
+               x + 1   x + 2   x + 3         x + N
+
+with a0, a1, a2, a3,.. aN constants which depend on g.
+
+for x < 0 the following reflection formula is used:
+
+Gamma(x)*Gamma(-x) = -pi/(x sin(pi x))
+
+most ideas and constants are from boost and python
+*/
+#include "libm.h"
+
+static const double pi = 3.141592653589793238462643383279502884;
+
+/* sin(pi x) with x > 0 && isnormal(x) assumption */
+static double sinpi(double x)
+{
+	int n;
+
+	/* argument reduction: x = |x| mod 2 */
+	/* spurious inexact when x is odd int */
+	x = x * 0.5;
+	x = 2 * (x - floor(x));
+
+	/* reduce x into [-.25,.25] */
+	n = 4 * x;
+	n = (n+1)/2;
+	x -= n * 0.5;
+
+	x *= pi;
+	switch (n) {
+	default: /* case 4 */
+	case 0:
+		return __sin(x, 0, 0);
+	case 1:
+		return __cos(x, 0);
+	case 2:
+		/* sin(0-x) and -sin(x) have different sign at 0 */
+		return __sin(0-x, 0, 0);
+	case 3:
+		return -__cos(x, 0);
+	}
+}
+
+#define N 12
+//static const double g = 6.024680040776729583740234375;
+static const double gmhalf = 5.524680040776729583740234375;
+static const double Snum[N+1] = {
+	23531376880.410759688572007674451636754734846804940,
+	42919803642.649098768957899047001988850926355848959,
+	35711959237.355668049440185451547166705960488635843,
+	17921034426.037209699919755754458931112671403265390,
+	6039542586.3520280050642916443072979210699388420708,
+	1439720407.3117216736632230727949123939715485786772,
+	248874557.86205415651146038641322942321632125127801,
+	31426415.585400194380614231628318205362874684987640,
+	2876370.6289353724412254090516208496135991145378768,
+	186056.26539522349504029498971604569928220784236328,
+	8071.6720023658162106380029022722506138218516325024,
+	210.82427775157934587250973392071336271166969580291,
+	2.5066282746310002701649081771338373386264310793408,
+};
+static const double Sden[N+1] = {
+	0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
+	2637558, 357423, 32670, 1925, 66, 1,
+};
+/* n! for small integer n */
+static const double fact[] = {
+	1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
+	479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
+	355687428096000.0, 6402373705728000.0, 121645100408832000.0,
+	2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
+};
+
+/* S(x) rational function for positive x */
+static double S(double x)
+{
+	double num = 0, den = 0;
+	int i;
+
+	/* to avoid overflow handle large x differently */
+	if (x < 8)
+		for (i = N; i >= 0; i--) {
+			num = num * x + Snum[i];
+			den = den * x + Sden[i];
+		}
+	else
+		for (i = 0; i <= N; i++) {
+			num = num / x + Snum[i];
+			den = den / x + Sden[i];
+		}
+	return num/den;
+}
 
 double tgamma(double x)
 {
-	int sign;
-	double y;
+	double absx, y, dy, z, r;
 
-	y = exp(__lgamma_r(x, &sign));
-	if (sign < 0)
-		y = -y;
-	return y;
+	/* special cases */
+	if (!isfinite(x))
+		/* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
+		return x + INFINITY;
+
+	/* integer arguments */
+	/* raise inexact when non-integer */
+	if (x == floor(x)) {
+		if (x == 0)
+			/* tgamma(+-0)=+-inf with divide-by-zero */
+			return 1/x;
+		if (x < 0)
+			return 0/0.0;
+		if (x <= sizeof fact/sizeof *fact)
+			return fact[(int)x - 1];
+	}
+
+	absx = fabs(x);
+
+	/* x ~ 0: tgamma(x) ~ 1/x */
+	if (absx < 0x1p-54)
+		return 1/x;
+
+	/* x >= 172: tgamma(x)=inf with overflow */
+	/* x =< -184: tgamma(x)=+-0 with underflow */
+	if (absx >= 184) {
+		if (x < 0) {
+			if (floor(x) * 0.5 == floor(x * 0.5))
+				return 0;
+			return -0.0;
+		}
+		x *= 0x1p1023;
+		return x;
+	}
+
+	/* handle the error of x + g - 0.5 */
+	y = absx + gmhalf;
+	if (absx > gmhalf) {
+		dy = y - absx;
+		dy -= gmhalf;
+	} else {
+		dy = y - gmhalf;
+		dy -= absx;
+	}
+
+	z = absx - 0.5;
+	r = S(absx) * exp(-y);
+	if (x < 0) {
+		/* reflection formula for negative x */
+		r = -pi / (sinpi(absx) * absx * r);
+		dy = -dy;
+		z = -z;
+	}
+	r += dy * (gmhalf+0.5) * r / y;
+	z = pow(y, 0.5*z);
+	r = r * z * z;
+	return r;
 }
+
+#if 0
+double __lgamma_r(double x, int *sign)
+{
+	double r, absx, z, zz, w;
+
+	*sign = 1;
+
+	/* special cases */
+	if (!isfinite(x))
+		/* lgamma(nan)=nan, lgamma(+-inf)=inf */
+		return x*x;
+
+	/* integer arguments */
+	if (x == floor(x) && x <= 2) {
+		/* n <= 0: lgamma(n)=inf with divbyzero */
+		/* n == 1,2: lgamma(n)=0 */
+		if (x <= 0)
+			return 1/0.0;
+		return 0;
+	}
+
+	absx = fabs(x);
+
+	/* lgamma(x) ~ -log(|x|) for tiny |x| */
+	if (absx < 0x1p-54) {
+		*sign = 1 - 2*!!signbit(x);
+		return -log(absx);
+	}
+
+	/* use tgamma for smaller |x| */
+	if (absx < 128) {
+		x = tgamma(x);
+		*sign = 1 - 2*!!signbit(x);
+		return log(fabs(x));
+	}
+
+	/* second term (log(S)-g) could be more precise here.. */
+	/* or with stirling: (|x|-0.5)*(log(|x|)-1) + poly(1/|x|) */
+	r = (absx-0.5)*(log(absx+gmhalf)-1) + (log(S(absx)) - (gmhalf+0.5));
+	if (x < 0) {
+		/* reflection formula for negative x */
+		x = sinpi(absx);
+		*sign = 2*!!signbit(x) - 1;
+		r = log(pi/(fabs(x)*absx)) - r;
+	}
+	return r;
+}
+
+weak_alias(__lgamma_r, lgamma_r);
+#endif
diff --git a/src/math/tgammaf.c b/src/math/tgammaf.c
index 16df8076..b4ca51c9 100644
--- a/src/math/tgammaf.c
+++ b/src/math/tgammaf.c
@@ -1,16 +1,6 @@
 #include <math.h>
 
-// FIXME: use lanczos approximation
-
-float __lgammaf_r(float, int *);
-
 float tgammaf(float x)
 {
-	int sign;
-	float y;
-
-	y = exp(__lgammaf_r(x, &sign));
-	if (sign < 0)
-		y = -y;
-	return y;
+	return tgamma(x);
 }
-- 
cgit v1.2.1


From 934609442339edb21a33532ccefac8b78e6305f2 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Fri, 14 Dec 2012 12:49:35 +0100
Subject: fixed tgmath.h for functions with integral result

in tgmath.h the return values are casted to the appropriate
floating-point type (if the compiler supports gcc __typeof__),
this is wrong in case of ilogb, lrint, llrint, lround, llround
which do not need such cast
---
 include/tgmath.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/tgmath.h b/include/tgmath.h
index 5b65e21f..832b052b 100644
--- a/include/tgmath.h
+++ b/include/tgmath.h
@@ -59,10 +59,12 @@ sizeof(double) == sizeof(long double)
 
 /* function selection */
 
-#define __tg_real(fun, x) (__RETCAST(x)( \
+#define __tg_real_nocast(fun, x) ( \
 	__FLT(x) ? fun ## f (x) : \
 	__LDBL(x) ? fun ## l (x) : \
-	fun(x) ))
+	fun(x) )
+
+#define __tg_real(fun, x) (__RETCAST(x)__tg_real_nocast(fun, x))
 
 #define __tg_real_2_1(fun, x, y) (__RETCAST(x)( \
 	__FLT(x) ? fun ## f (x, y) : \
@@ -217,18 +219,18 @@ sizeof(double) == sizeof(long double)
 #define fmod(x,y)       __tg_real_2(fmod, (x), (y))
 #define frexp(x,y)      __tg_real_2_1(frexp, (x), (y))
 #define hypot(x,y)      __tg_real_2(hypot, (x), (y))
-#define ilogb(x)        __tg_real(ilogb, (x))
+#define ilogb(x)        __tg_real_nocast(ilogb, (x))
 #define ldexp(x,y)      __tg_real_2_1(ldexp, (x), (y))
 #define lgamma(x)       __tg_real(lgamma, (x))
-#define llrint(x)       __tg_real(llrint, (x))
-#define llround(x)      __tg_real(llround, (x))
+#define llrint(x)       __tg_real_nocast(llrint, (x))
+#define llround(x)      __tg_real_nocast(llround, (x))
 #define log(x)          __tg_real_complex(log, (x))
 #define log10(x)        __tg_real(log10, (x))
 #define log1p(x)        __tg_real(log1p, (x))
 #define log2(x)         __tg_real(log2, (x))
 #define logb(x)         __tg_real(logb, (x))
-#define lrint(x)        __tg_real(lrint, (x))
-#define lround(x)       __tg_real(lround, (x))
+#define lrint(x)        __tg_real_nocast(lrint, (x))
+#define lround(x)       __tg_real_nocast(lround, (x))
 #define nearbyint(x)    __tg_real(nearbyint, (x))
 #define nextafter(x,y)  __tg_real_2(nextafter, (x), (y))
 #define nexttoward(x,y) __tg_real_2(nexttoward, (x), (y))
-- 
cgit v1.2.1


From a8f73bb1a685dd7d67669c6f6ceb255cfa967790 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Fri, 14 Dec 2012 18:29:56 +0100
Subject: math: fix i386/expl.s with more precise x*log2e

with naive exp2l(x*log2e) the last 12bits of the result was incorrect
for x with large absolute value

with hi + lo = x*log2e is caluclated to 128 bits precision and then
  expl(x) = exp2l(hi) + exp2l(hi) * f2xm1(lo)
this gives <1.5ulp measured error everywhere in nearest rounding mode
---
 src/math/i386/exp.s  |   6 ---
 src/math/i386/expl.s | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 107 insertions(+), 7 deletions(-)

diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index c7f5ad0f..e3b42af5 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -50,12 +50,6 @@ expf:
 	flds 4(%esp)
 	jmp 2f
 
-.global expl
-.type expl,@function
-expl:
-	fldt 4(%esp)
-	jmp 2f
-
 .global exp
 .type exp,@function
 exp:
diff --git a/src/math/i386/expl.s b/src/math/i386/expl.s
index f335a3e5..3f2f707d 100644
--- a/src/math/i386/expl.s
+++ b/src/math/i386/expl.s
@@ -1 +1,107 @@
-# see exp.s
+# exp(x) = 2^hi + 2^hi (2^lo - 1)
+# where hi+lo = log2e*x with 128bit precision
+# exact log2e*x calculation depends on nearest rounding mode
+
+.global expl
+.type expl,@function
+expl:
+	fldt 4(%esp)
+
+		# special cases: 2*x is +-inf, nan or |x| < 0x1p-32
+		# check (exponent|0x8000)+2 < 0xbfff+2-32
+	movw 12(%esp), %ax
+	movw %ax, %dx
+	orw $0x8000, %dx
+	addw $2, %dx
+	cmpw $0xbfff-30, %dx
+	jnb 3f
+	cmpw $1, %dx
+	jbe 1f
+		# if |x|<0x1p-32 return 1+x
+	fld1
+	jmp 2f
+1:	testw %ax, %ax
+	jns 1f
+		# if 2*x == -inf,-nan return -0/x
+	fldz
+	fchs
+	fdivp
+	ret
+		# if 2*x == inf,nan return 2*x
+1:	fld %st(0)
+2:	faddp
+	ret
+
+		# should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc
+		# it will be wrong on non-nearest rounding mode
+3:	fldl2e
+#	subl $32, %esp
+	subl $44, %esp
+		# hi = log2e_hi*x
+		# 2^hi = exp2l(hi)
+	fmul %st(1),%st
+	fld %st(0)
+	fstpt (%esp)
+	fstpt 16(%esp)
+	fstpt 32(%esp)
+	call exp2l
+		# if 2^hi == inf return 2^hi
+	fld %st(0)
+	fstpt (%esp)
+	cmpw $0x7fff, 8(%esp)
+	je 1f
+	fldt 32(%esp)
+	fldt 16(%esp)
+		# fpu stack: 2^hi x hi
+		# exact mult: x*log2e
+	fld %st(1) # x
+		# c = 0x1p32+1
+	pushl $0x41f00000
+	pushl $0x00100000
+	fldl (%esp)
+		# xh = x - c*x + c*x
+		# xl = x - xh
+	fmulp
+	fld %st(2)
+	fsub %st(1), %st
+	faddp
+	fld %st(2)
+	fsub %st(1), %st
+		# yh = log2e_hi - c*log2e_hi + c*log2e_hi
+	pushl $0x3ff71547
+	pushl $0x65200000
+	fldl (%esp)
+		# fpu stack: 2^hi x hi xh xl yh
+		# lo = hi - xh*yh + xl*yh
+	fld %st(2)
+	fmul %st(1), %st
+	fsubp %st, %st(4)
+	fmul %st(1), %st
+	faddp %st, %st(3)
+		# yl = log2e_hi - yh
+	pushl $0x3de705fc
+	pushl $0x2f000000
+	fldl (%esp)
+		# fpu stack: 2^hi x lo xh xl yl
+		# lo += xh*yl + xl*yl
+	fmul %st, %st(2)
+	fmulp %st, %st(1)
+	fxch %st(2)
+	faddp
+	faddp
+		# log2e_lo
+	pushl $0xbfbe
+	pushl $0x82f0025f
+	pushl $0x2dc582ee
+	fldt (%esp)
+	addl $36,%esp
+		# fpu stack: 2^hi x lo log2e_lo
+		# lo += log2e_lo*x
+		# return 2^hi + 2^hi (2^lo - 1)
+	fmulp %st, %st(2)
+	faddp
+	f2xm1
+	fmul %st(1), %st
+	faddp
+1:	addl $44, %esp
+	ret
-- 
cgit v1.2.1