diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2012-12-11 23:56:59 +0100 | 
|---|---|---|
| committer | Szabolcs Nagy <nsz@port70.net> | 2012-12-11 23:56:59 +0100 | 
| commit | b12a73d5bf595b7fbb73db30c6bb144078e86ef5 (patch) | |
| tree | 547edc4ee13453230bb1c154015150b9fc88c766 /src/math | |
| parent | 482ccd2f7497a79ca83e998f54e823e7cedaaa6e (diff) | |
| download | musl-b12a73d5bf595b7fbb73db30c6bb144078e86ef5.tar.gz | |
math: clean up inverse trigonometric functions
modifications:
* avoid unsigned->signed conversions
* removed various volatile hacks
* use FORCE_EVAL when evaluating only for side-effects
* factor out R() rational approximation instead of manual inline
* __invtrigl.h now only provides __invtrigl_R, __pio2_hi and __pio2_lo
* use 2*pio2_hi, 2*pio2_lo instead of pi_hi, pi_lo
otherwise the logic is not changed, long double versions will
need a revisit when a genaral long double cleanup happens
Diffstat (limited to 'src/math')
| -rw-r--r-- | src/math/__invtrigl.c | 41 | ||||
| -rw-r--r-- | src/math/__invtrigl.h | 66 | ||||
| -rw-r--r-- | src/math/acos.c | 74 | ||||
| -rw-r--r-- | src/math/acosf.c | 77 | ||||
| -rw-r--r-- | src/math/acosl.c | 61 | ||||
| -rw-r--r-- | src/math/asin.c | 71 | ||||
| -rw-r--r-- | src/math/asinf.c | 51 | ||||
| -rw-r--r-- | src/math/asinl.c | 61 | ||||
| -rw-r--r-- | src/math/atan.c | 32 | ||||
| -rw-r--r-- | src/math/atan2l.c | 40 | ||||
| -rw-r--r-- | src/math/atanf.c | 28 | ||||
| -rw-r--r-- | src/math/atanl.c | 33 | 
12 files changed, 258 insertions, 377 deletions
| diff --git a/src/math/__invtrigl.c b/src/math/__invtrigl.c index a51330e2..f2d33d3e 100644 --- a/src/math/__invtrigl.c +++ b/src/math/__invtrigl.c @@ -1,36 +1,8 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/ld80/invtrig.c */ -/*- - * Copyright (c) 2008 David Schultz <das@FreeBSD.ORG> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - *    notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - *    notice, this list of conditions and the following disclaimer in the - *    documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -  #include "__invtrigl.h"  #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -/* coefficients used in asinl() and acosl() */ -const long double +static const long double  pS0 =  1.66666666666666666631e-01L,  pS1 = -4.16313987993683104320e-01L,  pS2 =  3.69068046323246813704e-01L, @@ -44,9 +16,16 @@ qS3 = -1.68285799854822427013e+00L,  qS4 =  3.90699412641738801874e-01L,  qS5 = -3.14365703596053263322e-02L; -const long double pi_hi = 3.1415926535897932384626433832795L; -const long double pi_lo = -5.01655761266833202345e-20L;  const long double pio2_hi = 1.57079632679489661926L;  const long double pio2_lo = -2.50827880633416601173e-20L; +/* used in asinl() and acosl() */ +/* R(x^2) is a rational approximation of (asin(x)-x)/x^3 with Remez algorithm */ +long double __invtrigl_R(long double z) +{ +	long double p, q; +	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*(pS5+z*pS6)))))); +	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*(qS4+z*qS5)))); +	return p/q; +}  #endif diff --git a/src/math/__invtrigl.h b/src/math/__invtrigl.h index 22748b68..cac465c2 100644 --- a/src/math/__invtrigl.h +++ b/src/math/__invtrigl.h @@ -1,68 +1,10 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/ld80/invtrig.h */ -/*- - * Copyright (c) 2008 David Schultz <das@FreeBSD.ORG> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - *    notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - *    notice, this list of conditions and the following disclaimer in the - *    documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "libm.h" +#include <float.h>  #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 - -#define BIAS            (LDBL_MAX_EXP - 1) -#define MANH_SIZE       LDBL_MANH_SIZE - -/* Constants shared by the long double inverse trig functions. */ -#define pS0     __pS0 -#define pS1     __pS1 -#define pS2     __pS2 -#define pS3     __pS3 -#define pS4     __pS4 -#define pS5     __pS5 -#define pS6     __pS6 -#define qS1     __qS1 -#define qS2     __qS2 -#define qS3     __qS3 -#define qS4     __qS4 -#define qS5     __qS5 -#define pi_hi   __pi_hi -#define pi_lo   __pi_lo +/* shared by acosl, asinl and atan2l */  #define pio2_hi __pio2_hi  #define pio2_lo __pio2_lo +extern const long double pio2_hi, pio2_lo; -extern const long double pS0, pS1, pS2, pS3, pS4, pS5, pS6; -extern const long double qS1, qS2, qS3, qS4, qS5; -extern const long double pi_hi, pi_lo, pio2_hi, pio2_lo; - -static long double P(long double x) -{ -	return x * (pS0 + x * (pS1 + x * (pS2 + x * (pS3 + -		x * (pS4 + x * (pS5 + x * pS6)))))); -} - -static long double Q(long double x) -{ -	return 1.0 + x * (qS1 + x * (qS2 + x * (qS3 + x * (qS4 + x * qS5)))); -} - +long double __invtrigl_R(long double z);  #endif diff --git a/src/math/acos.c b/src/math/acos.c index 0eb15bed..be95d25e 100644 --- a/src/math/acos.c +++ b/src/math/acos.c @@ -36,12 +36,8 @@  #include "libm.h"  static const double -pi      = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ -pio2_hi = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */ -// FIXME -static const volatile double -pio2_lo = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */ -static const double +pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ +pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */  pS0 =  1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */  pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */  pS2 =  2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ @@ -53,49 +49,53 @@ qS2 =  2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */  qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */  qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ +static double R(double z) +{ +	double p, q; +	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); +	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); +	return p/q; +} +  double acos(double x)  { -	double z,p,q,r,w,s,c,df; -	int32_t hx,ix; +	double z,w,s,c,df; +	uint32_t hx,ix;  	GET_HIGH_WORD(hx, x);  	ix = hx & 0x7fffffff; -	if (ix >= 0x3ff00000) {  /* |x| >= 1 */ +	/* |x| >= 1 or nan */ +	if (ix >= 0x3ff00000) {  		uint32_t lx;  		GET_LOW_WORD(lx,x); -		if ((ix-0x3ff00000 | lx) == 0) {  /* |x|==1 */ -			if (hx > 0) return 0.0;  /* acos(1) = 0  */ -			return pi + 2.0*pio2_lo; /* acos(-1)= pi */ +		if ((ix-0x3ff00000 | lx) == 0) { +			/* acos(1)=0, acos(-1)=pi */ +			if (hx >> 31) +				return 2*pio2_hi + 0x1p-1000; +			return 0;  		} -		return (x-x)/(x-x);  /* acos(|x|>1) is NaN */ +		return 0/(x-x);  	} -	if (ix < 0x3fe00000) {   /* |x| < 0.5 */ +	/* |x| < 0.5 */ +	if (ix < 0x3fe00000) {  		if (ix <= 0x3c600000)  /* |x| < 2**-57 */ -			return pio2_hi + pio2_lo; -		z = x*x; -		p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); -		q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); -		r = p/q; -		return pio2_hi - (x - (pio2_lo-x*r)); -	} else if (hx < 0) {     /* x < -0.5 */ +			return pio2_hi + 0x1p-1000; +		return pio2_hi - (x - (pio2_lo-x*R(x*x))); +	} +	/* x < -0.5 */ +	if (hx >> 31) {  		z = (1.0+x)*0.5; -		p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); -		q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); -		s = sqrt(z); -		r = p/q; -		w = r*s-pio2_lo; -		return pi - 2.0*(s+w); -	} else {                 /* x > 0.5 */ -		z = (1.0-x)*0.5;  		s = sqrt(z); -		df = s; -		SET_LOW_WORD(df,0); -		c  = (z-df*df)/(s+df); -		p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); -		q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); -		r = p/q; -		w = r*s+c; -		return 2.0*(df+w); +		w = R(z)*s-pio2_lo; +		return 2*(pio2_hi - (s+w));  	} +	/* x > 0.5 */ +	z = (1.0-x)*0.5; +	s = sqrt(z); +	df = s; +	SET_LOW_WORD(df,0); +	c = (z-df*df)/(s+df); +	w = R(z)*s+c; +	return 2*(df+w);  } diff --git a/src/math/acosf.c b/src/math/acosf.c index d875f3ef..5d7c0270 100644 --- a/src/math/acosf.c +++ b/src/math/acosf.c @@ -16,59 +16,56 @@  #include "libm.h"  static const float -pi      = 3.1415925026e+00, /* 0x40490fda */ -pio2_hi = 1.5707962513e+00; /* 0x3fc90fda */ -static const volatile float -pio2_lo = 7.5497894159e-08; /* 0x33a22168 */ -static const float +pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */ +pio2_lo = 7.5497894159e-08, /* 0x33a22168 */  pS0 =  1.6666586697e-01,  pS1 = -4.2743422091e-02,  pS2 = -8.6563630030e-03,  qS1 = -7.0662963390e-01; +static float R(float z) +{ +	float p, q; +	p = z*(pS0+z*(pS1+z*pS2)); +	q = 1.0f+z*qS1; +	return p/q; +} +  float acosf(float x)  { -	float z,p,q,r,w,s,c,df; -	int32_t hx,ix; +	float z,w,s,c,df; +	uint32_t hx,ix;  	GET_FLOAT_WORD(hx, x);  	ix = hx & 0x7fffffff; -	if (ix >= 0x3f800000) {  /* |x| >= 1 */ -		if (ix == 0x3f800000) {  /* |x| == 1 */ -			if (hx > 0) return 0.0f;  /* acos(1) = 0 */ -			return pi + 2.0f*pio2_lo;  /* acos(-1)= pi */ +	/* |x| >= 1 or nan */ +	if (ix >= 0x3f800000) { +		if (ix == 0x3f800000) { +			if (hx >> 31) +				return 2*pio2_hi + 0x1p-120f; +			return 0;  		} -		return (x-x)/(x-x);  /* acos(|x|>1) is NaN */ +		return 0/(x-x);  	} -	if (ix < 0x3f000000) {   /* |x| < 0.5 */ +	/* |x| < 0.5 */ +	if (ix < 0x3f000000) {  		if (ix <= 0x32800000) /* |x| < 2**-26 */ -			return pio2_hi + pio2_lo; -		z = x*x; -		p = z*(pS0+z*(pS1+z*pS2)); -		q = 1.0f+z*qS1; -		r = p/q; -		return pio2_hi - (x - (pio2_lo-x*r)); -	} else if (hx < 0) {     /* x < -0.5 */ -		z = (1.0f+x)*0.5f; -		p = z*(pS0+z*(pS1+z*pS2)); -		q = 1.0f+z*qS1; -		s = sqrtf(z); -		r = p/q; -		w = r*s-pio2_lo; -		return pi - 2.0f*(s+w); -	} else {                 /* x > 0.5 */ -		int32_t idf; - -		z = (1.0f-x)*0.5f; +			return pio2_hi + 0x1p-120f; +		return pio2_hi - (x - (pio2_lo-x*R(x*x))); +	} +	/* x < -0.5 */ +	if (hx >> 31) { +		z = (1+x)*0.5f;  		s = sqrtf(z); -		df = s; -		GET_FLOAT_WORD(idf,df); -		SET_FLOAT_WORD(df,idf&0xfffff000); -		c  = (z-df*df)/(s+df); -		p = z*(pS0+z*(pS1+z*pS2)); -		q = 1.0f+z*qS1; -		r = p/q; -		w = r*s+c; -		return 2.0f*(df+w); +		w = R(z)*s-pio2_lo; +		return 2*(pio2_hi - (s+w));  	} +	/* x > 0.5 */ +	z = (1-x)*0.5f; +	s = sqrtf(z); +	GET_FLOAT_WORD(hx,s); +	SET_FLOAT_WORD(df,hx&0xfffff000); +	c = (z-df*df)/(s+df); +	w = R(z)*s+c; +	return 2*(df+w);  } diff --git a/src/math/acosl.c b/src/math/acosl.c index 83857d49..9e9b01e4 100644 --- a/src/math/acosl.c +++ b/src/math/acosl.c @@ -23,55 +23,46 @@ long double acosl(long double x)  }  #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384  #include "__invtrigl.h" -#define ACOS_CONST      (BIAS - 65)     /* 2**-65 */  long double acosl(long double x)  {  	union IEEEl2bits u; -	long double z, p, q, r, w, s, c, df; +	long double z, w, s, c, df;  	int16_t expsign, expt;  	u.e = x;  	expsign = u.xbits.expsign;  	expt = expsign & 0x7fff; -	if (expt >= BIAS) {        /* |x| >= 1 */ -		if (expt == BIAS && +	/* |x| >= 1 or nan */ +	if (expt >= 0x3fff) { +		if (expt == 0x3fff &&  			((u.bits.manh & ~LDBL_NBIT) | u.bits.manl) == 0) {  			if (expsign > 0) -				return 0.0;  /* acos(1) = 0 */ -			else -				// FIXME -				return pi_hi + 2.0 * pio2_lo;  /* acos(-1)= pi */ +				return 0;  /* acos(1) = 0 */ +			return 2*pio2_hi + 0x1p-1000;  /* acos(-1)= pi */  		} -		return (x - x) / (x - x);  /* acos(|x|>1) is NaN */ +		return 0/(x-x);  /* acos(|x|>1) is NaN */  	} -	if (expt < BIAS - 1) {     /* |x| < 0.5 */ -		if (expt < ACOS_CONST) -			return pio2_hi + pio2_lo;  /* x tiny: acosl=pi/2 */ -		z = x * x; -		p = P(z); -		q = Q(z); -		r = p / q; -		return pio2_hi - (x - (pio2_lo - x * r)); -	} else if (expsign < 0) {  /* x < -0.5 */ +	/* |x| < 0.5 */ +	if (expt < 0x3fff - 1) { +		if (expt < 0x3fff - 65) +			return pio2_hi + 0x1p-1000;  /* x < 0x1p-65: acosl(x)=pi/2 */ +		return pio2_hi - (x - (pio2_lo - x * __invtrigl_R(x*x))); +	} +	/* x < -0.5 */ +	if (expsign < 0) {  		z = (1.0 + x) * 0.5; -		p = P(z); -		q = Q(z); -		s = sqrtl(z); -		r = p / q; -		w = r * s - pio2_lo; -		return pi_hi - 2.0 * (s + w); -	} else {                   /* x > 0.5 */ -		z = (1.0 - x) * 0.5;  		s = sqrtl(z); -		u.e = s; -		u.bits.manl = 0; -		df = u.e; -		c = (z - df * df) / (s + df); -		p = P(z); -		q = Q(z); -		r = p / q; -		w = r * s + c; -		return 2.0 * (df + w); +		w = __invtrigl_R(z) * s - pio2_lo; +		return 2*(pio2_hi - (s + w));  	} +	/* x > 0.5 */ +	z = (1.0 - x) * 0.5; +	s = sqrtl(z); +	u.e = s; +	u.bits.manl = 0; +	df = u.e; +	c = (z - df * df) / (s + df); +	w = __invtrigl_R(z) * s + c; +	return 2*(df + w);  }  #endif diff --git a/src/math/asin.c b/src/math/asin.c index e3d8b250..a1906b08 100644 --- a/src/math/asin.c +++ b/src/math/asin.c @@ -42,10 +42,8 @@  #include "libm.h"  static const double -huge = 1.000e+300,  pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */  pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */ -pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */  /* coefficients for R(x^2) */  pS0 =  1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */  pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ @@ -58,51 +56,54 @@ qS2 =  2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */  qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */  qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ +static double R(double z) +{ +	double p, q; +	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); +	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); +	return p/q; +} +  double asin(double x)  { -	double t=0.0,w,p,q,c,r,s; -	int32_t hx,ix; +	double z,r,s; +	uint32_t hx,ix;  	GET_HIGH_WORD(hx, x);  	ix = hx & 0x7fffffff; -	if (ix >= 0x3ff00000) {           /* |x|>= 1 */ +	/* |x| >= 1 or nan */ +	if (ix >= 0x3ff00000) {  		uint32_t lx; -  		GET_LOW_WORD(lx, x);  		if ((ix-0x3ff00000 | lx) == 0)  			/* asin(1) = +-pi/2 with inexact */ -			return x*pio2_hi + x*pio2_lo; -		return (x-x)/(x-x);  /* asin(|x|>1) is NaN */ -	} else if (ix < 0x3fe00000) {  /* |x|<0.5 */ -		if (ix < 0x3e500000) {  /* if |x| < 2**-26 */ -			if (huge+x > 1.0) -				return x; /* return x with inexact if x!=0*/ +			return x*pio2_hi + 0x1p-1000; +		return 0/(x-x); +	} +	/* |x| < 0.5 */ +	if (ix < 0x3fe00000) { +		if (ix < 0x3e500000) { +			/* |x|<0x1p-26, return x with inexact if x!=0*/ +			FORCE_EVAL(x + 0x1p1000); +			return x;  		} -		t = x*x; -		p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); -		q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4))); -		w = p/q; -		return x + x*w; +		return x + x*R(x*x);  	}  	/* 1 > |x| >= 0.5 */ -	w = 1.0 - fabs(x); -	t = w*0.5; -	p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); -	q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4))); -	s = sqrt(t); -	if (ix >= 0x3FEF3333) {  /* if |x| > 0.975 */ -		w = p/q; -		t = pio2_hi-(2.0*(s+s*w)-pio2_lo); +	z = (1 - fabs(x))*0.5; +	s = sqrt(z); +	r = R(z); +	if (ix >= 0x3fef3333) {  /* if |x| > 0.975 */ +		x = pio2_hi-(2*(s+s*r)-pio2_lo);  	} else { -		w = s; -		SET_LOW_WORD(w,0); -		c = (t-w*w)/(s+w); -		r = p/q; -		p = 2.0*s*r-(pio2_lo-2.0*c); -		q = pio4_hi - 2.0*w; -		t = pio4_hi - (p-q); +		double f,c; +		/* f+c = sqrt(z) */ +		f = s; +		SET_LOW_WORD(f,0); +		c = (z-f*f)/(s+f); +		x = 0.5*pio2_hi - (2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f));  	} -	if (hx > 0) -		return t; -	return -t; +	if (hx >> 31) +		return -x; +	return x;  } diff --git a/src/math/asinf.c b/src/math/asinf.c index c1c42c7b..462bf043 100644 --- a/src/math/asinf.c +++ b/src/math/asinf.c @@ -12,52 +12,51 @@   * is preserved.   * ====================================================   */ -  #include "libm.h" +static const double +pio2 = 1.570796326794896558e+00; +  static const float -huge = 1.000e+30,  /* coefficients for R(x^2) */  pS0 =  1.6666586697e-01,  pS1 = -4.2743422091e-02,  pS2 = -8.6563630030e-03,  qS1 = -7.0662963390e-01; -static const double -pio2 = 1.570796326794896558e+00; +static float R(float z) +{ +	float p, q; +	p = z*(pS0+z*(pS1+z*pS2)); +	q = 1.0f+z*qS1; +	return p/q; +}  float asinf(float x)  {  	double s; -	float t,w,p,q; -	int32_t hx,ix; +	float z; +	uint32_t hx,ix;  	GET_FLOAT_WORD(hx, x);  	ix = hx & 0x7fffffff;  	if (ix >= 0x3f800000) {  /* |x| >= 1 */  		if (ix == 0x3f800000)  /* |x| == 1 */ -			return x*pio2;  /* asin(+-1) = +-pi/2 with inexact */ -		return (x-x)/(x-x);  /* asin(|x|>1) is NaN */ -	} else if (ix < 0x3f000000) {  /* |x|<0.5 */ +			return x*pio2 + 0x1p-120f;  /* asin(+-1) = +-pi/2 with inexact */ +		return 0/(x-x);  /* asin(|x|>1) is NaN */ +	} +	if (ix < 0x3f000000) {  /* |x| < 0.5 */  		if (ix < 0x39800000) {  /* |x| < 2**-12 */ -			if (huge+x > 1.0f) -				return x; /* return x with inexact if x!=0 */ +			FORCE_EVAL(x + 0x1p120f); +			return x; /* return x with inexact if x!=0 */  		} -		t = x*x; -		p = t*(pS0+t*(pS1+t*pS2)); -		q = 1.0f+t*qS1; -		w = p/q; -		return x + x*w; +		return x + x*R(x*x);  	}  	/* 1 > |x| >= 0.5 */ -	w = 1.0f - fabsf(x); -	t = w*0.5f; -	p = t*(pS0+t*(pS1+t*pS2)); -	q = 1.0f+t*qS1; -	s = sqrt(t); -	w = p/q; -	t = pio2-2.0*(s+s*w); -	if (hx > 0) -		return t; -	return -t; +	z = (1 - fabsf(x))*0.5f; +	s = sqrt(z); +	x = pio2 - 2*(s+s*R(z)); +	if (hx >> 31) +		return -x; +	return x;  } diff --git a/src/math/asinl.c b/src/math/asinl.c index 7572767c..0ef9853b 100644 --- a/src/math/asinl.c +++ b/src/math/asinl.c @@ -23,60 +23,49 @@ long double asinl(long double x)  }  #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384  #include "__invtrigl.h" -static const long double huge = 1.000e+300; -static const long double pio4_hi = 7.85398163397448309628e-01L; -#define ASIN_LINEAR     (BIAS - 32)     /* 2**-32 */  /* 0.95 */ -#define THRESH  ((0xe666666666666666ULL>>(64-(MANH_SIZE-1)))|LDBL_NBIT) +#define THRESH  ((0xe666666666666666ULL>>(64-(LDBL_MANH_SIZE-1)))|LDBL_NBIT)  long double asinl(long double x)  {  	union IEEEl2bits u; -	long double t=0.0,w,p,q,c,r,s; -	int16_t expsign, expt; +	long double z,r,s; +	uint16_t expsign, expt;  	u.e = x;  	expsign = u.xbits.expsign;  	expt = expsign & 0x7fff; -	if (expt >= BIAS) {          /* |x|>= 1 */ -		if (expt == BIAS && +	if (expt >= 0x3fff) {   /* |x| >= 1 or nan */ +		if (expt == 0x3fff &&  		    ((u.bits.manh&~LDBL_NBIT)|u.bits.manl) == 0) -			/* asin(1)=+-pi/2 with inexact */ -			return x*pio2_hi + x*pio2_lo; -		return (x-x)/(x-x);  /* asin(|x|>1) is NaN */ -	} else if (expt < BIAS-1) {  /* |x|<0.5 */ -		if (expt < ASIN_LINEAR) {  /* if |x| is small, asinl(x)=x */ +			/* asin(+-1)=+-pi/2 with inexact */ +			return x*pio2_hi + 0x1p-1000; +		return 0/(x-x); +	} +	if (expt < 0x3fff - 1) {  /* |x| < 0.5 */ +		if (expt < 0x3fff - 32) {  /* |x|<0x1p-32, asinl(x)=x */  			/* return x with inexact if x!=0 */ -			if (huge+x > 1.0) -				return x; +			FORCE_EVAL(x + 0x1p1000); +			return x;  		} -		t = x*x; -		p = P(t); -		q = Q(t); -		w = p/q; -		return x + x*w; +		return x + x*__invtrigl_R(x*x);  	}  	/* 1 > |x| >= 0.5 */ -	w = 1.0 - fabsl(x); -	t = w*0.5; -	p = P(t); -	q = Q(t); -	s = sqrtl(t); +	z = (1.0 - fabsl(x))*0.5; +	s = sqrtl(z); +	r = __invtrigl_R(z);  	if (u.bits.manh >= THRESH) { /* if |x| is close to 1 */ -		w = p/q; -		t = pio2_hi-(2.0*(s+s*w)-pio2_lo); +		x = pio2_hi - (2*(s+s*r)-pio2_lo);  	} else { +		long double f, c;  		u.e = s;  		u.bits.manl = 0; -		w = u.e; -		c = (t-w*w)/(s+w); -		r = p/q; -		p = 2.0*s*r-(pio2_lo-2.0*c); -		q = pio4_hi-2.0*w; -		t = pio4_hi-(p-q); +		f = u.e; +		c = (z-f*f)/(s+f); +		x = 0.5*pio2_hi-(2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f));  	} -	if (expsign > 0) -		return t; -	return -t; +	if (expsign>>15) +		return -x; +	return x;  }  #endif diff --git a/src/math/atan.c b/src/math/atan.c index f34551c7..7fd8a3b8 100644 --- a/src/math/atan.c +++ b/src/math/atan.c @@ -60,32 +60,26 @@ static const double aT[] = {    1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */  }; -static const double huge = 1.0e300; -  double atan(double x)  {  	double w,s1,s2,z; -	int32_t ix,hx,id; +	uint32_t ix,sign; +	int id; -	GET_HIGH_WORD(hx, x); -	ix = hx & 0x7fffffff; +	GET_HIGH_WORD(ix, x); +	sign = ix >> 31; +	ix &= 0x7fffffff;  	if (ix >= 0x44100000) {   /* if |x| >= 2^66 */ -		uint32_t low; - -		GET_LOW_WORD(low, x); -		if (ix > 0x7ff00000 || -		    (ix == 0x7ff00000 && low != 0))  /* NaN */ -			return x+x; -		if (hx > 0) -			return  atanhi[3] + *(volatile double *)&atanlo[3]; -		else -			return -atanhi[3] - *(volatile double *)&atanlo[3]; +		if (isnan(x)) +			return x; +		z = atanhi[3] + 0x1p-1000; +		return sign ? -z : z;  	}  	if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */  		if (ix < 0x3e400000) {  /* |x| < 2^-27 */ -			/* raise inexact */ -			if (huge+x > 1.0) -				return x; +			/* raise inexact if x!=0 */ +			FORCE_EVAL(x + 0x1p1000); +			return x;  		}  		id = -1;  	} else { @@ -117,5 +111,5 @@ double atan(double x)  	if (id < 0)  		return x - x*(s1+s2);  	z = atanhi[id] - (x*(s1+s2) - atanlo[id] - x); -	return hx < 0 ? -z : z; +	return sign ? -z : z;  } diff --git a/src/math/atan2l.c b/src/math/atan2l.c index 45cbfcc1..e86dbffb 100644 --- a/src/math/atan2l.c +++ b/src/math/atan2l.c @@ -24,8 +24,6 @@ long double atan2l(long double y, long double x)  }  #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384  #include "__invtrigl.h" -// FIXME: -static const volatile long double tiny = 1.0e-300;  long double atan2l(long double y, long double x)  { @@ -40,12 +38,12 @@ long double atan2l(long double y, long double x)  	ux.e = x;  	expsignx = ux.xbits.expsign;  	exptx = expsignx & 0x7fff; -	if ((exptx==BIAS+LDBL_MAX_EXP && +	if ((exptx==0x7fff &&  	     ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)!=0) || /* x is NaN */ -	    (expty==BIAS+LDBL_MAX_EXP && +	    (expty==0x7fff &&  	     ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl)!=0))   /* y is NaN */  		return x+y; -	if (expsignx==BIAS && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) /* x=1.0 */ +	if (expsignx==0x3fff && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) /* x=1.0 */  		return atanl(y);  	m = ((expsigny>>15)&1) | ((expsignx>>14)&2);  /* 2*sign(x)+sign(y) */ @@ -54,39 +52,39 @@ long double atan2l(long double y, long double x)  		switch(m) {  		case 0:  		case 1: return y;           /* atan(+-0,+anything)=+-0 */ -		case 2: return  pi_hi+tiny; /* atan(+0,-anything) = pi */ -		case 3: return -pi_hi-tiny; /* atan(-0,-anything) =-pi */ +		case 2: return  2*pio2_hi+0x1p-1000; /* atan(+0,-anything) = pi */ +		case 3: return -2*pio2_hi-0x1p-1000; /* atan(-0,-anything) =-pi */  		}  	}  	/* when x = 0 */  	if (exptx==0 && ((ux.bits.manh&~LDBL_NBIT)|ux.bits.manl)==0) -		return expsigny < 0 ? -pio2_hi-tiny : pio2_hi+tiny; +		return expsigny < 0 ? -pio2_hi-0x1p-1000 : pio2_hi+0x1p-1000;  	/* when x is INF */ -	if (exptx == BIAS+LDBL_MAX_EXP) { -		if (expty == BIAS+LDBL_MAX_EXP) { +	if (exptx == 0x7fff) { +		if (expty == 0x7fff) {  			switch(m) { -			case 0: return  pio2_hi*0.5+tiny; /* atan(+INF,+INF) */ -			case 1: return -pio2_hi*0.5-tiny; /* atan(-INF,+INF) */ -			case 2: return  1.5*pio2_hi+tiny; /* atan(+INF,-INF) */ -			case 3: return -1.5*pio2_hi-tiny; /* atan(-INF,-INF) */ +			case 0: return  pio2_hi*0.5+0x1p-1000; /* atan(+INF,+INF) */ +			case 1: return -pio2_hi*0.5-0x1p-1000; /* atan(-INF,+INF) */ +			case 2: return  1.5*pio2_hi+0x1p-1000; /* atan(+INF,-INF) */ +			case 3: return -1.5*pio2_hi-0x1p-1000; /* atan(-INF,-INF) */  			}  		} else {  			switch(m) {  			case 0: return  0.0;        /* atan(+...,+INF) */  			case 1: return -0.0;        /* atan(-...,+INF) */ -			case 2: return  pi_hi+tiny; /* atan(+...,-INF) */ -			case 3: return -pi_hi-tiny; /* atan(-...,-INF) */ +			case 2: return  2*pio2_hi+0x1p-1000; /* atan(+...,-INF) */ +			case 3: return -2*pio2_hi-0x1p-1000; /* atan(-...,-INF) */  			}  		}  	}  	/* when y is INF */ -	if (expty == BIAS+LDBL_MAX_EXP) -		return expsigny < 0 ? -pio2_hi-tiny : pio2_hi+tiny; +	if (expty == 0x7fff) +		return expsigny < 0 ? -pio2_hi-0x1p-1000 : pio2_hi+0x1p-1000;  	/* compute y/x */  	k = expty-exptx;  	if(k > LDBL_MANT_DIG+2) { /* |y/x| huge */ -		z = pio2_hi+pio2_lo; +		z = pio2_hi+0x1p-1000;  		m &= 1;  	} else if (expsignx < 0 && k < -LDBL_MANT_DIG-2) /* |y/x| tiny, x<0 */  		z = 0.0; @@ -95,9 +93,9 @@ long double atan2l(long double y, long double x)  	switch (m) {  	case 0: return z;               /* atan(+,+) */  	case 1: return -z;              /* atan(-,+) */ -	case 2: return pi_hi-(z-pi_lo); /* atan(+,-) */ +	case 2: return 2*pio2_hi-(z-2*pio2_lo); /* atan(+,-) */  	default: /* case 3 */ -		return (z-pi_lo)-pi_hi; /* atan(-,-) */ +		return (z-2*pio2_lo)-2*pio2_hi; /* atan(-,-) */  	}  }  #endif diff --git a/src/math/atanf.c b/src/math/atanf.c index 4e31b902..4b59509a 100644 --- a/src/math/atanf.c +++ b/src/math/atanf.c @@ -38,28 +38,26 @@ static const float aT[] = {    6.1687607318e-02,  }; -static const float huge = 1.0e30; -  float atanf(float x)  {  	float w,s1,s2,z; -	int32_t ix,hx,id; +	uint32_t ix,sign; +	int id; -	GET_FLOAT_WORD(hx, x); -	ix = hx & 0x7fffffff; +	GET_FLOAT_WORD(ix, x); +	sign = ix>>31; +	ix &= 0x7fffffff;  	if (ix >= 0x4c800000) {  /* if |x| >= 2**26 */ -		if (ix > 0x7f800000)  /* NaN */ -			return x+x; -		if (hx > 0) -			return  atanhi[3] + *(volatile float *)&atanlo[3]; -		else -			return -atanhi[3] - *(volatile float *)&atanlo[3]; +		if (isnan(x)) +			return x; +		z = atanhi[3] + 0x1p-120f; +		return sign ? -z : z;  	}  	if (ix < 0x3ee00000) {   /* |x| < 0.4375 */  		if (ix < 0x39800000) {  /* |x| < 2**-12 */ -			/* raise inexact */ -			if(huge+x>1.0f) -				return x; +			/* raise inexact if x!=0 */ +			FORCE_EVAL(x + 0x1p120f); +			return x;  		}  		id = -1;  	} else { @@ -91,5 +89,5 @@ float atanf(float x)  	if (id < 0)  		return x - x*(s1+s2);  	z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x); -	return hx < 0 ? -z : z; +	return sign ? -z : z;  } diff --git a/src/math/atanl.c b/src/math/atanl.c index 33ecff0d..6a480a7a 100644 --- a/src/math/atanl.c +++ b/src/math/atanl.c @@ -22,11 +22,6 @@ long double atanl(long double x)  	return atan(x);  }  #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 -#include "__invtrigl.h" - -#define ATAN_CONST      (BIAS + 65)     /* 2**65 */ -#define ATAN_LINEAR     (BIAS - 32)     /* 2**-32 */ -static const long double huge = 1.0e300;  static const long double atanhi[] = {  	 4.63647609000806116202e-01L, @@ -81,29 +76,27 @@ long double atanl(long double x)  	u.e = x;  	expsign = u.xbits.expsign;  	expt = expsign & 0x7fff; -	if (expt >= ATAN_CONST) { /* if |x| is large, atan(x)~=pi/2 */ -		if (expt == BIAS + LDBL_MAX_EXP && +	if (expt >= 0x3fff + 65) { /* if |x| is large, atan(x)~=pi/2 */ +		if (expt == 0x7fff &&  		    ((u.bits.manh&~LDBL_NBIT)|u.bits.manl)!=0)  /* NaN */  			return x+x; -		if (expsign > 0) -			return  atanhi[3]+atanlo[3]; -		else -			return -atanhi[3]-atanlo[3]; +		z = atanhi[3] + 0x1p-1000; +		return expsign < 0 ? -z : z;  	}  	/* Extract the exponent and the first few bits of the mantissa. */  	/* XXX There should be a more convenient way to do this. */ -	expman = (expt << 8) | ((u.bits.manh >> (MANH_SIZE - 9)) & 0xff); -	if (expman < ((BIAS - 2) << 8) + 0xc0) {  /* |x| < 0.4375 */ -		if (expt < ATAN_LINEAR) {   /* if |x| is small, atanl(x)~=x */ -			/* raise inexact */ -			if (huge+x > 1.0) -				return x; +	expman = (expt << 8) | ((u.bits.manh >> (LDBL_MANH_SIZE - 9)) & 0xff); +	if (expman < ((0x3fff - 2) << 8) + 0xc0) {  /* |x| < 0.4375 */ +		if (expt < 0x3fff - 32) {   /* if |x| is small, atanl(x)~=x */ +			/* raise inexact if x!=0 */ +			FORCE_EVAL(x + 0x1p1000); +			return x;  		}  		id = -1;  	} else {  		x = fabsl(x); -		if (expman < (BIAS << 8) + 0x30) {  /* |x| < 1.1875 */ -			if (expman < ((BIAS - 1) << 8) + 0x60) { /*  7/16 <= |x| < 11/16 */ +		if (expman < (0x3fff << 8) + 0x30) {  /* |x| < 1.1875 */ +			if (expman < ((0x3fff - 1) << 8) + 0x60) { /*  7/16 <= |x| < 11/16 */  				id = 0;  				x = (2.0*x-1.0)/(2.0+x);  			} else {                                 /* 11/16 <= |x| < 19/16 */ @@ -111,7 +104,7 @@ long double atanl(long double x)  				x = (x-1.0)/(x+1.0);  			}  		} else { -			if (expman < ((BIAS + 1) << 8) + 0x38) { /* |x| < 2.4375 */ +			if (expman < ((0x3fff + 1) << 8) + 0x38) { /* |x| < 2.4375 */  				id = 2;  				x = (x-1.5)/(1.0+1.5*x);  			} else {                                 /* 2.4375 <= |x| < 2^ATAN_CONST */ | 
