diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/math/__invtrigl.c | 34 | ||||
| -rw-r--r-- | src/math/__invtrigl.h | 4 | ||||
| -rw-r--r-- | src/math/acosl.c | 57 | ||||
| -rw-r--r-- | src/math/asinl.c | 44 | ||||
| -rw-r--r-- | src/math/atan2l.c | 50 | ||||
| -rw-r--r-- | src/math/atanl.c | 94 | 
6 files changed, 180 insertions, 103 deletions
diff --git a/src/math/__invtrigl.c b/src/math/__invtrigl.c index f2d33d3e..ef7f4e1b 100644 --- a/src/math/__invtrigl.c +++ b/src/math/__invtrigl.c @@ -1,7 +1,7 @@ +#include <float.h>  #include "__invtrigl.h"  #if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 -  static const long double  pS0 =  1.66666666666666666631e-01L,  pS1 = -4.16313987993683104320e-01L, @@ -28,4 +28,36 @@ long double __invtrigl_R(long double z)  	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*(qS4+z*qS5))));  	return p/q;  } +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 +static const long double +pS0 =  1.66666666666666666666666666666700314e-01L, +pS1 = -7.32816946414566252574527475428622708e-01L, +pS2 =  1.34215708714992334609030036562143589e+00L, +pS3 = -1.32483151677116409805070261790752040e+00L, +pS4 =  7.61206183613632558824485341162121989e-01L, +pS5 = -2.56165783329023486777386833928147375e-01L, +pS6 =  4.80718586374448793411019434585413855e-02L, +pS7 = -4.42523267167024279410230886239774718e-03L, +pS8 =  1.44551535183911458253205638280410064e-04L, +pS9 = -2.10558957916600254061591040482706179e-07L, +qS1 = -4.84690167848739751544716485245697428e+00L, +qS2 =  9.96619113536172610135016921140206980e+00L, +qS3 = -1.13177895428973036660836798461641458e+01L, +qS4 =  7.74004374389488266169304117714658761e+00L, +qS5 = -3.25871986053534084709023539900339905e+00L, +qS6 =  8.27830318881232209752469022352928864e-01L, +qS7 = -1.18768052702942805423330715206348004e-01L, +qS8 =  8.32600764660522313269101537926539470e-03L, +qS9 = -1.99407384882605586705979504567947007e-04L; + +const long double pio2_hi = 1.57079632679489661923132169163975140L; +const long double pio2_lo = 4.33590506506189051239852201302167613e-35L; + +long double __invtrigl_R(long double z) +{ +	long double p, q; +	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*(pS5+z*(pS6+z*(pS7+z*(pS8+z*pS9))))))))); /* numerator P(z), coefficients pS0..pS9 */ +	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*(qS4+z*(qS5+z*(qS6+z*(qS7+z*(qS8+z*qS9)))))))); /* denominator Q(z), coefficients qS1..qS9 (not pS) */ +	return p/q; +}  #endif diff --git a/src/math/__invtrigl.h 
b/src/math/__invtrigl.h index cac465c2..91a8a3b6 100644 --- a/src/math/__invtrigl.h +++ b/src/math/__invtrigl.h @@ -1,10 +1,6 @@ -#include <float.h> - -#if LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384  /* shared by acosl, asinl and atan2l */  #define pio2_hi __pio2_hi  #define pio2_lo __pio2_lo  extern const long double pio2_hi, pio2_lo;  long double __invtrigl_R(long double z); -#endif diff --git a/src/math/acosl.c b/src/math/acosl.c index 9e7b7fb3..c03bdf02 100644 --- a/src/math/acosl.c +++ b/src/math/acosl.c @@ -23,46 +23,45 @@ long double acosl(long double x)  }  #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384  #include "__invtrigl.h" +#if LDBL_MANT_DIG == 64 +#define CLEARBOTTOM(u) (u.i.m &= -1ULL << 32) +#elif LDBL_MANT_DIG == 113 +#define CLEARBOTTOM(u) (u.i.lo = 0) +#endif  long double acosl(long double x)  { -	union IEEEl2bits u; -	long double z, w, s, c, df; -	int16_t expsign, expt; -	u.e = x; -	expsign = u.xbits.expsign; -	expt = expsign & 0x7fff; +	union ldshape u = {x}; +	long double z, s, c, f; +	uint16_t e = u.i.se & 0x7fff; +  	/* |x| >= 1 or nan */ -	if (expt >= 0x3fff) { -		if (expt == 0x3fff && -			((u.bits.manh & ~LDBL_NBIT) | u.bits.manl) == 0) { -			if (expsign > 0) -				return 0;  /* acos(1) = 0 */ -			return 2*pio2_hi + 0x1p-120f;  /* acos(-1)= pi */ -		} -		return 0/(x-x);  /* acos(|x|>1) is NaN */ +	if (e >= 0x3fff) { +		if (x == 1) +			return 0; +		if (x == -1) +			return 2*pio2_hi + 0x1p-120f; +		return 0/(x-x);  	}  	/* |x| < 0.5 */ -	if (expt < 0x3fff - 1) { -		if (expt < 0x3fff - 65) -			return pio2_hi + 0x1p-120f;  /* x < 0x1p-65: acosl(x)=pi/2 */ -		return pio2_hi - (x - (pio2_lo - x * __invtrigl_R(x*x))); +	if (e < 0x3fff - 1) { +		if (e < 0x3fff - LDBL_MANT_DIG - 1) +			return pio2_hi + 0x1p-120f; +		return pio2_hi - (__invtrigl_R(x*x)*x - pio2_lo + x);  	}  	/* x < -0.5 */ -	if (expsign < 0) { -		z = (1.0 + x) * 0.5; +	if (u.i.se >> 15) { +		z = (1 + x)*0.5;  		s = sqrtl(z); -		w = __invtrigl_R(z) * s 
- pio2_lo; -		return 2*(pio2_hi - (s + w)); +		return 2*(pio2_hi - (__invtrigl_R(z)*s - pio2_lo + s));  	}  	/* x > 0.5 */ -	z = (1.0 - x) * 0.5; +	z = (1 - x)*0.5;  	s = sqrtl(z); -	u.e = s; -	u.bits.manl = 0; -	df = u.e; -	c = (z - df * df) / (s + df); -	w = __invtrigl_R(z) * s + c; -	return 2*(df + w); +	u.f = s; +	CLEARBOTTOM(u); +	f = u.f; +	c = (z - f*f)/(s + f); +	return 2*(__invtrigl_R(z)*s + c + f);  }  #endif diff --git a/src/math/asinl.c b/src/math/asinl.c index 8799341d..347c5356 100644 --- a/src/math/asinl.c +++ b/src/math/asinl.c @@ -23,27 +23,29 @@ long double asinl(long double x)  }  #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384  #include "__invtrigl.h" -/* 0.95 */ -#define THRESH  ((0xe666666666666666ULL>>(64-(LDBL_MANH_SIZE-1)))|LDBL_NBIT) +#if LDBL_MANT_DIG == 64 +#define CLOSETO1(u) (u.i.m>>56 >= 0xf7) +#define CLEARBOTTOM(u) (u.i.m &= -1ULL << 32) +#elif LDBL_MANT_DIG == 113 +#define CLOSETO1(u) (u.i.top >= 0xee00) +#define CLEARBOTTOM(u) (u.i.lo = 0) +#endif  long double asinl(long double x)  { -	union IEEEl2bits u; -	long double z,r,s; -	uint16_t expsign, expt; +	union ldshape u = {x}; +	long double z, r, s; +	uint16_t e = u.i.se & 0x7fff; +	int sign = u.i.se >> 15; -	u.e = x; -	expsign = u.xbits.expsign; -	expt = expsign & 0x7fff; -	if (expt >= 0x3fff) {   /* |x| >= 1 or nan */ -		if (expt == 0x3fff && -		    ((u.bits.manh&~LDBL_NBIT)|u.bits.manl) == 0) -			/* asin(+-1)=+-pi/2 with inexact */ +	if (e >= 0x3fff) {   /* |x| >= 1 or nan */ +		/* asin(+-1)=+-pi/2 with inexact */ +		if (x == 1 || x == -1)  			return x*pio2_hi + 0x1p-120f;  		return 0/(x-x);  	} -	if (expt < 0x3fff - 1) {  /* |x| < 0.5 */ -		if (expt < 0x3fff - 32) {  /* |x|<0x1p-32, asinl(x)=x */ +	if (e < 0x3fff - 1) {  /* |x| < 0.5 */ +		if (e < 0x3fff - (LDBL_MANT_DIG+1)/2) {  			/* return x with inexact if x!=0 */  			FORCE_EVAL(x + 0x1p120f);  			return x; @@ -54,18 +56,16 @@ long double asinl(long double x)  	z = (1.0 - fabsl(x))*0.5;  	s = 
sqrtl(z);  	r = __invtrigl_R(z); -	if (u.bits.manh >= THRESH) { /* if |x| is close to 1 */ +	if (CLOSETO1(u)) {  		x = pio2_hi - (2*(s+s*r)-pio2_lo);  	} else {  		long double f, c; -		u.e = s; -		u.bits.manl = 0; -		f = u.e; -		c = (z-f*f)/(s+f); +		u.f = s; +		CLEARBOTTOM(u); +		f = u.f; +		c = (z - f*f)/(s + f);  		x = 0.5*pio2_hi-(2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f));  	} -	if (expsign>>15) -		return -x; -	return x; +	return sign ? -x : x;  }  #endif diff --git a/src/math/atan2l.c b/src/math/atan2l.c index e0167d09..f0937a97 100644 --- a/src/math/atan2l.c +++ b/src/math/atan2l.c @@ -27,56 +27,50 @@ long double atan2l(long double y, long double x)  long double atan2l(long double y, long double x)  { -	union IEEEl2bits ux, uy; +	union ldshape ux, uy;  	long double z; -	int m; -	uint16_t exptx, expsignx, expty, expsigny; +	int m, ex, ey;  	if (isnan(x) || isnan(y))  		return x+y;  	if (x == 1)  		return atanl(y); -	uy.e = y; -	expsigny = uy.xbits.expsign; -	expty = expsigny & 0x7fff; -	ux.e = x; -	expsignx = ux.xbits.expsign; -	exptx = expsignx & 0x7fff; -	m = ((expsigny>>15)&1) | ((expsignx>>14)&2);  /* 2*sign(x)+sign(y) */ - +	ux.f = x; +	uy.f = y; +	ex = ux.i.se & 0x7fff; +	ey = uy.i.se & 0x7fff; +	m = 2*(ux.i.se>>15) | uy.i.se>>15;  	if (y == 0) {  		switch(m) {  		case 0:  		case 1: return y;           /* atan(+-0,+anything)=+-0 */ -		case 2: return  2*pio2_hi+0x1p-120f; /* atan(+0,-anything) = pi */ -		case 3: return -2*pio2_hi-0x1p-120f; /* atan(-0,-anything) =-pi */ +		case 2: return  2*pio2_hi;  /* atan(+0,-anything) = pi */ +		case 3: return -2*pio2_hi;  /* atan(-0,-anything) =-pi */  		}  	}  	if (x == 0) -		return m&1 ? -pio2_hi-0x1p-120f : pio2_hi+0x1p-120f; -	/* when x is INF */ -	if (exptx == 0x7fff) { -		if (expty == 0x7fff) { +		return m&1 ? 
-pio2_hi : pio2_hi; +	if (ex == 0x7fff) { +		if (ey == 0x7fff) {  			switch(m) { -			case 0: return  pio2_hi*0.5+0x1p-120f; /* atan(+INF,+INF) */ -			case 1: return -pio2_hi*0.5-0x1p-120f; /* atan(-INF,+INF) */ -			case 2: return  1.5*pio2_hi+0x1p-120f; /* atan(+INF,-INF) */ -			case 3: return -1.5*pio2_hi-0x1p-120f; /* atan(-INF,-INF) */ +			case 0: return  pio2_hi/2;   /* atan(+INF,+INF) */ +			case 1: return -pio2_hi/2;   /* atan(-INF,+INF) */ +			case 2: return  1.5*pio2_hi; /* atan(+INF,-INF) */ +			case 3: return -1.5*pio2_hi; /* atan(-INF,-INF) */  			}  		} else {  			switch(m) {  			case 0: return  0.0;        /* atan(+...,+INF) */  			case 1: return -0.0;        /* atan(-...,+INF) */ -			case 2: return  2*pio2_hi+0x1p-120f; /* atan(+...,-INF) */ -			case 3: return -2*pio2_hi-0x1p-120f; /* atan(-...,-INF) */ +			case 2: return  2*pio2_hi;  /* atan(+...,-INF) */ +			case 3: return -2*pio2_hi;  /* atan(-...,-INF) */  			}  		}  	} -	/* when y is INF */ -	if (exptx+120 < expty || expty == 0x7fff) -		return m&1 ? -pio2_hi-0x1p-120f : pio2_hi+0x1p-120f; - -	if ((m&2) && expty+120 < exptx) /* |y/x| tiny, x<0 */ +	if (ex+120 < ey || ey == 0x7fff) +		return m&1 ? 
-pio2_hi : pio2_hi; +	/* z = atan(|y/x|) without spurious underflow */ +	if ((m&2) && ey+120 < ex)  /* |y/x| < 0x1p-120, x<0 */  		z = 0.0;  	else  		z = atanl(fabsl(y/x)); diff --git a/src/math/atanl.c b/src/math/atanl.c index d29e6316..79a3edb8 100644 --- a/src/math/atanl.c +++ b/src/math/atanl.c @@ -23,6 +23,9 @@ long double atanl(long double x)  }  #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 +#if LDBL_MANT_DIG == 64 +#define EXPMAN(u) ((u.i.se & 0x7fff)<<8 | (u.i.m>>55 & 0xff)) +  static const long double atanhi[] = {  	 4.63647609000806116202e-01L,  	 7.85398163397448309628e-01L, @@ -64,32 +67,85 @@ static long double T_odd(long double x)  	return aT[1] + x * (aT[3] + x * (aT[5] + x * (aT[7] +  		x * (aT[9] + x * aT[11]))));  } +#elif LDBL_MANT_DIG == 113 +#define EXPMAN(u) ((u.i.se & 0x7fff)<<8 | u.i.top>>8) + +static const long double atanhi[] = { /* file-local, like the ld64 table; high parts of atan(0.5), atan(1), atan(1.5), pi/2 */ +	 4.63647609000806116214256231461214397e-01L, +	 7.85398163397448309615660845819875699e-01L, +	 9.82793723247329067985710611014666038e-01L, +	 1.57079632679489661923132169163975140e+00L, +}; + +static const long double atanlo[] = { /* low-order correction terms paired index-for-index with atanhi[] */ +	 4.89509642257333492668618435220297706e-36L, +	 2.16795253253094525619926100651083806e-35L, +	-2.31288434538183565909319952098066272e-35L, +	 4.33590506506189051239852201302167613e-35L, +}; + +static const long double aT[] = { /* polynomial coefficients: even indices feed T_even, odd indices feed T_odd */ +	 3.33333333333333333333333333333333125e-01L, +	-1.99999999999999999999999999999180430e-01L, +	 1.42857142857142857142857142125269827e-01L, +	-1.11111111111111111111110834490810169e-01L, +	 9.09090909090909090908522355708623681e-02L, +	-7.69230769230769230696553844935357021e-02L, +	 6.66666666666666660390096773046256096e-02L, +	-5.88235294117646671706582985209643694e-02L, +	 5.26315789473666478515847092020327506e-02L, +	-4.76190476189855517021024424991436144e-02L, +	 4.34782608678695085948531993458097026e-02L, +	-3.99999999632663469330634215991142368e-02L, +	 3.70370363987423702891250829918659723e-02L, +	-3.44827496515048090726669907612335954e-02L, 
+	 3.22579620681420149871973710852268528e-02L, +	-3.03020767654269261041647570626778067e-02L, +	 2.85641979882534783223403715930946138e-02L, +	-2.69824879726738568189929461383741323e-02L, +	 2.54194698498808542954187110873675769e-02L, +	-2.35083879708189059926183138130183215e-02L, +	 2.04832358998165364349957325067131428e-02L, +	-1.54489555488544397858507248612362957e-02L, +	 8.64492360989278761493037861575248038e-03L, +	-2.58521121597609872727919154569765469e-03L, +}; + +static long double T_even(long double x) +{ +	return (aT[0] + x * (aT[2] + x * (aT[4] + x * (aT[6] + x * (aT[8] + +		x * (aT[10] + x * (aT[12] + x * (aT[14] + x * (aT[16] + +		x * (aT[18] + x * (aT[20] + x * aT[22]))))))))))); +} + +static long double T_odd(long double x) +{ +	return (aT[1] + x * (aT[3] + x * (aT[5] + x * (aT[7] + x * (aT[9] + +		x * (aT[11] + x * (aT[13] + x * (aT[15] + x * (aT[17] + +		x * (aT[19] + x * (aT[21] + x * aT[23]))))))))))); +} +#endif  long double atanl(long double x)  { -	union IEEEl2bits u; -	long double w,s1,s2,z; +	union ldshape u = {x}; +	long double w, s1, s2, z;  	int id; -	uint16_t expsign, expt; -	uint32_t expman; +	unsigned e = u.i.se & 0x7fff; +	unsigned sign = u.i.se >> 15; +	unsigned expman; -	u.e = x; -	expsign = u.xbits.expsign; -	expt = expsign & 0x7fff; -	if (expt >= 0x3fff + 65) { /* if |x| is large, atan(x)~=pi/2 */ -		if (expt == 0x7fff && -		    ((u.bits.manh&~LDBL_NBIT)|u.bits.manl)!=0)  /* NaN */ -			return x+x; -		z = atanhi[3] + 0x1p-120f; -		return expsign>>15 ? -z : z; +	if (e >= 0x3fff + LDBL_MANT_DIG + 1) { /* if |x| is large, atan(x)~=pi/2 */ +		if (isnan(x)) +			return x; +		return sign ? -atanhi[3] : atanhi[3];  	}  	/* Extract the exponent and the first few bits of the mantissa. */ -	/* XXX There should be a more convenient way to do this. 
*/ -	expman = (expt << 8) | ((u.bits.manh >> (LDBL_MANH_SIZE - 9)) & 0xff); +	expman = EXPMAN(u);  	if (expman < ((0x3fff - 2) << 8) + 0xc0) {  /* |x| < 0.4375 */ -		if (expt < 0x3fff - 32) {   /* if |x| is small, atanl(x)~=x */ +		if (e < 0x3fff - (LDBL_MANT_DIG+1)/2) {   /* if |x| is small, atanl(x)~=x */  			/* raise underflow if subnormal */ -			if (expt == 0) +			if (e == 0)  				FORCE_EVAL((float)x);  			return x;  		} @@ -108,7 +164,7 @@ long double atanl(long double x)  			if (expman < ((0x3fff + 1) << 8) + 0x38) { /* |x| < 2.4375 */  				id = 2;  				x = (x-1.5)/(1.0+1.5*x); -			} else {                                 /* 2.4375 <= |x| < 2^ATAN_CONST */ +			} else {                                 /* 2.4375 <= |x| */  				id = 3;  				x = -1.0/x;  			} @@ -123,6 +179,6 @@ long double atanl(long double x)  	if (id < 0)  		return x - x*(s1+s2);  	z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x); -	return expsign>>15 ? -z : z; +	return sign ? -z : z;  }  #endif  | 
