diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2013-09-04 15:51:05 +0000 | 
|---|---|---|
| committer | Szabolcs Nagy <nsz@port70.net> | 2013-09-05 11:30:08 +0000 | 
| commit | 535104ab6a2d6f22098f79e7107963e3fc3448a3 (patch) | |
| tree | 7e10076505696c7904e186743636e6c664b189ed | |
| parent | 39c910fb061114e6aa5c3bf2c94b1d7262d62221 (diff) | |
| download | musl-535104ab6a2d6f22098f79e7107963e3fc3448a3.tar.gz | |
math: cbrt cleanup and long double fix
* use float_t and double_t
* cleanup subnormal handling
* bithacks according to the new convention (ldshape for long double
and explicit unions for float and double)
| -rw-r--r-- | src/math/cbrt.c | 36 | ||||
| -rw-r--r-- | src/math/cbrtf.c | 31 | ||||
| -rw-r--r-- | src/math/cbrtl.c | 64 | 
3 files changed, 59 insertions, 72 deletions
| diff --git a/src/math/cbrt.c b/src/math/cbrt.c index f4253428..7599d3e3 100644 --- a/src/math/cbrt.c +++ b/src/math/cbrt.c @@ -15,7 +15,8 @@   * Return cube root of x   */ -#include "libm.h" +#include <math.h> +#include <stdint.h>  static const uint32_t  B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */ @@ -31,15 +32,10 @@ P4 =  0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */  double cbrt(double x)  { -	int32_t hx; -	union dshape u; -	double r,s,t=0.0,w; -	uint32_t sign; -	uint32_t high,low; +	union {double f; uint64_t i;} u = {x}; +	double_t r,s,t,w; +	uint32_t hx = u.i>>32 & 0x7fffffff; -	EXTRACT_WORDS(hx, low, x); -	sign = hx & 0x80000000; -	hx ^= sign;  	if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */  		return x+x; @@ -59,14 +55,16 @@ double cbrt(double x)  	 * division rounds towards minus infinity; this is also efficient.  	 */  	if (hx < 0x00100000) { /* zero or subnormal? */ -		if ((hx|low) == 0) +		u.f = x*0x1p54; +		hx = u.i>>32 & 0x7fffffff; +		if (hx == 0)  			return x;  /* cbrt(0) is itself */ -		SET_HIGH_WORD(t, 0x43500000); /* set t = 2**54 */ -		t *= x; -		GET_HIGH_WORD(high, t); -		INSERT_WORDS(t, sign|((high&0x7fffffff)/3+B2), 0); +		hx = hx/3 + B2;  	} else -		INSERT_WORDS(t, sign|(hx/3+B1), 0); +		hx = hx/3 + B1; +	u.i &= 1ULL<<63; +	u.i |= (uint64_t)hx << 32; +	t = u.f;  	/*  	 * New cbrt to 23 bits: @@ -76,7 +74,7 @@ double cbrt(double x)  	 * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this  	 * gives us bounds for r = t**3/x.  	 * -	 * Try to optimize for parallel evaluation as in k_tanf.c. +	 * Try to optimize for parallel evaluation as in __tanf.c.  	 */  	r = (t*t)*(t/x);  	t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4)); @@ -91,9 +89,9 @@ double cbrt(double x)  	 * 0.667; the error in the rounded t can be up to about 3 23-bit ulps  	 * before the final error is larger than 0.667 ulps.  	 */ -	u.value = t; -	u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL; -	t = u.value; +	u.f = t; +	u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL; +	t = u.f;  	/* one step Newton iteration to 53 bits with error < 0.667 ulps */  	s = t*t;         /* t*t is exact */ diff --git a/src/math/cbrtf.c b/src/math/cbrtf.c index 4a984b10..89c2c865 100644 --- a/src/math/cbrtf.c +++ b/src/math/cbrtf.c @@ -17,7 +17,8 @@   * Return cube root of x   */ -#include "libm.h" +#include <math.h> +#include <stdint.h>  static const unsigned  B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */ @@ -25,15 +26,10 @@ B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */  float cbrtf(float x)  { -	double r,T; -	float t; -	int32_t hx; -	uint32_t sign; -	uint32_t high; +	double_t r,T; +	union {float f; uint32_t i;} u = {x}; +	uint32_t hx = u.i & 0x7fffffff; -	GET_FLOAT_WORD(hx, x); -	sign = hx & 0x80000000; -	hx ^= sign;  	if (hx >= 0x7f800000)  /* cbrt(NaN,INF) is itself */  		return x + x; @@ -41,28 +37,29 @@ float cbrtf(float x)  	if (hx < 0x00800000) {  /* zero or subnormal? */  		if (hx == 0)  			return x;  /* cbrt(+-0) is itself */ -		SET_FLOAT_WORD(t, 0x4b800000);  /* set t = 2**24 */ -		t *= x; -		GET_FLOAT_WORD(high, t); -		SET_FLOAT_WORD(t, sign|((high&0x7fffffff)/3+B2)); +		u.f = x*0x1p24f; +		hx = u.i & 0x7fffffff; +		hx = hx/3 + B2;  	} else -		SET_FLOAT_WORD(t, sign|(hx/3+B1)); +		hx = hx/3 + B1; +	u.i &= 0x80000000; +	u.i |= hx;  	/*  	 * First step Newton iteration (solving t*t-x/t == 0) to 16 bits.  In  	 * double precision so that its terms can be arranged for efficiency  	 * without causing overflow or underflow.  	 */ -	T = t; +	T = u.f;  	r = T*T*T; -	T = T*((double)x+x+r)/(x+r+r); +	T = T*((double_t)x+x+r)/(x+r+r);  	/*  	 * Second step Newton iteration to 47 bits.  In double precision for  	 * efficiency and accuracy.  	 */  	r = T*T*T; -	T = T*((double)x+x+r)/(x+r+r); +	T = T*((double_t)x+x+r)/(x+r+r);  	/* rounding to 24 bits is perfect in round-to-nearest mode */  	return T; diff --git a/src/math/cbrtl.c b/src/math/cbrtl.c index 0af65ef5..ceff9136 100644 --- a/src/math/cbrtl.c +++ b/src/math/cbrtl.c @@ -23,58 +23,50 @@ long double cbrtl(long double x)  	return cbrt(x);  }  #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 - -#define BIAS (LDBL_MAX_EXP - 1)  static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */  long double cbrtl(long double x)  { -	union IEEEl2bits u, v; +	union ldshape u = {x}, v; +	union {float f; uint32_t i;} uft;  	long double r, s, t, w; -	double dr, dt, dx; -	float ft, fx; -	uint32_t hx; -	uint16_t expsign; -	int k; - -	u.e = x; -	expsign = u.xbits.expsign; -	k = expsign & 0x7fff; +	double_t dr, dt, dx; +	float_t ft; +	int e = u.i.se & 0x7fff; +	int sign = u.i.se & 0x8000;  	/*  	 * If x = +-Inf, then cbrt(x) = +-Inf.  	 * If x = NaN, then cbrt(x) = NaN.  	 */ -	if (k == BIAS + LDBL_MAX_EXP) +	if (e == 0x7fff)  		return x + x; - -	if (k == 0) { +	if (e == 0) { +		/* Adjust subnormal numbers. */ +		u.f *= 0x1p120; +		e = u.i.se & 0x7fff;  		/* If x = +-0, then cbrt(x) = +-0. */ -		if ((u.bits.manh | u.bits.manl) == 0) +		if (e == 0)  			return x; -		/* Adjust subnormal numbers. */ -		u.e *= 0x1.0p514; -		k = u.bits.exp; -		k -= BIAS + 514; -	} else -		k -= BIAS; -	u.xbits.expsign = BIAS; -	v.e = 1; - -	x = u.e; -	switch (k % 3) { +		e -= 120; +	} +	e -= 0x3fff; +	u.i.se = 0x3fff; +	x = u.f; +	switch (e % 3) {  	case 1:  	case -2: -		x = 2*x; -		k--; +		x *= 2; +		e--;  		break;  	case 2:  	case -1: -		x = 4*x; -		k -= 2; +		x *= 4; +		e -= 2;  		break;  	} -	v.xbits.expsign = (expsign & 0x8000) | (BIAS + k / 3); +	v.f = 1.0; +	v.i.se = sign | (0x3fff + e/3);  	/*  	 * The following is the guts of s_cbrtf, with the handling of @@ -83,9 +75,9 @@ long double cbrtl(long double x)  	 */  	/* ~5-bit estimate: */ -	fx = x; -	GET_FLOAT_WORD(hx, fx); -	SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1)); +	uft.f = x; +	uft.i = (uft.i & 0x7fffffff)/3 + B1; +	ft = uft.f;  	/* ~16-bit estimate: */  	dx = x; @@ -126,7 +118,7 @@ long double cbrtl(long double x)  	r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */  	t = t+t*r;       /* error <= 0.5 + 0.5/3 + epsilon */ -	t *= v.e; +	t *= v.f;  	return t;  }  #endif | 
