diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2013-11-24 01:06:38 +0000 | 
|---|---|---|
| committer | Szabolcs Nagy <nsz@port70.net> | 2013-11-24 01:06:38 +0000 | 
| commit | 3fdf94ec5101fab63d0e8196d3e557641a0e19e2 (patch) | |
| tree | a8684dad2f216505e97263187b768013648146d0 | |
| parent | 10c8b7148b918938d8e681c5801b913dd56cb7e4 (diff) | |
| download | musl-3fdf94ec5101fab63d0e8196d3e557641a0e19e2.tar.gz | |
math: clean up __rem_pio2
- remove the HAVE_EFFICIENT_IRINT case: fn is an exact integer, so
  it can be converted to int32_t a bit more efficiently than with a
  cast (the rounding mode change can be avoided), but musl does not
  support this case on any arch.
- __rem_pio2: use double_t where possible
- __rem_pio2f: use less assignments to avoid stores on i386
- use unsigned int bit manipulation (and union instead of macros)
- use hexfloat literals instead of named constants
| -rw-r--r-- | src/math/__rem_pio2.c | 72 | ||||
| -rw-r--r-- | src/math/__rem_pio2_large.c | 18 | ||||
| -rw-r--r-- | src/math/__rem_pio2f.c | 34 | 
3 files changed, 53 insertions, 71 deletions
| diff --git a/src/math/__rem_pio2.c b/src/math/__rem_pio2.c index 9305be5f..5fafc4a8 100644 --- a/src/math/__rem_pio2.c +++ b/src/math/__rem_pio2.c @@ -29,7 +29,6 @@   * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)   */  static const double -two24   = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */  invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */  pio2_1  = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */  pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ @@ -41,18 +40,19 @@ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */  /* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */  int __rem_pio2(double x, double *y)  { -	double z,w,t,r,fn; -	double tx[3],ty[2]; -	int32_t e0,i,j,nx,n,ix,hx; -	uint32_t low; +	union {double f; uint64_t i;} u = {x}; +	double_t z,w,t,r; +	double tx[3],ty[2],fn; +	uint32_t ix; +	int sign, n, ex, ey, i; -	GET_HIGH_WORD(hx,x); -	ix = hx & 0x7fffffff; +	sign = u.i>>63; +	ix = u.i>>32 & 0x7fffffff;  	if (ix <= 0x400f6a7a) {  /* |x| ~<= 5pi/4 */  		if ((ix & 0xfffff) == 0x921fb)  /* |x| ~= pi/2 or 2pi/2 */  			goto medium;  /* cancellation -- use medium case */  		if (ix <= 0x4002d97c) {  /* |x| ~<= 3pi/4 */ -			if (hx > 0) { +			if (!sign) {  				z = x - pio2_1;  /* one round good to 85 bits */  				y[0] = z - pio2_1t;  				y[1] = (z-y[0]) - pio2_1t; @@ -64,7 +64,7 @@ int __rem_pio2(double x, double *y)  				return -1;  			}  		} else { -			if (hx > 0) { +			if (!sign) {  				z = x - 2*pio2_1;  				y[0] = z - 2*pio2_1t;  				y[1] = (z-y[0]) - 2*pio2_1t; @@ -81,7 +81,7 @@ int __rem_pio2(double x, double *y)  		if (ix <= 0x4015fdbc) {  /* |x| ~<= 7pi/4 */  			if (ix == 0x4012d97c)  /* |x| ~= 3pi/2 */  				goto medium; -			if (hx > 0) { +			if (!sign) {  				z = x - 3*pio2_1;  				y[0] = z - 3*pio2_1t;  				y[1] = (z-y[0]) - 3*pio2_1t; @@ -95,7 +95,7 @@ int __rem_pio2(double x, double *y)  		} else {  			if (ix == 0x401921fb)  /* |x| ~= 4pi/2 */  				goto medium; -			if (hx > 0) { +			if (!sign) {  				z = x - 4*pio2_1;  				y[0] = z - 4*pio2_1t;  				y[1] = (z-y[0]) - 4*pio2_1t; @@ -109,32 +109,26 @@ int __rem_pio2(double x, double *y)  		}  	}  	if (ix < 0x413921fb) {  /* |x| ~< 2^20*(pi/2), medium size */ -		uint32_t high;  medium: -		/* Use a specialized rint() to get fn.  Assume round-to-nearest. */ +		/* rint(x/(pi/2)), Assume round-to-nearest. */  		fn = x*invpio2 + 0x1.8p52;  		fn = fn - 0x1.8p52; -// FIXME -#ifdef HAVE_EFFICIENT_IRINT -		n = irint(fn); -#else  		n = (int32_t)fn; -#endif  		r = x - fn*pio2_1;  		w = fn*pio2_1t;  /* 1st round, good to 85 bits */ -		j = ix>>20;  		y[0] = r - w; -		GET_HIGH_WORD(high,y[0]); -		i = j - ((high>>20)&0x7ff); -		if (i > 16) {  /* 2nd round, good to 118 bits */ +		u.f = y[0]; +		ey = u.i>>52 & 0x7ff; +		ex = ix>>20; +		if (ex - ey > 16) { /* 2nd round, good to 118 bits */  			t = r;  			w = fn*pio2_2;  			r = t - w;  			w = fn*pio2_2t - ((t-r)-w);  			y[0] = r - w; -			GET_HIGH_WORD(high,y[0]); -			i = j - ((high>>20)&0x7ff); -			if (i > 49) {  /* 3rd round, good to 151 bits, covers all cases */ +			u.f = y[0]; +			ey = u.i>>52 & 0x7ff; +			if (ex - ey > 49) {  /* 3rd round, good to 151 bits, covers all cases */  				t = r;  				w = fn*pio2_3;  				r = t - w; @@ -142,7 +136,7 @@ medium:  				y[0] = r - w;  			}  		} -		y[1] = (r-y[0]) - w; +		y[1] = (r - y[0]) - w;  		return n;  	}  	/* @@ -152,19 +146,21 @@ medium:  		y[0] = y[1] = x - x;  		return 0;  	} -	/* set z = scalbn(|x|,ilogb(x)-23) */ -	GET_LOW_WORD(low,x); -	e0 = (ix>>20) - 1046;  /* e0 = ilogb(z)-23; */ -	INSERT_WORDS(z, ix - ((int32_t)(e0<<20)), low); -	for (i=0; i<2; i++) { -		tx[i] = (double)((int32_t)(z)); -		z = (z-tx[i])*two24; +	/* set z = scalbn(|x|,-ilogb(x)+23) */ +	u.f = x; +	u.i &= (uint64_t)-1>>12; +	u.i |= (uint64_t)(0x3ff + 23)<<52; +	z = u.f; +	for (i=0; i < 2; i++) { +		tx[i] = (double)(int32_t)z; +		z     = (z-tx[i])*0x1p24;  	} -	tx[2] = z; -	nx = 3; -	while (tx[nx-1] == 0.0) nx--;  /* skip zero term */ -	n = __rem_pio2_large(tx,ty,e0,nx,1); -	if (hx < 0) { +	tx[i] = z; +	/* skip zero terms, first term is non-zero */ +	while (tx[i] == 0.0) +		i--; +	n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1); +	if (sign) {  		y[0] = -ty[0];  		y[1] = -ty[1];  		return -n; diff --git a/src/math/__rem_pio2_large.c b/src/math/__rem_pio2_large.c index bb2dc43f..958f28c2 100644 --- a/src/math/__rem_pio2_large.c +++ b/src/math/__rem_pio2_large.c @@ -270,10 +270,6 @@ static const double PIo2[] = {    2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */  }; -static const double -two24  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ -twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */ -  int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)  {  	int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; @@ -304,8 +300,8 @@ int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)  recompute:  	/* distill q[] into iq[] reversingly */  	for (i=0,j=jz,z=q[jz]; j>0; i++,j--) { -		fw    = (double)((int32_t)(twon24* z)); -		iq[i] = (int32_t)(z-two24*fw); +		fw    = (double)(int32_t)(0x1p-24*z); +		iq[i] = (int32_t)(z - 0x1p24*fw);  		z     = q[j-1]+fw;  	} @@ -330,7 +326,7 @@ recompute:  			if (carry == 0) {  				if (j != 0) {  					carry = 1; -					iq[i] = 0x1000000- j; +					iq[i] = 0x1000000 - j;  				}  			} else  				iq[i] = 0xffffff - j; @@ -378,9 +374,9 @@ recompute:  		}  	} else { /* break z into 24-bit if necessary */  		z = scalbn(z,-q0); -		if (z >= two24) { -			fw = (double)((int32_t)(twon24*z)); -			iq[jz] = (int32_t)(z-two24*fw); +		if (z >= 0x1p24) { +			fw = (double)(int32_t)(0x1p-24*z); +			iq[jz] = (int32_t)(z - 0x1p24*fw);  			jz += 1;  			q0 += 24;  			iq[jz] = (int32_t)fw; @@ -392,7 +388,7 @@ recompute:  	fw = scalbn(1.0,q0);  	for (i=jz; i>=0; i--) {  		q[i] = fw*(double)iq[i]; -		fw *= twon24; +		fw *= 0x1p-24;  	}  	/* compute PIo2[0,...,jp]*q[jz,...,0] */ diff --git a/src/math/__rem_pio2f.c b/src/math/__rem_pio2f.c index 5bdeb529..838e1fca 100644 --- a/src/math/__rem_pio2f.c +++ b/src/math/__rem_pio2f.c @@ -34,42 +34,32 @@ pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */  int __rem_pio2f(float x, double *y)  { -	double w,r,fn; -	double tx[1],ty[1]; -	float z; -	int32_t e0,n,ix,hx; +	union {float f; uint32_t i;} u = {x}; +	double tx[1],ty[1],fn; +	uint32_t ix; +	int n, sign, e0; -	GET_FLOAT_WORD(hx, x); -	ix = hx & 0x7fffffff; +	ix = u.i & 0x7fffffff;  	/* 25+53 bit pi is good enough for medium size */  	if (ix < 0x4dc90fdb) {  /* |x| ~< 2^28*(pi/2), medium size */  		/* Use a specialized rint() to get fn.  Assume round-to-nearest. */  		fn = x*invpio2 + 0x1.8p52;  		fn = fn - 0x1.8p52; -// FIXME -#ifdef HAVE_EFFICIENT_IRINT -		n  = irint(fn); -#else  		n  = (int32_t)fn; -#endif -		r  = x - fn*pio2_1; -		w  = fn*pio2_1t; -		*y = r - w; +		*y = x - fn*pio2_1 - fn*pio2_1t;  		return n;  	} -	/* -	 * all other (large) arguments -	 */  	if(ix>=0x7f800000) {  /* x is inf or NaN */  		*y = x-x;  		return 0;  	} -	/* set z = scalbn(|x|,ilogb(|x|)-23) */ -	e0 = (ix>>23) - 150;  /* e0 = ilogb(|x|)-23; */ -	SET_FLOAT_WORD(z, ix - ((int32_t)(e0<<23))); -	tx[0] = z; +	/* scale x into [2^23, 2^24-1] */ +	sign = u.i>>31; +	e0 = (ix>>23) - (0x7f+23);  /* e0 = ilogb(|x|)-23, positive */ +	u.i = ix - (e0<<23); +	tx[0] = u.f;  	n  =  __rem_pio2_large(tx,ty,e0,1,0); -	if (hx < 0) { +	if (sign) {  		*y = -ty[0];  		return -n;  	} | 
