diff options
| -rw-r--r-- | src/math/__log1p.h | 94 | ||||
| -rw-r--r-- | src/math/__log1pf.h | 35 | ||||
| -rw-r--r-- | src/math/log.c | 84 | ||||
| -rw-r--r-- | src/math/log10.c | 99 | ||||
| -rw-r--r-- | src/math/log10f.c | 84 | ||||
| -rw-r--r-- | src/math/log10l.c | 7 | ||||
| -rw-r--r-- | src/math/log1p.c | 148 | ||||
| -rw-r--r-- | src/math/log1pf.c | 125 | ||||
| -rw-r--r-- | src/math/log1pl.c | 2 | ||||
| -rw-r--r-- | src/math/log2.c | 91 | ||||
| -rw-r--r-- | src/math/log2f.c | 95 | ||||
| -rw-r--r-- | src/math/log2l.c | 2 | ||||
| -rw-r--r-- | src/math/logf.c | 78 | ||||
| -rw-r--r-- | src/math/logl.c | 8 | 
14 files changed, 369 insertions, 583 deletions
| diff --git a/src/math/__log1p.h b/src/math/__log1p.h deleted file mode 100644 index 57187115..00000000 --- a/src/math/__log1p.h +++ /dev/null @@ -1,94 +0,0 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/k_log.h */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* - * __log1p(f): - * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)]. - * - * The following describes the overall strategy for computing - * logarithms in base e.  The argument reduction and adding the final - * term of the polynomial are done by the caller for increased accuracy - * when different bases are used. - * - * Method : - *   1. Argument Reduction: find k and f such that - *                      x = 2^k * (1+f), - *         where  sqrt(2)/2 < 1+f < sqrt(2) . - * - *   2. Approximation of log(1+f). - *      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) - *               = 2s + 2/3 s**3 + 2/5 s**5 + ....., - *               = 2s + s*R - *      We use a special Reme algorithm on [0,0.1716] to generate - *      a polynomial of degree 14 to approximate R The maximum error - *      of this polynomial approximation is bounded by 2**-58.45. In - *      other words, - *                      2      4      6      8      10      12      14 - *          R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s - *      (the values of Lg1 to Lg7 are listed in the program) - *      and - *          |      2          14          |     -58.45 - *          | Lg1*s +...+Lg7*s    -  R(z) | <= 2 - *          |                             | - *      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. - *      In order to guarantee error in log below 1ulp, we compute log - *      by - *              log(1+f) = f - s*(f - R)        (if f is not too large) - *              log(1+f) = f - (hfsq - s*(hfsq+R)).     (better accuracy) - * - *      3. Finally,  log(x) = k*ln2 + log(1+f). - *                          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) - *         Here ln2 is split into two floating point number: - *                      ln2_hi + ln2_lo, - *         where n*ln2_hi is always exact for |n| < 2000. - * - * Special cases: - *      log(x) is NaN with signal if x < 0 (including -INF) ; - *      log(+INF) is +INF; log(0) is -INF with signal; - *      log(NaN) is that NaN with no signal. - * - * Accuracy: - *      according to an error analysis, the error is always less than - *      1 ulp (unit in the last place). - * - * Constants: - * The hexadecimal values are the intended ones for the following - * constants. The decimal values may be used, provided that the - * compiler will convert from decimal to binary accurately enough - * to produce the hexadecimal values shown. - */ - -static const double -Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ -Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ -Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ -Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ -Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ -Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ -Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ - -/* - * We always inline __log1p(), since doing so produces a - * substantial performance improvement (~40% on amd64). - */ -static inline double __log1p(double f) -{ -	double_t hfsq,s,z,R,w,t1,t2; - -	s = f/(2.0+f); -	z = s*s; -	w = z*z; -	t1= w*(Lg2+w*(Lg4+w*Lg6)); -	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); -	R = t2+t1; -	hfsq = 0.5*f*f; -	return s*(hfsq+R); -} diff --git a/src/math/__log1pf.h b/src/math/__log1pf.h deleted file mode 100644 index f2fbef29..00000000 --- a/src/math/__log1pf.h +++ /dev/null @@ -1,35 +0,0 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/k_logf.h */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* - * See comments in __log1p.h. - */ - -/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ -static const float -Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ -Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ -Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ -Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ - -static inline float __log1pf(float f) -{ -	float_t hfsq,s,z,R,w,t1,t2; - -	s = f/(2.0f + f); -	z = s*s; -	w = z*z; -	t1 = w*(Lg2+w*Lg4); -	t2 = z*(Lg1+w*Lg3); -	R = t2+t1; -	hfsq = 0.5f * f * f; -	return s*(hfsq+R); -} diff --git a/src/math/log.c b/src/math/log.c index 98051205..e61e113d 100644 --- a/src/math/log.c +++ b/src/math/log.c @@ -10,7 +10,7 @@   * ====================================================   */  /* log(x) - * Return the logrithm of x + * Return the logarithm of x   *   * Method :   *   1. Argument Reduction: find k and f such that @@ -60,12 +60,12 @@   * to produce the hexadecimal values shown.   */ -#include "libm.h" +#include <math.h> +#include <stdint.h>  static const double  ln2_hi = 6.93147180369123816490e-01,  /* 3fe62e42 fee00000 */  ln2_lo = 1.90821492927058770002e-10,  /* 3dea39ef 35793c76 */ -two54  = 1.80143985094819840000e+16,  /* 43500000 00000000 */  Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */  Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */  Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */ @@ -76,63 +76,43 @@ Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */  double log(double x)  { -	double hfsq,f,s,z,R,w,t1,t2,dk; -	int32_t k,hx,i,j; -	uint32_t lx; - -	EXTRACT_WORDS(hx, lx, x); +	union {double f; uint64_t i;} u = {x}; +	double_t hfsq,f,s,z,R,w,t1,t2,dk; +	uint32_t hx; +	int k; +	hx = u.i>>32;  	k = 0; -	if (hx < 0x00100000) {  /* x < 2**-1022  */ -		if (((hx&0x7fffffff)|lx) == 0) -			return -two54/0.0;  /* log(+-0)=-inf */ -		if (hx < 0) -			return (x-x)/0.0;   /* log(-#) = NaN */ -		/* subnormal number, scale up x */ +	if (hx < 0x00100000 || hx>>31) { +		if (u.i<<1 == 0) +			return -1/(x*x);  /* log(+-0)=-inf */ +		if (hx>>31) +			return (x-x)/0.0; /* log(-#) = NaN */ +		/* subnormal number, scale x up */  		k -= 54; -		x *= two54; -		GET_HIGH_WORD(hx,x); -	} -	if (hx >= 0x7ff00000) -		return x+x; -	k += (hx>>20) - 1023; -	hx &= 0x000fffff; -	i = (hx+0x95f64)&0x100000; -	SET_HIGH_WORD(x, hx|(i^0x3ff00000));  /* normalize x or x/2 */ -	k += i>>20; +		x *= 0x1p54; +		u.f = x; +		hx = u.i>>32; +	} else if (hx >= 0x7ff00000) { +		return x; +	} else if (hx == 0x3ff00000 && u.i<<32 == 0) +		return 0; + +	/* reduce x into [sqrt(2)/2, sqrt(2)] */ +	hx += 0x3ff00000 - 0x3fe6a09e; +	k += (int)(hx>>20) - 0x3ff; +	hx = (hx&0x000fffff) + 0x3fe6a09e; +	u.i = (uint64_t)hx<<32 | (u.i&0xffffffff); +	x = u.f; +  	f = x - 1.0; -	if ((0x000fffff&(2+hx)) < 3) {  /* -2**-20 <= f < 2**-20 */ -		if (f == 0.0) { -			if (k == 0) { -				return 0.0; -			} -			dk = (double)k; -			return dk*ln2_hi + dk*ln2_lo; -		} -		R = f*f*(0.5-0.33333333333333333*f); -		if (k == 0) -			return f - R; -		dk = (double)k; -		return dk*ln2_hi - ((R-dk*ln2_lo)-f); -	} +	hfsq = 0.5*f*f;  	s = f/(2.0+f); -	dk = (double)k;  	z = s*s; -	i = hx - 0x6147a;  	w = z*z; -	j = 0x6b851 - hx;  	t1 = w*(Lg2+w*(Lg4+w*Lg6));  	t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); -	i |= j;  	R = t2 + t1; -	if (i > 0) { -		hfsq = 0.5*f*f; -		if (k == 0) -			return f - (hfsq-s*(hfsq+R)); -		return dk*ln2_hi - ((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f); -	} else { -		if (k == 0) -			return f - s*(f-R); -		return dk*ln2_hi - ((s*(f-R)-dk*ln2_lo)-f); -	} +	dk = k; +	return s*(hfsq+R) + dk*ln2_lo - hfsq + f + dk*ln2_hi;  } diff --git a/src/math/log10.c b/src/math/log10.c index ed65d9be..81026876 100644 --- a/src/math/log10.c +++ b/src/math/log10.c @@ -10,72 +10,91 @@   * ====================================================   */  /* - * Return the base 10 logarithm of x.  See e_log.c and k_log.h for most - * comments. + * Return the base 10 logarithm of x.  See log.c for most comments.   * - *    log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2) - * in not-quite-routine extra precision. + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + *    log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2)   */ -#include "libm.h" -#include "__log1p.h" +#include <math.h> +#include <stdint.h>  static const double -two54     = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */  ivln10hi  = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */  ivln10lo  = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */  log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ -log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ +log10_2lo = 3.69423907715893078616e-13, /* 0x3D59FEF3, 0x11F12B36 */ +Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */ +Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */ +Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */ +Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */ +Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */ +Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */ +Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */  double log10(double x)  { -	double f,hfsq,hi,lo,r,val_hi,val_lo,w,y,y2; -	int32_t i,k,hx; -	uint32_t lx; - -	EXTRACT_WORDS(hx, lx, x); +	union {double f; uint64_t i;} u = {x}; +	double_t hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo; +	uint32_t hx; +	int k; +	hx = u.i>>32;  	k = 0; -	if (hx < 0x00100000) {  /* x < 2**-1022  */ -		if (((hx&0x7fffffff)|lx) == 0) -			return -two54/0.0;  /* log(+-0)=-inf */ -		if (hx<0) -			return (x-x)/0.0;   /* log(-#) = NaN */ -		/* subnormal number, scale up x */ +	if (hx < 0x00100000 || hx>>31) { +		if (u.i<<1 == 0) +			return -1/(x*x);  /* log(+-0)=-inf */ +		if (hx>>31) +			return (x-x)/0.0; /* log(-#) = NaN */ +		/* subnormal number, scale x up */  		k -= 54; -		x *= two54; -		GET_HIGH_WORD(hx, x); -	} -	if (hx >= 0x7ff00000) -		return x+x; -	if (hx == 0x3ff00000 && lx == 0) -		return 0.0;  /* log(1) = +0 */ -	k += (hx>>20) - 1023; -	hx &= 0x000fffff; -	i = (hx+0x95f64)&0x100000; -	SET_HIGH_WORD(x, hx|(i^0x3ff00000));  /* normalize x or x/2 */ -	k += i>>20; -	y = (double)k; +		x *= 0x1p54; +		u.f = x; +		hx = u.i>>32; +	} else if (hx >= 0x7ff00000) { +		return x; +	} else if (hx == 0x3ff00000 && u.i<<32 == 0) +		return 0; + +	/* reduce x into [sqrt(2)/2, sqrt(2)] */ +	hx += 0x3ff00000 - 0x3fe6a09e; +	k += (int)(hx>>20) - 0x3ff; +	hx = (hx&0x000fffff) + 0x3fe6a09e; +	u.i = (uint64_t)hx<<32 | (u.i&0xffffffff); +	x = u.f; +  	f = x - 1.0;  	hfsq = 0.5*f*f; -	r = __log1p(f); +	s = f/(2.0+f); +	z = s*s; +	w = z*z; +	t1 = w*(Lg2+w*(Lg4+w*Lg6)); +	t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); +	R = t2 + t1;  	/* See log2.c for details. */ +	/* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */  	hi = f - hfsq; -	SET_LOW_WORD(hi, 0); -	lo = (f - hi) - hfsq + r; +	u.f = hi; +	u.i &= (uint64_t)-1<<32; +	hi = u.f; +	lo = f - hi - hfsq + s*(hfsq+R); + +	/* val_hi+val_lo ~ log10(1+f) + k*log10(2) */  	val_hi = hi*ivln10hi; -	y2 = y*log10_2hi; -	val_lo = y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi; +	dk = k; +	y = dk*log10_2hi; +	val_lo = dk*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi;  	/* -	 * Extra precision in for adding y*log10_2hi is not strictly needed +	 * Extra precision in for adding y is not strictly needed  	 * since there is no very large cancellation near x = sqrt(2) or  	 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs  	 * with some parallelism and it reduces the error for many args.  	 */ -	w = y2 + val_hi; -	val_lo += (y2 - w) + val_hi; +	w = y + val_hi; +	val_lo += (y - w) + val_hi;  	val_hi = w;  	return val_lo + val_hi; diff --git a/src/math/log10f.c b/src/math/log10f.c index e10749b5..9ca2f017 100644 --- a/src/math/log10f.c +++ b/src/math/log10f.c @@ -13,57 +13,65 @@   * See comments in log10.c.   */ -#include "libm.h" -#include "__log1pf.h" +#include <math.h> +#include <stdint.h>  static const float -two25     =  3.3554432000e+07, /* 0x4c000000 */  ivln10hi  =  4.3432617188e-01, /* 0x3ede6000 */  ivln10lo  = -3.1689971365e-05, /* 0xb804ead9 */  log10_2hi =  3.0102920532e-01, /* 0x3e9a2080 */ -log10_2lo =  7.9034151668e-07; /* 0x355427db */ +log10_2lo =  7.9034151668e-07, /* 0x355427db */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ +Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ +Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ +Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */  float log10f(float x)  { -	float f,hfsq,hi,lo,r,y; -	int32_t i,k,hx; - -	GET_FLOAT_WORD(hx, x); +	union {float f; uint32_t i;} u = {x}; +	float_t hfsq,f,s,z,R,w,t1,t2,dk,hi,lo; +	uint32_t ix; +	int k; +	ix = u.i;  	k = 0; -	if (hx < 0x00800000) {  /* x < 2**-126  */ -		if ((hx&0x7fffffff) == 0) -			return -two25/0.0f;  /* log(+-0)=-inf */ -		if (hx < 0) -			return (x-x)/0.0f;   /* log(-#) = NaN */ +	if (ix < 0x00800000 || ix>>31) {  /* x < 2**-126  */ +		if (ix<<1 == 0) +			return -1/(x*x);  /* log(+-0)=-inf */ +		if (ix>>31) +			return (x-x)/0.0f; /* log(-#) = NaN */  		/* subnormal number, scale up x */  		k -= 25; -		x *= two25; -		GET_FLOAT_WORD(hx, x); -	} -	if (hx >= 0x7f800000) -		return x+x; -	if (hx == 0x3f800000) -		return 0.0f;  /* log(1) = +0 */ -	k += (hx>>23) - 127; -	hx &= 0x007fffff; -	i = (hx+(0x4afb0d))&0x800000; -	SET_FLOAT_WORD(x, hx|(i^0x3f800000));  /* normalize x or x/2 */ -	k += i>>23; -	y = (float)k; +		x *= 0x1p25f; +		u.f = x; +		ix = u.i; +	} else if (ix >= 0x7f800000) { +		return x; +	} else if (ix == 0x3f800000) +		return 0; + +	/* reduce x into [sqrt(2)/2, sqrt(2)] */ +	ix += 0x3f800000 - 0x3f3504f3; +	k += (int)(ix>>23) - 0x7f; +	ix = (ix&0x007fffff) + 0x3f3504f3; +	u.i = ix; +	x = u.f; +  	f = x - 1.0f; -	hfsq = 0.5f * f * f; -	r = __log1pf(f); +	s = f/(2.0f + f); +	z = s*s; +	w = z*z; +	t1= w*(Lg2+w*Lg4); +	t2= z*(Lg1+w*Lg3); +	R = t2 + t1; +	hfsq = 0.5f*f*f; -// FIXME -//      /* See log2f.c and log2.c for details. */ -//      if (sizeof(float_t) > sizeof(float)) -//              return (r - hfsq + f) * ((float_t)ivln10lo + ivln10hi) + -//                  y * ((float_t)log10_2lo + log10_2hi);  	hi = f - hfsq; -	GET_FLOAT_WORD(hx, hi); -	SET_FLOAT_WORD(hi, hx&0xfffff000); -	lo = (f - hi) - hfsq + r; -	return y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi + -	        hi*ivln10hi + y*log10_2hi; +	u.f = hi; +	u.i &= 0xfffff000; +	hi = u.f; +	lo = f - hi - hfsq + s*(hfsq+R); +	dk = k; +	return dk*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi + hi*ivln10hi + dk*log10_2hi;  } diff --git a/src/math/log10l.c b/src/math/log10l.c index f0eeeafb..c7aacf90 100644 --- a/src/math/log10l.c +++ b/src/math/log10l.c @@ -117,16 +117,15 @@ static const long double S[4] = {  long double log10l(long double x)  { -	long double y; -	volatile long double z; +	long double y, z;  	int e;  	if (isnan(x))  		return x;  	if(x <= 0.0) {  		if(x == 0.0) -			return -1.0 / (x - x); -		return (x - x) / (x - x); +			return -1.0 / (x*x); +		return (x - x) / 0.0;  	}  	if (x == INFINITY)  		return INFINITY; diff --git a/src/math/log1p.c b/src/math/log1p.c index a71ac423..00971349 100644 --- a/src/math/log1p.c +++ b/src/math/log1p.c @@ -10,6 +10,7 @@   * ====================================================   */  /* double log1p(double x) + * Return the natural logarithm of 1+x.   *   * Method :   *   1. Argument Reduction: find k and f such that @@ -23,31 +24,9 @@   *      and add back the correction term c/u.   *      (Note: when x > 2**53, one can simply return log(x))   * - *   2. Approximation of log1p(f). - *      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) - *               = 2s + 2/3 s**3 + 2/5 s**5 + ....., - *               = 2s + s*R - *      We use a special Reme algorithm on [0,0.1716] to generate - *      a polynomial of degree 14 to approximate R The maximum error - *      of this polynomial approximation is bounded by 2**-58.45. In - *      other words, - *                      2      4      6      8      10      12      14 - *          R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s - *      (the values of Lp1 to Lp7 are listed in the program) - *      and - *          |      2          14          |     -58.45 - *          | Lp1*s +...+Lp7*s    -  R(z) | <= 2 - *          |                             | - *      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. - *      In order to guarantee error in log below 1ulp, we compute log - *      by - *              log1p(f) = f - (hfsq - s*(hfsq+R)). + *   2. Approximation of log(1+f): See log.c   * - *      3. Finally, log1p(x) = k*ln2 + log1p(f). - *                           = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) - *         Here ln2 is split into two floating point number: - *                      ln2_hi + ln2_lo, - *         where n*ln2_hi is always exact for |n| < 2000. + *   3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c   *   * Special cases:   *      log1p(x) is NaN with signal if x < -1 (including -INF) ; @@ -79,94 +58,65 @@  static const double  ln2_hi = 6.93147180369123816490e-01,  /* 3fe62e42 fee00000 */  ln2_lo = 1.90821492927058770002e-10,  /* 3dea39ef 35793c76 */ -two54  = 1.80143985094819840000e+16,  /* 43500000 00000000 */ -Lp1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */ -Lp2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */ -Lp3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */ -Lp4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */ -Lp5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */ -Lp6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */ -Lp7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */ +Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */ +Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */ +Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */ +Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */ +Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */ +Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */ +Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */  double log1p(double x)  { -	double hfsq,f,c,s,z,R,u; -	int32_t k,hx,hu,ax; - -	GET_HIGH_WORD(hx, x); -	ax = hx & 0x7fffffff; +	union {double f; uint64_t i;} u = {x}; +	double_t hfsq,f,c,s,z,R,w,t1,t2,dk; +	uint32_t hx,hu; +	int k; +	hx = u.i>>32;  	k = 1; -	if (hx < 0x3FDA827A) {  /* 1+x < sqrt(2)+ */ -		if (ax >= 0x3ff00000) {  /* x <= -1.0 */ -			if (x == -1.0) -				return -two54/0.0; /* log1p(-1)=+inf */ -			return (x-x)/(x-x);         /* log1p(x<-1)=NaN */ +	if (hx < 0x3fda827a || hx>>31) {  /* 1+x < sqrt(2)+ */ +		if (hx >= 0xbff00000) {  /* x <= -1.0 */ +			if (x == -1) +				return x/0.0; /* log1p(-1) = -inf */ +			return (x-x)/0.0;     /* log1p(x<-1) = NaN */  		} -		if (ax < 0x3e200000) {   /* |x| < 2**-29 */ -			/* if 0x1p-1022 <= |x| < 0x1p-54, avoid raising underflow */ -			if (ax < 0x3c900000 && ax >= 0x00100000) -				return x; -#if FLT_EVAL_METHOD != 0 -			FORCE_EVAL((float)x); -#endif -			return x - x*x*0.5; +		if (hx<<1 < 0x3ca00000<<1) {  /* |x| < 2**-53 */ +			/* underflow if subnormal */ +			if ((hx&0x7ff00000) == 0) +				FORCE_EVAL((float)x); +			return x;  		} -		if (hx > 0 || hx <= (int32_t)0xbfd2bec4) {  /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ +		if (hx <= 0xbfd2bec4) {  /* sqrt(2)/2- <= 1+x < sqrt(2)+ */  			k = 0; +			c = 0;  			f = x; -			hu = 1;  		} -	} -	if (hx >= 0x7ff00000) -		return x+x; -	if (k != 0) { -		if (hx < 0x43400000) { -			u = 1 + x; -			GET_HIGH_WORD(hu, u); -			k = (hu>>20) - 1023; -			c = k > 0 ? 1.0-(u-x) : x-(u-1.0); /* correction term */ -			c /= u; -		} else { -			u = x; -			GET_HIGH_WORD(hu,u); -			k = (hu>>20) - 1023; +	} else if (hx >= 0x7ff00000) +		return x; +	if (k) { +		u.f = 1 + x; +		hu = u.i>>32; +		hu += 0x3ff00000 - 0x3fe6a09e; +		k = (int)(hu>>20) - 0x3ff; +		/* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ +		if (k < 54) { +			c = k >= 2 ? 1-(u.f-x) : x-(u.f-1); +			c /= u.f; +		} else  			c = 0; -		} -		hu &= 0x000fffff; -		/* -		 * The approximation to sqrt(2) used in thresholds is not -		 * critical.  However, the ones used above must give less -		 * strict bounds than the one here so that the k==0 case is -		 * never reached from here, since here we have committed to -		 * using the correction term but don't use it if k==0. -		 */ -		if (hu < 0x6a09e) {  /* u ~< sqrt(2) */ -			SET_HIGH_WORD(u, hu|0x3ff00000); /* normalize u */ -		} else { -			k += 1; -			SET_HIGH_WORD(u, hu|0x3fe00000); /* normalize u/2 */ -			hu = (0x00100000-hu)>>2; -		} -		f = u - 1.0; +		/* reduce u into [sqrt(2)/2, sqrt(2)] */ +		hu = (hu&0x000fffff) + 0x3fe6a09e; +		u.i = (uint64_t)hu<<32 | (u.i&0xffffffff); +		f = u.f - 1;  	}  	hfsq = 0.5*f*f; -	if (hu == 0) {   /* |f| < 2**-20 */ -		if (f == 0.0) { -			if(k == 0) -				return 0.0; -			c += k*ln2_lo; -			return k*ln2_hi + c; -		} -		R = hfsq*(1.0 - 0.66666666666666666*f); -		if (k == 0) -			return f - R; -		return k*ln2_hi - ((R-(k*ln2_lo+c))-f); -	}  	s = f/(2.0+f);  	z = s*s; -	R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))); -	if (k == 0) -		return f - (hfsq-s*(hfsq+R)); -	return k*ln2_hi - ((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f); +	w = z*z; +	t1 = w*(Lg2+w*(Lg4+w*Lg6)); +	t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); +	R = t2 + t1; +	dk = k; +	return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi;  } diff --git a/src/math/log1pf.c b/src/math/log1pf.c index e6940d29..23985c35 100644 --- a/src/math/log1pf.c +++ b/src/math/log1pf.c @@ -1,8 +1,5 @@  /* origin: FreeBSD /usr/src/lib/msun/src/s_log1pf.c */  /* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/*   * ====================================================   * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.   * @@ -18,95 +15,63 @@  static const float  ln2_hi = 6.9313812256e-01, /* 0x3f317180 */  ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ -two25  = 3.355443200e+07,  /* 0x4c000000 */ -Lp1 = 6.6666668653e-01, /* 3F2AAAAB */ -Lp2 = 4.0000000596e-01, /* 3ECCCCCD */ -Lp3 = 2.8571429849e-01, /* 3E924925 */ -Lp4 = 2.2222198546e-01, /* 3E638E29 */ -Lp5 = 1.8183572590e-01, /* 3E3A3325 */ -Lp6 = 1.5313838422e-01, /* 3E1CD04F */ -Lp7 = 1.4798198640e-01; /* 3E178897 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ +Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ +Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ +Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */  float log1pf(float x)  { -	float hfsq,f,c,s,z,R,u; -	int32_t k,hx,hu,ax; - -	GET_FLOAT_WORD(hx, x); -	ax = hx & 0x7fffffff; +	union {float f; uint32_t i;} u = {x}; +	float_t hfsq,f,c,s,z,R,w,t1,t2,dk; +	uint32_t ix,iu; +	int k; +	ix = u.i;  	k = 1; -	if (hx < 0x3ed413d0) {  /* 1+x < sqrt(2)+  */ -		if (ax >= 0x3f800000) {  /* x <= -1.0 */ -			if (x == -1.0f) -				return -two25/0.0f; /* log1p(-1)=+inf */ -			return (x-x)/(x-x);         /* log1p(x<-1)=NaN */ +	if (ix < 0x3ed413d0 || ix>>31) {  /* 1+x < sqrt(2)+  */ +		if (ix >= 0xbf800000) {  /* x <= -1.0 */ +			if (x == -1) +				return x/0.0f; /* log1p(-1)=+inf */ +			return (x-x)/0.0f;     /* log1p(x<-1)=NaN */  		} -		if (ax < 0x38000000) {   /* |x| < 2**-15 */ -			/* if 0x1p-126 <= |x| < 0x1p-24, avoid raising underflow */ -			if (ax < 0x33800000 && ax >= 0x00800000) -				return x; -#if FLT_EVAL_METHOD != 0 -			FORCE_EVAL(x*x); -#endif -			return x - x*x*0.5f; +		if (ix<<1 < 0x33800000<<1) {   /* |x| < 2**-24 */ +			/* underflow if subnormal */ +			if ((ix&0x7f800000) == 0) +				FORCE_EVAL(x*x); +			return x;  		} -		if (hx > 0 || hx <= (int32_t)0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ +		if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */  			k = 0; +			c = 0;  			f = x; -			hu = 1;  		} -	} -	if (hx >= 0x7f800000) -		return x+x; -	if (k != 0) { -		if (hx < 0x5a000000) { -			u = 1 + x; -			GET_FLOAT_WORD(hu, u); -			k = (hu>>23) - 127; -			/* correction term */ -			c = k > 0 ? 1.0f-(u-x) : x-(u-1.0f); -			c /= u; -		} else { -			u = x; -			GET_FLOAT_WORD(hu,u); -			k = (hu>>23) - 127; +	} else if (ix >= 0x7f800000) +		return x; +	if (k) { +		u.f = 1 + x; +		iu = u.i; +		iu += 0x3f800000 - 0x3f3504f3; +		k = (int)(iu>>23) - 0x7f; +		/* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ +		if (k < 25) { +			c = k >= 2 ? 1-(u.f-x) : x-(u.f-1); +			c /= u.f; +		} else  			c = 0; -		} -		hu &= 0x007fffff; -		/* -		 * The approximation to sqrt(2) used in thresholds is not -		 * critical.  However, the ones used above must give less -		 * strict bounds than the one here so that the k==0 case is -		 * never reached from here, since here we have committed to -		 * using the correction term but don't use it if k==0. -		 */ -		if (hu < 0x3504f4) {  /* u < sqrt(2) */ -			SET_FLOAT_WORD(u, hu|0x3f800000);  /* normalize u */ -		} else { -			k += 1; -			SET_FLOAT_WORD(u, hu|0x3f000000);  /* normalize u/2 */ -			hu = (0x00800000-hu)>>2; -		} -		f = u - 1.0f; -	} -	hfsq = 0.5f * f * f; -	if (hu == 0) {  /* |f| < 2**-20 */ -		if (f == 0.0f) { -			if (k == 0) -				return 0.0f; -			c += k*ln2_lo; -			return k*ln2_hi+c; -		} -		R = hfsq*(1.0f - 0.66666666666666666f * f); -		if (k == 0) -			return f - R; -		return k*ln2_hi - ((R-(k*ln2_lo+c))-f); +		/* reduce u into [sqrt(2)/2, sqrt(2)] */ +		iu = (iu&0x007fffff) + 0x3f3504f3; +		u.i = iu; +		f = u.f - 1;  	}  	s = f/(2.0f + f);  	z = s*s; -	R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))); -	if (k == 0) -		return f - (hfsq-s*(hfsq+R)); -	return k*ln2_hi - ((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f); +	w = z*z; +	t1= w*(Lg2+w*Lg4); +	t2= z*(Lg1+w*Lg3); +	R = t2 + t1; +	hfsq = 0.5f*f*f; +	dk = k; +	return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi;  } diff --git a/src/math/log1pl.c b/src/math/log1pl.c index edb48df1..37da46d2 100644 --- a/src/math/log1pl.c +++ b/src/math/log1pl.c @@ -118,7 +118,7 @@ long double log1pl(long double xm1)  	/* Test for domain errors.  */  	if (x <= 0.0) {  		if (x == 0.0) -			return -1/x; /* -inf with divbyzero */ +			return -1/(x*x); /* -inf with divbyzero */  		return 0/0.0f; /* nan with invalid */  	} diff --git a/src/math/log2.c b/src/math/log2.c index 1974215d..0aafad4b 100644 --- a/src/math/log2.c +++ b/src/math/log2.c @@ -10,55 +10,66 @@   * ====================================================   */  /* - * Return the base 2 logarithm of x.  See log.c and __log1p.h for most - * comments. + * Return the base 2 logarithm of x.  See log.c for most comments.   * - * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel, - * then does the combining and scaling steps - *    log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k - * in not-quite-routine extra precision. + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + *    log2(x) = (f - f*f/2 + r)/log(2) + k   */ -#include "libm.h" -#include "__log1p.h" +#include <math.h> +#include <stdint.h>  static const double -two54   = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */  ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */ -ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ +ivln2lo = 1.67517131648865118353e-10, /* 0x3de705fc, 0x2eefa200 */ +Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */ +Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */ +Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */ +Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */ +Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */ +Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */ +Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */  double log2(double x)  { -	double f,hfsq,hi,lo,r,val_hi,val_lo,w,y; -	int32_t i,k,hx; -	uint32_t lx; - -	EXTRACT_WORDS(hx, lx, x); +	union {double f; uint64_t i;} u = {x}; +	double_t hfsq,f,s,z,R,w,t1,t2,y,hi,lo,val_hi,val_lo; +	uint32_t hx; +	int k; +	hx = u.i>>32;  	k = 0; -	if (hx < 0x00100000) {  /* x < 2**-1022  */ -		if (((hx&0x7fffffff)|lx) == 0) -			return -two54/0.0;  /* log(+-0)=-inf */ -		if (hx < 0) -			return (x-x)/0.0;   /* log(-#) = NaN */ -		/* subnormal number, scale up x */ +	if (hx < 0x00100000 || hx>>31) { +		if (u.i<<1 == 0) +			return -1/(x*x);  /* log(+-0)=-inf */ +		if (hx>>31) +			return (x-x)/0.0; /* log(-#) = NaN */ +		/* subnormal number, scale x up */  		k -= 54; -		x *= two54; -		GET_HIGH_WORD(hx, x); -	} -	if (hx >= 0x7ff00000) -		return x+x; -	if (hx == 0x3ff00000 && lx == 0) -		return 0.0;  /* log(1) = +0 */ -	k += (hx>>20) - 1023; -	hx &= 0x000fffff; -	i = (hx+0x95f64) & 0x100000; -	SET_HIGH_WORD(x, hx|(i^0x3ff00000));  /* normalize x or x/2 */ -	k += i>>20; -	y = (double)k; +		x *= 0x1p54; +		u.f = x; +		hx = u.i>>32; +	} else if (hx >= 0x7ff00000) { +		return x; +	} else if (hx == 0x3ff00000 && u.i<<32 == 0) +		return 0; + +	/* reduce x into [sqrt(2)/2, sqrt(2)] */ +	hx += 0x3ff00000 - 0x3fe6a09e; +	k += (int)(hx>>20) - 0x3ff; +	hx = (hx&0x000fffff) + 0x3fe6a09e; +	u.i = (uint64_t)hx<<32 | (u.i&0xffffffff); +	x = u.f; +  	f = x - 1.0;  	hfsq = 0.5*f*f; -	r = __log1p(f); +	s = f/(2.0+f); +	z = s*s; +	w = z*z; +	t1 = w*(Lg2+w*(Lg4+w*Lg6)); +	t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); +	R = t2 + t1;  	/*  	 * f-hfsq must (for args near 1) be evaluated in extra precision @@ -90,13 +101,19 @@ double log2(double x)  	 * The multi-precision calculations for the multiplications are  	 * routine.  	 */ + +	/* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */  	hi = f - hfsq; -	SET_LOW_WORD(hi, 0); -	lo = (f - hi) - hfsq + r; +	u.f = hi; +	u.i &= (uint64_t)-1<<32; +	hi = u.f; +	lo = f - hi - hfsq + s*(hfsq+R); +  	val_hi = hi*ivln2hi;  	val_lo = (lo+hi)*ivln2lo + lo*ivln2hi;  	/* spadd(val_hi, val_lo, y), except for not using double_t: */ +	y = k;  	w = y + val_hi;  	val_lo += (y - w) + val_hi;  	val_hi = w; diff --git a/src/math/log2f.c b/src/math/log2f.c index e0d6a9e4..b3e305fe 100644 --- a/src/math/log2f.c +++ b/src/math/log2f.c @@ -13,67 +13,62 @@   * See comments in log2.c.   */ -#include "libm.h" -#include "__log1pf.h" +#include <math.h> +#include <stdint.h>  static const float -two25   =  3.3554432000e+07, /* 0x4c000000 */  ivln2hi =  1.4428710938e+00, /* 0x3fb8b000 */ -ivln2lo = -1.7605285393e-04; /* 0xb9389ad4 */ +ivln2lo = -1.7605285393e-04, /* 0xb9389ad4 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ +Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ +Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ +Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */  float log2f(float x)  { -	float f,hfsq,hi,lo,r,y; -	int32_t i,k,hx; - -	GET_FLOAT_WORD(hx, x); +	union {float f; uint32_t i;} u = {x}; +	float_t hfsq,f,s,z,R,w,t1,t2,hi,lo; +	uint32_t ix; +	int k; +	ix = u.i;  	k = 0; -	if (hx < 0x00800000) {  /* x < 2**-126  */ -		if ((hx&0x7fffffff) == 0) -			return -two25/0.0f;  /* log(+-0)=-inf */ -		if (hx < 0) -			return (x-x)/0.0f;   /* log(-#) = NaN */ +	if (ix < 0x00800000 || ix>>31) {  /* x < 2**-126  */ +		if (ix<<1 == 0) +			return -1/(x*x);  /* log(+-0)=-inf */ +		if (ix>>31) +			return (x-x)/0.0f; /* log(-#) = NaN */  		/* subnormal number, scale up x */  		k -= 25; -		x *= two25; -		GET_FLOAT_WORD(hx, x); -	} -	if (hx >= 0x7f800000) -		return x+x; -	if (hx == 0x3f800000) -		return 0.0f;  /* log(1) = +0 */ -	k += (hx>>23) - 127; -	hx &= 0x007fffff; -	i = (hx+(0x4afb0d))&0x800000; -	SET_FLOAT_WORD(x, hx|(i^0x3f800000));  /* normalize x or x/2 */ -	k += i>>23; -	y = (float)k; -	f = x - 1.0f; -	hfsq = 0.5f * f * f; -	r = __log1pf(f); +		x *= 0x1p25f; +		u.f = x; +		ix = u.i; +	} else if (ix >= 0x7f800000) { +		return x; +	} else if (ix == 0x3f800000) +		return 0; -	/* -	 * We no longer need to avoid falling into the multi-precision -	 * calculations due to compiler bugs breaking Dekker's theorem. -	 * Keep avoiding this as an optimization.  See log2.c for more -	 * details (some details are here only because the optimization -	 * is not yet available in double precision). -	 * -	 * Another compiler bug turned up.  With gcc on i386, -	 * (ivln2lo + ivln2hi) would be evaluated in float precision -	 * despite runtime evaluations using double precision.  So we -	 * must cast one of its terms to float_t.  This makes the whole -	 * expression have type float_t, so return is forced to waste -	 * time clobbering its extra precision. -	 */ -// FIXME -//      if (sizeof(float_t) > sizeof(float)) -//              return (r - hfsq + f) * ((float_t)ivln2lo + ivln2hi) + y; +	/* reduce x into [sqrt(2)/2, sqrt(2)] */ +	ix += 0x3f800000 - 0x3f3504f3; +	k += (int)(ix>>23) - 0x7f; +	ix = (ix&0x007fffff) + 0x3f3504f3; +	u.i = ix; +	x = u.f; + +	f = x - 1.0f; +	s = f/(2.0f + f); +	z = s*s; +	w = z*z; +	t1= w*(Lg2+w*Lg4); +	t2= z*(Lg1+w*Lg3); +	R = t2 + t1; +	hfsq = 0.5f*f*f;  	hi = f - hfsq; -	GET_FLOAT_WORD(hx,hi); -	SET_FLOAT_WORD(hi,hx&0xfffff000); -	lo = (f - hi) - hfsq + r; -	return (lo+hi)*ivln2lo + lo*ivln2hi + hi*ivln2hi + y; +	u.f = hi; +	u.i &= 0xfffff000; +	hi = u.f; +	lo = f - hi - hfsq + s*(hfsq+R); +	return (lo+hi)*ivln2lo + lo*ivln2hi + hi*ivln2hi + k;  } diff --git a/src/math/log2l.c b/src/math/log2l.c index 345b395d..d00531d5 100644 --- a/src/math/log2l.c +++ b/src/math/log2l.c @@ -117,7 +117,7 @@ long double log2l(long double x)  		return x;  	if (x <= 0.0) {  		if (x == 0.0) -			return -1/(x+0); /* -inf with divbyzero */ +			return -1/(x*x); /* -inf with divbyzero */  		return 0/0.0f; /* nan with invalid */  	} diff --git a/src/math/logf.c b/src/math/logf.c index c7f7dbe6..52230a1b 100644 --- a/src/math/logf.c +++ b/src/math/logf.c @@ -13,12 +13,12 @@   * ====================================================   */ -#include "libm.h" +#include <math.h> +#include <stdint.h>  static const float  ln2_hi = 6.9313812256e-01, /* 0x3f317180 */  ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ -two25  = 3.355443200e+07,  /* 0x4c000000 */  /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */  Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */  Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ @@ -27,61 +27,43 @@ Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */  float logf(float x)  { -	float hfsq,f,s,z,R,w,t1,t2,dk; -	int32_t k,ix,i,j; - -	GET_FLOAT_WORD(ix, x); +	union {float f; uint32_t i;} u = {x}; +	float_t hfsq,f,s,z,R,w,t1,t2,dk; +	uint32_t ix; +	int k; +	ix = u.i;  	k = 0; -	if (ix < 0x00800000) {  /* x < 2**-126  */ -		if ((ix & 0x7fffffff) == 0) -			return -two25/0.0f;  /* log(+-0)=-inf */ -		if (ix < 0) -			return (x-x)/0.0f;   /* log(-#) = NaN */ +	if (ix < 0x00800000 || ix>>31) {  /* x < 2**-126  */ +		if (ix<<1 == 0) +			return -1/(x*x);  /* log(+-0)=-inf */ +		if (ix>>31) +			return (x-x)/0.0f; /* log(-#) = NaN */  		/* subnormal number, scale up x */  		k -= 25; -		x *= two25; -		GET_FLOAT_WORD(ix, x); -	} -	if (ix >= 0x7f800000) -		return x+x; -	k += (ix>>23) - 127; -	ix &= 0x007fffff; -	i = (ix + (0x95f64<<3)) & 0x800000; -	SET_FLOAT_WORD(x, ix|(i^0x3f800000));  /* normalize x or x/2 */ -	k += i>>23; +		x *= 0x1p25f; +		u.f = x; +		ix = u.i; +	} else if (ix >= 0x7f800000) { +		return x; +	} else if (ix == 0x3f800000) +		return 0; + +	/* reduce x into [sqrt(2)/2, sqrt(2)] */ +	ix += 0x3f800000 - 0x3f3504f3; +	k += (int)(ix>>23) - 0x7f; +	ix = (ix&0x007fffff) + 0x3f3504f3; +	u.i = ix; +	x = u.f; +  	f = x - 1.0f; -	if ((0x007fffff & (0x8000 + ix)) < 0xc000) {  /* -2**-9 <= f < 2**-9 */ -		if (f == 0.0f) { -			if (k == 0) -				return 0.0f; -			dk = (float)k; -			return dk*ln2_hi + dk*ln2_lo; -		} -		R = f*f*(0.5f - 0.33333333333333333f*f); -		if (k == 0) -			return f-R; -		dk = (float)k; -		return dk*ln2_hi - ((R-dk*ln2_lo)-f); -	}  	s = f/(2.0f + f); -	dk = (float)k;  	z = s*s; -	i = ix-(0x6147a<<3);  	w = z*z; -	j = (0x6b851<<3)-ix;  	t1= w*(Lg2+w*Lg4);  	t2= z*(Lg1+w*Lg3); -	i |= j;  	R = t2 + t1; -	if (i > 0) { -		hfsq = 0.5f * f * f; -		if (k == 0) -			return f - (hfsq-s*(hfsq+R)); -		return dk*ln2_hi - ((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f); -	} else { -		if (k == 0) -			return f - s*(f-R); -		return dk*ln2_hi - ((s*(f-R)-dk*ln2_lo)-f); -	} +	hfsq = 0.5f*f*f; +	dk = k; +	return s*(hfsq+R) + dk*ln2_lo - hfsq + f + dk*ln2_hi;  } diff --git a/src/math/logl.c b/src/math/logl.c index ef2b5515..03c5188f 100644 --- a/src/math/logl.c +++ b/src/math/logl.c @@ -35,9 +35,9 @@   *   *     log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x).   * - * Otherwise, setting  z = 2(x-1)/x+1), + * Otherwise, setting  z = 2(x-1)/(x+1),   * - *     log(x) = z + z**3 P(z)/Q(z). + *     log(x) = log(1+z/2) - log(1-z/2) = z + z**3 P(z)/Q(z).   *   *   * ACCURACY: @@ -116,7 +116,7 @@ long double logl(long double x)  		return x;  	if (x <= 0.0) {  		if (x == 0.0) -			return -1/(x+0); /* -inf with divbyzero */ +			return -1/(x*x); /* -inf with divbyzero */  		return 0/0.0f; /* nan with invalid */  	} @@ -127,7 +127,7 @@ long double logl(long double x)  	x = frexpl(x, &e);  	/* logarithm using log(x) = z + z**3 P(z)/Q(z), -	 * where z = 2(x-1)/x+1) +	 * where z = 2(x-1)/(x+1)  	 */  	if (e > 2 || e < -2) {  		if (x < SQRTH) {  /* 2(2x-1)/(2x+1) */ | 
