math: extensive log*.c cleanup

The log, log2 and log10 functions share a lot of code and to a lesser extent log1p too. A small part of the code was kept separately in __log1p.h, but since it did not capture much of the common code and it was inlined anyway, it did not solve the issue properly. Now the log functions have significant code duplication, which may be resolved later, until then they need to be modified together. logl, log10l, log2l, log1pl: * Fix the sign when the return value should be -inf. * Remove the volatile hack from log10l (seems unnecessary) log1p, log1pf: * Change the handling of small inputs: only |x|<2^-53 is special (then it is enough to return x with the usual subnormal handling) this fixes the sign of log1p(0) in downward rounding. * Do not handle the k==0 case specially (other than skipping the elaborate argument reduction) * Do not handle 1+x close to power-of-two specially (this code was used rarely, did not give much speed up and the precision wasn't better than the general) * Fix the correction term formula (c=1-(u-x) was used incorrectly when x<1 but (double)(x+1)==2, this was not a critical issue) * Use the exact same method for calculating log(1+f) as in log (except in log1p the c correction term is added to the result). log, logf, log10, log10f, log2, log2f: * Use double_t and float_t consistently. * Now the first part of log10 and log2 is identical to log (until the return statement, hopefully this makes maintainence easier). * Most special case formulas were removed (close to power-of-two and k==0 cases), they increase the code size without providing precision or performance benefits (and obfuscate the code). Only x==1 is handled specially so in downward rounding mode the sign of zero is correct (the general formula happens to give -0). * For x==0 instead of -1/0.0 or -two54/0.0, return -1/(x*x) to force raising the exception at runtime. * Arg reduction code is changed (slightly simplified) * The thresholds for arg reduction to [sqrt(2)/2,sqrt(2)] are now consistently the [0x3fe6a09e00000000,0x3ff6a09dffffffff] and the [0x3f3504f3,0x3fb504f2] intervals for double and float reductions respectively (the exact threshold values are not critical) * Remove the obsolete comment for the FLT_EVAL_METHOD!=0 case in log2f (The same code is used for all eval methods now, on i386 slightly simpler code could be used, but we have asm there anyway) all: * Fix signed int arithmetics (using unsigned for bitmanipulation) * Fix various comments
author: Szabolcs Nagy <nsz@port70.net> 2013-10-28 01:16:14 +0000
committer: Szabolcs Nagy <nsz@port70.net> 2013-10-28 01:16:14 +0000
commit: 71d23b310383699a3101ea8bf088398796529ddd (patch)
tree: 812e6281a32bdc70977475abef9ee6b52b187422 /src/math/log10.c
parent: 4b15d9f46a2b260661d2e054575e617c76795578 (diff)
download: musl-71d23b310383699a3101ea8bf088398796529ddd.tar.gz
1 files changed, 59 insertions, 40 deletions
diff --git a/src/math/log10.c b/src/math/log10.c
index ed65d9be..81026876 100644
--- a/src/math/log10.c
+++ b/src/math/log10.c
@@ -10,72 +10,91 @@
  * ====================================================
  */
 /*
- * Return the base 10 logarithm of x.  See e_log.c and k_log.h for most
- * comments.
+ * Return the base 10 logarithm of x.  See log.c for most comments.
  *
- *    log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2)
- * in not-quite-routine extra precision.
+ * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2
+ * as in log.c, then combine and scale in extra precision:
+ *    log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2)
  */
 
-#include "libm.h"
-#include "__log1p.h"
+#include <math.h>
+#include <stdint.h>
 
 static const double
-two54     = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
 ivln10hi  = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */
 ivln10lo  = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */
 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
-log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
+log10_2lo = 3.69423907715893078616e-13, /* 0x3D59FEF3, 0x11F12B36 */
+Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
+Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
+Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
+Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
+Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
+Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
+Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
 
 double log10(double x)
 {
-	double f,hfsq,hi,lo,r,val_hi,val_lo,w,y,y2;
-	int32_t i,k,hx;
-	uint32_t lx;
-
-	EXTRACT_WORDS(hx, lx, x);
+	union {double f; uint64_t i;} u = {x};
+	double_t hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo;
+	uint32_t hx;
+	int k;
 
+	hx = u.i>>32;
 	k = 0;
-	if (hx < 0x00100000) {  /* x < 2**-1022  */
-		if (((hx&0x7fffffff)|lx) == 0)
-			return -two54/0.0;  /* log(+-0)=-inf */
-		if (hx<0)
-			return (x-x)/0.0;   /* log(-#) = NaN */
-		/* subnormal number, scale up x */
+	if (hx < 0x00100000 || hx>>31) {
+		if (u.i<<1 == 0)
+			return -1/(x*x);  /* log(+-0)=-inf */
+		if (hx>>31)
+			return (x-x)/0.0; /* log(-#) = NaN */
+		/* subnormal number, scale x up */
 		k -= 54;
-		x *= two54;
-		GET_HIGH_WORD(hx, x);
-	}
-	if (hx >= 0x7ff00000)
-		return x+x;
-	if (hx == 0x3ff00000 && lx == 0)
-		return 0.0;  /* log(1) = +0 */
-	k += (hx>>20) - 1023;
-	hx &= 0x000fffff;
-	i = (hx+0x95f64)&0x100000;
-	SET_HIGH_WORD(x, hx|(i^0x3ff00000));  /* normalize x or x/2 */
-	k += i>>20;
-	y = (double)k;
+		x *= 0x1p54;
+		u.f = x;
+		hx = u.i>>32;
+	} else if (hx >= 0x7ff00000) {
+		return x;
+	} else if (hx == 0x3ff00000 && u.i<<32 == 0)
+		return 0;
+
+	/* reduce x into [sqrt(2)/2, sqrt(2)] */
+	hx += 0x3ff00000 - 0x3fe6a09e;
+	k += (int)(hx>>20) - 0x3ff;
+	hx = (hx&0x000fffff) + 0x3fe6a09e;
+	u.i = (uint64_t)hx<<32 | (u.i&0xffffffff);
+	x = u.f;
+
 	f = x - 1.0;
 	hfsq = 0.5*f*f;
-	r = __log1p(f);
+	s = f/(2.0+f);
+	z = s*s;
+	w = z*z;
+	t1 = w*(Lg2+w*(Lg4+w*Lg6));
+	t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
+	R = t2 + t1;
 
 	/* See log2.c for details. */
+	/* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
 	hi = f - hfsq;
-	SET_LOW_WORD(hi, 0);
-	lo = (f - hi) - hfsq + r;
+	u.f = hi;
+	u.i &= (uint64_t)-1<<32;
+	hi = u.f;
+	lo = f - hi - hfsq + s*(hfsq+R);
+
+	/* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
 	val_hi = hi*ivln10hi;
-	y2 = y*log10_2hi;
-	val_lo = y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi;
+	dk = k;
+	y = dk*log10_2hi;
+	val_lo = dk*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi;
 
 	/*
-	 * Extra precision in for adding y*log10_2hi is not strictly needed
+	 * Extra precision in for adding y is not strictly needed
 	 * since there is no very large cancellation near x = sqrt(2) or
 	 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
 	 * with some parallelism and it reduces the error for many args.
 	 */
-	w = y2 + val_hi;
-	val_lo += (y2 - w) + val_hi;
+	w = y + val_hi;
+	val_lo += (y - w) + val_hi;
 	val_hi = w;
 
 	return val_lo + val_hi;
author	Szabolcs Nagy <nsz@port70.net>	2013-10-28 01:16:14 +0000
committer	Szabolcs Nagy <nsz@port70.net>	2013-10-28 01:16:14 +0000
commit	71d23b310383699a3101ea8bf088398796529ddd (patch)
tree	812e6281a32bdc70977475abef9ee6b52b187422 /src/math/log10.c
parent	4b15d9f46a2b260661d2e054575e617c76795578 (diff)
download	musl-71d23b310383699a3101ea8bf088398796529ddd.tar.gz