From db505b794c697631f65e6b91ff106496debb86ac Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sun, 22 Oct 2017 14:19:20 +0000
Subject: math: new logf

from https://github.com/ARM-software/optimized-routines,
commit 04884bd04eac4b251da4026900010ea7d8850edc,
with minor changes to better fit into musl.

code size change: +289 bytes.
benchmark on x86_64 before, after, speedup:

-Os:
  logf rthruput:   8.40 ns/call  6.14 ns/call 1.37x
   logf latency:  31.79 ns/call 24.33 ns/call 1.31x
-O3:
  logf rthruput:   8.43 ns/call  5.58 ns/call 1.51x
   logf latency:  32.04 ns/call 20.88 ns/call 1.53x
---
 src/math/logf.c | 110 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 56 insertions(+), 54 deletions(-)

(limited to 'src/math/logf.c')

diff --git a/src/math/logf.c b/src/math/logf.c
index 52230a1b..7ee5d7fe 100644
--- a/src/math/logf.c
+++ b/src/math/logf.c
@@ -1,69 +1,71 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_logf.c */
 /*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Single-precision log function.
  *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
  */
 
 #include <math.h>
 #include <stdint.h>
+#include "libm.h"
+#include "logf_data.h"
+
+/*
+LOGF_TABLE_BITS = 4
+LOGF_POLY_ORDER = 4
+
+ULP error: 0.818 (nearest rounding.)
+Relative error: 1.957 * 2^-26 (before rounding.)
+*/
 
-static const float
-ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
-ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
-/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
-Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */
-Lg2 = 0xccce13.0p-25, /* 0.40000972152 */
-Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */
-Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
 
 float logf(float x)
 {
-	union {float f; uint32_t i;} u = {x};
-	float_t hfsq,f,s,z,R,w,t1,t2,dk;
-	uint32_t ix;
-	int k;
+	double_t z, r, r2, y, y0, invc, logc;
+	uint32_t ix, iz, tmp;
+	int k, i;
 
-	ix = u.i;
-	k = 0;
-	if (ix < 0x00800000 || ix>>31) {  /* x < 2**-126  */
-		if (ix<<1 == 0)
-			return -1/(x*x);  /* log(+-0)=-inf */
-		if (ix>>31)
-			return (x-x)/0.0f; /* log(-#) = NaN */
-		/* subnormal number, scale up x */
-		k -= 25;
-		x *= 0x1p25f;
-		u.f = x;
-		ix = u.i;
-	} else if (ix >= 0x7f800000) {
-		return x;
-	} else if (ix == 0x3f800000)
+	ix = asuint(x);
+	/* Fix sign of zero with downward rounding when x==1.  */
+	if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
 		return 0;
+	if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
+		/* x < 0x1p-126 or inf or nan.  */
+		if (ix * 2 == 0)
+			return __math_divzerof(1);
+		if (ix == 0x7f800000) /* log(inf) == inf.  */
+			return x;
+		if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+			return __math_invalidf(x);
+		/* x is subnormal, normalize it.  */
+		ix = asuint(x * 0x1p23f);
+		ix -= 23 << 23;
+	}
+
+	/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+	   The range is split into N subintervals.
+	   The ith subinterval contains z and c is near its center.  */
+	tmp = ix - OFF;
+	i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+	k = (int32_t)tmp >> 23; /* arithmetic shift */
+	iz = ix - (tmp & 0x1ff << 23);
+	invc = T[i].invc;
+	logc = T[i].logc;
+	z = (double_t)asfloat(iz);
 
-	/* reduce x into [sqrt(2)/2, sqrt(2)] */
-	ix += 0x3f800000 - 0x3f3504f3;
-	k += (int)(ix>>23) - 0x7f;
-	ix = (ix&0x007fffff) + 0x3f3504f3;
-	u.i = ix;
-	x = u.f;
+	/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+	r = z * invc - 1;
+	y0 = logc + (double_t)k * Ln2;
 
-	f = x - 1.0f;
-	s = f/(2.0f + f);
-	z = s*s;
-	w = z*z;
-	t1= w*(Lg2+w*Lg4);
-	t2= z*(Lg1+w*Lg3);
-	R = t2 + t1;
-	hfsq = 0.5f*f*f;
-	dk = k;
-	return s*(hfsq+R) + dk*ln2_lo - hfsq + f + dk*ln2_hi;
+	/* Pipelined polynomial evaluation to approximate log1p(r).  */
+	r2 = r * r;
+	y = A[1] * r + A[2];
+	y = A[0] * r2 + y;
+	y = y * r2 + (y0 + r);
+	return eval_as_float(y);
 }
-- 
cgit v1.2.1