summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/math/cbrt.c36
-rw-r--r--src/math/cbrtf.c31
-rw-r--r--src/math/cbrtl.c64
3 files changed, 59 insertions, 72 deletions
diff --git a/src/math/cbrt.c b/src/math/cbrt.c
index f4253428..7599d3e3 100644
--- a/src/math/cbrt.c
+++ b/src/math/cbrt.c
@@ -15,7 +15,8 @@
* Return cube root of x
*/
-#include "libm.h"
+#include <math.h>
+#include <stdint.h>
static const uint32_t
B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
@@ -31,15 +32,10 @@ P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
double cbrt(double x)
{
- int32_t hx;
- union dshape u;
- double r,s,t=0.0,w;
- uint32_t sign;
- uint32_t high,low;
+ union {double f; uint64_t i;} u = {x};
+ double_t r,s,t,w;
+ uint32_t hx = u.i>>32 & 0x7fffffff;
- EXTRACT_WORDS(hx, low, x);
- sign = hx & 0x80000000;
- hx ^= sign;
if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */
return x+x;
@@ -59,14 +55,16 @@ double cbrt(double x)
* division rounds towards minus infinity; this is also efficient.
*/
if (hx < 0x00100000) { /* zero or subnormal? */
- if ((hx|low) == 0)
+ u.f = x*0x1p54;
+ hx = u.i>>32 & 0x7fffffff;
+ if (hx == 0)
return x; /* cbrt(0) is itself */
- SET_HIGH_WORD(t, 0x43500000); /* set t = 2**54 */
- t *= x;
- GET_HIGH_WORD(high, t);
- INSERT_WORDS(t, sign|((high&0x7fffffff)/3+B2), 0);
+ hx = hx/3 + B2;
} else
- INSERT_WORDS(t, sign|(hx/3+B1), 0);
+ hx = hx/3 + B1;
+ u.i &= 1ULL<<63;
+ u.i |= (uint64_t)hx << 32;
+ t = u.f;
/*
* New cbrt to 23 bits:
@@ -76,7 +74,7 @@ double cbrt(double x)
* has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
* gives us bounds for r = t**3/x.
*
- * Try to optimize for parallel evaluation as in k_tanf.c.
+ * Try to optimize for parallel evaluation as in __tanf.c.
*/
r = (t*t)*(t/x);
t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4));
@@ -91,9 +89,9 @@ double cbrt(double x)
* 0.667; the error in the rounded t can be up to about 3 23-bit ulps
* before the final error is larger than 0.667 ulps.
*/
- u.value = t;
- u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;
- t = u.value;
+ u.f = t;
+ u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
+ t = u.f;
/* one step Newton iteration to 53 bits with error < 0.667 ulps */
s = t*t; /* t*t is exact */
diff --git a/src/math/cbrtf.c b/src/math/cbrtf.c
index 4a984b10..89c2c865 100644
--- a/src/math/cbrtf.c
+++ b/src/math/cbrtf.c
@@ -17,7 +17,8 @@
* Return cube root of x
*/
-#include "libm.h"
+#include <math.h>
+#include <stdint.h>
static const unsigned
B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
@@ -25,15 +26,10 @@ B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
float cbrtf(float x)
{
- double r,T;
- float t;
- int32_t hx;
- uint32_t sign;
- uint32_t high;
+ double_t r,T;
+ union {float f; uint32_t i;} u = {x};
+ uint32_t hx = u.i & 0x7fffffff;
- GET_FLOAT_WORD(hx, x);
- sign = hx & 0x80000000;
- hx ^= sign;
if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */
return x + x;
@@ -41,28 +37,29 @@ float cbrtf(float x)
if (hx < 0x00800000) { /* zero or subnormal? */
if (hx == 0)
return x; /* cbrt(+-0) is itself */
- SET_FLOAT_WORD(t, 0x4b800000); /* set t = 2**24 */
- t *= x;
- GET_FLOAT_WORD(high, t);
- SET_FLOAT_WORD(t, sign|((high&0x7fffffff)/3+B2));
+ u.f = x*0x1p24f;
+ hx = u.i & 0x7fffffff;
+ hx = hx/3 + B2;
} else
- SET_FLOAT_WORD(t, sign|(hx/3+B1));
+ hx = hx/3 + B1;
+ u.i &= 0x80000000;
+ u.i |= hx;
/*
* First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In
* double precision so that its terms can be arranged for efficiency
* without causing overflow or underflow.
*/
- T = t;
+ T = u.f;
r = T*T*T;
- T = T*((double)x+x+r)/(x+r+r);
+ T = T*((double_t)x+x+r)/(x+r+r);
/*
* Second step Newton iteration to 47 bits. In double precision for
* efficiency and accuracy.
*/
r = T*T*T;
- T = T*((double)x+x+r)/(x+r+r);
+ T = T*((double_t)x+x+r)/(x+r+r);
/* rounding to 24 bits is perfect in round-to-nearest mode */
return T;
diff --git a/src/math/cbrtl.c b/src/math/cbrtl.c
index 0af65ef5..ceff9136 100644
--- a/src/math/cbrtl.c
+++ b/src/math/cbrtl.c
@@ -23,58 +23,50 @@ long double cbrtl(long double x)
return cbrt(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-
-#define BIAS (LDBL_MAX_EXP - 1)
static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
long double cbrtl(long double x)
{
- union IEEEl2bits u, v;
+ union ldshape u = {x}, v;
+ union {float f; uint32_t i;} uft;
long double r, s, t, w;
- double dr, dt, dx;
- float ft, fx;
- uint32_t hx;
- uint16_t expsign;
- int k;
-
- u.e = x;
- expsign = u.xbits.expsign;
- k = expsign & 0x7fff;
+ double_t dr, dt, dx;
+ float_t ft;
+ int e = u.i.se & 0x7fff;
+ int sign = u.i.se & 0x8000;
/*
* If x = +-Inf, then cbrt(x) = +-Inf.
* If x = NaN, then cbrt(x) = NaN.
*/
- if (k == BIAS + LDBL_MAX_EXP)
+ if (e == 0x7fff)
return x + x;
-
- if (k == 0) {
+ if (e == 0) {
+ /* Adjust subnormal numbers. */
+ u.f *= 0x1p120;
+ e = u.i.se & 0x7fff;
/* If x = +-0, then cbrt(x) = +-0. */
- if ((u.bits.manh | u.bits.manl) == 0)
+ if (e == 0)
return x;
- /* Adjust subnormal numbers. */
- u.e *= 0x1.0p514;
- k = u.bits.exp;
- k -= BIAS + 514;
- } else
- k -= BIAS;
- u.xbits.expsign = BIAS;
- v.e = 1;
-
- x = u.e;
- switch (k % 3) {
+ e -= 120;
+ }
+ e -= 0x3fff;
+ u.i.se = 0x3fff;
+ x = u.f;
+ switch (e % 3) {
case 1:
case -2:
- x = 2*x;
- k--;
+ x *= 2;
+ e--;
break;
case 2:
case -1:
- x = 4*x;
- k -= 2;
+ x *= 4;
+ e -= 2;
break;
}
- v.xbits.expsign = (expsign & 0x8000) | (BIAS + k / 3);
+ v.f = 1.0;
+ v.i.se = sign | (0x3fff + e/3);
/*
* The following is the guts of s_cbrtf, with the handling of
@@ -83,9 +75,9 @@ long double cbrtl(long double x)
*/
/* ~5-bit estimate: */
- fx = x;
- GET_FLOAT_WORD(hx, fx);
- SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1));
+ uft.f = x;
+ uft.i = (uft.i & 0x7fffffff)/3 + B1;
+ ft = uft.f;
/* ~16-bit estimate: */
dx = x;
@@ -126,7 +118,7 @@ long double cbrtl(long double x)
r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */
- t *= v.e;
+ t *= v.f;
return t;
}
#endif