diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/math/sqrt.c | 320 | ||||
| -rw-r--r-- | src/math/sqrt_data.c | 19 | ||||
| -rw-r--r-- | src/math/sqrt_data.h | 13 | 
3 files changed, 179 insertions, 173 deletions
diff --git a/src/math/sqrt.c b/src/math/sqrt.c index f1f6d76c..5ba26559 100644 --- a/src/math/sqrt.c +++ b/src/math/sqrt.c @@ -1,184 +1,158 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* sqrt(x) - * Return correctly rounded sqrt. - *           ------------------------------------------ - *           |  Use the hardware sqrt if you have one | - *           ------------------------------------------ - * Method: - *   Bit by bit method using integer arithmetic. (Slow, but portable) - *   1. Normalization - *      Scale x to y in [1,4) with even powers of 2: - *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then - *              sqrt(x) = 2^k * sqrt(y) - *   2. Bit by bit computation - *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1), - *           i                                                   0 - *                                     i+1         2 - *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1) - *           i      i            i                 i - * - *      To compute q    from q , one checks whether - *                  i+1       i - * - *                            -(i+1) 2 - *                      (q + 2      ) <= y.                     (2) - *                        i - *                                                            -(i+1) - *      If (2) is false, then q   = q ; otherwise q   = q  + 2      . - *                             i+1   i             i+1   i - * - *      With some algebric manipulation, it is not difficult to see - *      that (2) is equivalent to - *                             -(i+1) - *                      s  +  2       <= y                      (3) - *                       i                i - * - *      The advantage of (3) is that s  and y  can be computed by - *                                    i      i - *      the following recurrence formula: - *          if (3) is false - * - *          s     =  s  ,       y    = y   ;                    (4) - *           i+1      i          i+1    i - * - *          otherwise, - *                         -i                     -(i+1) - *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5) - *           i+1      i          i+1    i     i - * - *      One may easily use induction to prove (4) and (5). - *      Note. Since the left hand side of (3) contain only i+2 bits, - *            it does not necessary to do a full (53-bit) comparison - *            in (3). - *   3. Final rounding - *      After generating the 53 bits result, we compute one more bit. - *      Together with the remainder, we can decide whether the - *      result is exact, bigger than 1/2ulp, or less than 1/2ulp - *      (it will never equal to 1/2ulp). - *      The rounding mode can be detected by checking whether - *      huge + tiny is equal to huge, and whether huge - tiny is - *      equal to huge for some floating point number "huge" and "tiny". - * - * Special cases: - *      sqrt(+-0) = +-0         ... exact - *      sqrt(inf) = inf - *      sqrt(-ve) = NaN         ... with invalid signal - *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN - */ - +#include <stdint.h> +#include <math.h>  #include "libm.h" +#include "sqrt_data.h" -static const double tiny = 1.0e-300; +#define FENV_SUPPORT 1 -double sqrt(double x) +/* returns a*b*2^-32 - e, with error 0 <= e < 1.  */ +static inline uint32_t mul32(uint32_t a, uint32_t b)  { -	double z; -	int32_t sign = (int)0x80000000; -	int32_t ix0,s0,q,m,t,i; -	uint32_t r,t1,s1,ix1,q1; +	return (uint64_t)a*b >> 32; +} -	EXTRACT_WORDS(ix0, ix1, x); +/* returns a*b*2^-64 - e, with error 0 <= e < 3.  */ +static inline uint64_t mul64(uint64_t a, uint64_t b) +{ +	uint64_t ahi = a>>32; +	uint64_t alo = a&0xffffffff; +	uint64_t bhi = b>>32; +	uint64_t blo = b&0xffffffff; +	return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32); +} -	/* take care of Inf and NaN */ -	if ((ix0&0x7ff00000) == 0x7ff00000) { -		return x*x + x;  /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ -	} -	/* take care of zero */ -	if (ix0 <= 0) { -		if (((ix0&~sign)|ix1) == 0) -			return x;  /* sqrt(+-0) = +-0 */ -		if (ix0 < 0) -			return (x-x)/(x-x);  /* sqrt(-ve) = sNaN */ -	} -	/* normalize x */ -	m = ix0>>20; -	if (m == 0) {  /* subnormal x */ -		while (ix0 == 0) { -			m -= 21; -			ix0 |= (ix1>>11); -			ix1 <<= 21; -		} -		for (i=0; (ix0&0x00100000) == 0; i++) -			ix0<<=1; -		m -= i - 1; -		ix0 |= ix1>>(32-i); -		ix1 <<= i; -	} -	m -= 1023;    /* unbias exponent */ -	ix0 = (ix0&0x000fffff)|0x00100000; -	if (m & 1) {  /* odd m, double x to make it even */ -		ix0 += ix0 + ((ix1&sign)>>31); -		ix1 += ix1; -	} -	m >>= 1;      /* m = [m/2] */ - -	/* generate sqrt(x) bit by bit */ -	ix0 += ix0 + ((ix1&sign)>>31); -	ix1 += ix1; -	q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */ -	r = 0x00200000;        /* r = moving bit from right to left */ - -	while (r != 0) { -		t = s0 + r; -		if (t <= ix0) { -			s0   = t + r; -			ix0 -= t; -			q   += r; -		} -		ix0 += ix0 + ((ix1&sign)>>31); -		ix1 += ix1; -		r >>= 1; -	} +double sqrt(double x) +{ +	uint64_t ix, top, m; -	r = sign; -	while (r != 0) { -		t1 = s1 + r; -		t  = s0; -		if (t < ix0 || (t == ix0 && t1 <= ix1)) { -			s1 = t1 + r; -			if ((t1&sign) == sign && (s1&sign) == 0) -				s0++; -			ix0 -= t; -			if (ix1 < t1) -				ix0--; -			ix1 -= t1; -			q1 += r; -		} -		ix0 += ix0 + ((ix1&sign)>>31); -		ix1 += ix1; -		r >>= 1; +	/* special case handling.  */ +	ix = asuint64(x); +	top = ix >> 52; +	if (predict_false(top - 0x001 >= 0x7ff - 0x001)) { +		/* x < 0x1p-1022 or inf or nan.  */ +		if (ix * 2 == 0) +			return x; +		if (ix == 0x7ff0000000000000) +			return x; +		if (ix > 0x7ff0000000000000) +			return __math_invalid(x); +		/* x is subnormal, normalize it.  */ +		ix = asuint64(x * 0x1p52); +		top = ix >> 52; +		top -= 52;  	} -	/* use floating add to find out rounding direction */ -	if ((ix0|ix1) != 0) { -		z = 1.0 - tiny; /* raise inexact flag */ -		if (z >= 1.0) { -			z = 1.0 + tiny; -			if (q1 == (uint32_t)0xffffffff) { -				q1 = 0; -				q++; -			} else if (z > 1.0) { -				if (q1 == (uint32_t)0xfffffffe) -					q++; -				q1 += 2; -			} else -				q1 += q1 & 1; -		} +	/* argument reduction: +	   x = 4^e m; with integer e, and m in [1, 4) +	   m: fixed point representation [2.62] +	   2^e is the exponent part of the result.  */ +	int even = top & 1; +	m = (ix << 11) | 0x8000000000000000; +	if (even) m >>= 1; +	top = (top + 0x3ff) >> 1; + +	/* approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4) + +	   initial estimate: +	   7bit table lookup (1bit exponent and 6bit significand). + +	   iterative approximation: +	   using 2 goldschmidt iterations with 32bit int arithmetics +	   and a final iteration with 64bit int arithmetics. + +	   details: + +	   the relative error (e = r0 sqrt(m)-1) of a linear estimate +	   (r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best, +	   a table lookup is faster and needs one less iteration +	   6 bit lookup table (128b) gives |e| < 0x1.f9p-8 +	   7 bit lookup table (256b) gives |e| < 0x1.fdp-9 +	   for single and double prec 6bit is enough but for quad +	   prec 7bit is needed (or modified iterations). to avoid +	   one more iteration >=13bit table would be needed (16k). + +	   a newton-raphson iteration for r is +	     w = r*r +	     u = 3 - m*w +	     r = r*u/2 +	   can use a goldschmidt iteration for s at the end or +	     s = m*r + +	   first goldschmidt iteration is +	     s = m*r +	     u = 3 - s*r +	     r = r*u/2 +	     s = s*u/2 +	   next goldschmidt iteration is +	     u = 3 - s*r +	     r = r*u/2 +	     s = s*u/2 +	   and at the end r is not computed only s. + +	   they use the same amount of operations and converge at the +	   same quadratic rate, i.e. if +	     r1 sqrt(m) - 1 = e, then +	     r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3 +	   the advantage of goldschmidt is that the mul for s and r +	   are independent (computed in parallel), however it is not +	   "self synchronizing": it only uses the input m in the +	   first iteration so rounding errors accumulate. at the end +	   or when switching to larger precision arithmetics rounding +	   errors dominate so the first iteration should be used. + +	   the fixed point representations are +	     m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30 +	   and after switching to 64 bit +	     m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62  */ + +	static const uint64_t three = 0xc0000000; +	uint64_t r, s, d, u, i; + +	i = (ix >> 46) % 128; +	r = (uint32_t)__rsqrt_tab[i] << 16; +	/* |r sqrt(m) - 1| < 0x1.fdp-9 */ +	s = mul32(m>>32, r); +	/* |s/sqrt(m) - 1| < 0x1.fdp-9 */ +	d = mul32(s, r); +	u = three - d; +	r = mul32(r, u) << 1; +	/* |r sqrt(m) - 1| < 0x1.7bp-16 */ +	s = mul32(s, u) << 1; +	/* |s/sqrt(m) - 1| < 0x1.7bp-16 */ +	d = mul32(s, r); +	u = three - d; +	r = mul32(r, u) << 1; +	/* |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case) */ +	r = r << 32; +	s = mul64(m, r); +	d = mul64(s, r); +	u = (three<<32) - d; +	s = mul64(s, u);  /* repr: 3.61 */ +	/* -0x1p-57 < s - sqrt(m) < 0x1.8001p-61 */ +	s = (s - 2) >> 9; /* repr: 12.52 */ +	/* -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63 */ + +	/* s < sqrt(m) < s + 0x1.09p-52, +	   compute nearest rounded result: +	   the nearest result to 52 bits is either s or s+0x1p-52, +	   we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m.  */ +	uint64_t d0, d1, d2; +	double y, t; +	d0 = (m << 42) - s*s; +	d1 = s - d0; +	d2 = d1 + s + 1; +	s += d1 >> 63; +	s &= 0x000fffffffffffff; +	s |= top << 52; +	y = asdouble(s); +	if (FENV_SUPPORT) { +		/* handle rounding modes and inexact exception: +		   only (s+1)^2 == 2^42 m case is exact otherwise +		   add a tiny value to cause the fenv effects.  */ +		uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000; +		tiny |= (d1^d2) & 0x8000000000000000; +		t = asdouble(tiny); +		y = eval_as_double(y + t);  	} -	ix0 = (q>>1) + 0x3fe00000; -	ix1 = q1>>1; -	if (q&1) -		ix1 |= sign; -	INSERT_WORDS(z, ix0 + ((uint32_t)m << 20), ix1); -	return z; +	return y;  } diff --git a/src/math/sqrt_data.c b/src/math/sqrt_data.c new file mode 100644 index 00000000..61bc22f4 --- /dev/null +++ b/src/math/sqrt_data.c @@ -0,0 +1,19 @@ +#include "sqrt_data.h" +const uint16_t __rsqrt_tab[128] = { +0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43, +0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b, +0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1, +0x99f0,0x9913,0x983a,0x9765,0x9693,0x95c4,0x94f8,0x9430, +0x936b,0x92a9,0x91ea,0x912e,0x9075,0x8fbe,0x8f0a,0x8e59, +0x8daa,0x8cfe,0x8c54,0x8bac,0x8b07,0x8a64,0x89c4,0x8925, +0x8889,0x87ee,0x8756,0x86c0,0x862b,0x8599,0x8508,0x8479, +0x83ec,0x8361,0x82d8,0x8250,0x81c9,0x8145,0x80c2,0x8040, +0xff02,0xfd0e,0xfb25,0xf947,0xf773,0xf5aa,0xf3ea,0xf234, +0xf087,0xeee3,0xed47,0xebb3,0xea27,0xe8a3,0xe727,0xe5b2, +0xe443,0xe2dc,0xe17a,0xe020,0xdecb,0xdd7d,0xdc34,0xdaf1, +0xd9b3,0xd87b,0xd748,0xd61a,0xd4f1,0xd3cd,0xd2ad,0xd192, +0xd07b,0xcf69,0xce5b,0xcd51,0xcc4a,0xcb48,0xca4a,0xc94f, +0xc858,0xc764,0xc674,0xc587,0xc49d,0xc3b7,0xc2d4,0xc1f4, +0xc116,0xc03c,0xbf65,0xbe90,0xbdbe,0xbcef,0xbc23,0xbb59, +0xba91,0xb9cc,0xb90a,0xb84a,0xb78c,0xb6d0,0xb617,0xb560, +}; diff --git a/src/math/sqrt_data.h b/src/math/sqrt_data.h new file mode 100644 index 00000000..260c7f9c --- /dev/null +++ b/src/math/sqrt_data.h @@ -0,0 +1,13 @@ +#ifndef _SQRT_DATA_H +#define _SQRT_DATA_H + +#include <features.h> +#include <stdint.h> + +/* if x in [1,2): i = (int)(64*x); +   if x in [2,4): i = (int)(32*x-64); +   __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error: +   |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */ +extern hidden const uint16_t __rsqrt_tab[128]; + +#endif  | 
