diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/math/sqrtf.c | 140 | 
1 files changed, 70 insertions, 70 deletions
diff --git a/src/math/sqrtf.c b/src/math/sqrtf.c index d6ace38a..740d81cb 100644 --- a/src/math/sqrtf.c +++ b/src/math/sqrtf.c @@ -1,83 +1,83 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - +#include <stdint.h> +#include <math.h>  #include "libm.h" +#include "sqrt_data.h" -static const float tiny = 1.0e-30; +#define FENV_SUPPORT 1 -float sqrtf(float x) +static inline uint32_t mul32(uint32_t a, uint32_t b)  { -	float z; -	int32_t sign = (int)0x80000000; -	int32_t ix,s,q,m,t,i; -	uint32_t r; +	return (uint64_t)a*b >> 32; +} -	GET_FLOAT_WORD(ix, x); +/* see sqrt.c for more detailed comments.  */ -	/* take care of Inf and NaN */ -	if ((ix&0x7f800000) == 0x7f800000) -		return x*x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ +float sqrtf(float x) +{ +	uint32_t ix, m, m1, m0, even, ey; -	/* take care of zero */ -	if (ix <= 0) { -		if ((ix&~sign) == 0) -			return x;  /* sqrt(+-0) = +-0 */ -		if (ix < 0) -			return (x-x)/(x-x);  /* sqrt(-ve) = sNaN */ -	} -	/* normalize x */ -	m = ix>>23; -	if (m == 0) {  /* subnormal x */ -		for (i = 0; (ix&0x00800000) == 0; i++) -			ix<<=1; -		m -= i - 1; +	ix = asuint(x); +	if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { +		/* x < 0x1p-126 or inf or nan.  */ +		if (ix * 2 == 0) +			return x; +		if (ix == 0x7f800000) +			return x; +		if (ix > 0x7f800000) +			return __math_invalidf(x); +		/* x is subnormal, normalize it.  */ +		ix = asuint(x * 0x1p23f); +		ix -= 23 << 23;  	} -	m -= 127;  /* unbias exponent */ -	ix = (ix&0x007fffff)|0x00800000; -	if (m&1)  /* odd m, double x to make it even */ -		ix += ix; -	m >>= 1;  /* m = [m/2] */ -	/* generate sqrt(x) bit by bit */ -	ix += ix; -	q = s = 0;       /* q = sqrt(x) */ -	r = 0x01000000;  /* r = moving bit from right to left */ +	/* x = 4^e m; with int e and m in [1, 4).  */ +	even = ix & 0x00800000; +	m1 = (ix << 8) | 0x80000000; +	m0 = (ix << 7) & 0x7fffffff; +	m = even ? m0 : m1; -	while (r != 0) { -		t = s + r; -		if (t <= ix) { -			s = t+r; -			ix -= t; -			q += r; -		} -		ix += ix; -		r >>= 1; -	} +	/* 2^e is the exponent part of the return value.  */ +	ey = ix >> 1; +	ey += 0x3f800000 >> 1; +	ey &= 0x7f800000; + +	/* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations.  */ +	static const uint32_t three = 0xc0000000; +	uint32_t r, s, d, u, i; +	i = (ix >> 17) % 128; +	r = (uint32_t)__rsqrt_tab[i] << 16; +	/* |r*sqrt(m) - 1| < 0x1p-8 */ +	s = mul32(m, r); +	/* |s/sqrt(m) - 1| < 0x1p-8 */ +	d = mul32(s, r); +	u = three - d; +	r = mul32(r, u) << 1; +	/* |r*sqrt(m) - 1| < 0x1.7bp-16 */ +	s = mul32(s, u) << 1; +	/* |s/sqrt(m) - 1| < 0x1.7bp-16 */ +	d = mul32(s, r); +	u = three - d; +	s = mul32(s, u); +	/* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */ +	s = (s - 1)>>6; +	/* s < sqrt(m) < s + 0x1.08p-23 */ -	/* use floating add to find out rounding direction */ -	if (ix != 0) { -		z = 1.0f - tiny; /* raise inexact flag */ -		if (z >= 1.0f) { -			z = 1.0f + tiny; -			if (z > 1.0f) -				q += 2; -			else -				q += q & 1; -		} +	/* compute nearest rounded result.  */ +	uint32_t d0, d1, d2; +	float y, t; +	d0 = (m << 16) - s*s; +	d1 = s - d0; +	d2 = d1 + s + 1; +	s += d1 >> 31; +	s &= 0x007fffff; +	s |= ey; +	y = asfloat(s); +	if (FENV_SUPPORT) { +		/* handle rounding and inexact exception. */ +		uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000; +		tiny |= (d1^d2) & 0x80000000; +		t = asfloat(tiny); +		y = eval_as_float(y + t);  	} -	ix = (q>>1) + 0x3f000000; -	SET_FLOAT_WORD(z, ix + ((uint32_t)m << 23)); -	return z; +	return y;  }  | 
