diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2013-09-03 04:09:12 +0000 | 
|---|---|---|
| committer | Szabolcs Nagy <nsz@port70.net> | 2013-09-05 11:30:07 +0000 | 
| commit | ee2ee92d62c43f6658d37ddea4c316d2089d0fe9 (patch) | |
| tree | 1a3e5a63fd40fa763f87d5dd9efd021117ea1d11 /src/math/fmod.c | |
| parent | d1a2ead878c27ac4ec600740320f8b76e1f961e9 (diff) | |
| download | musl-ee2ee92d62c43f6658d37ddea4c316d2089d0fe9.tar.gz | |
math: rewrite remainder functions (remainder, remquo, fmod, modf)
* results are exact
* modfl follows truncl (raises inexact flag spuriously now)
* modf and modff only had cosmetic cleanup
* remainder is just a wrapper around remquo now
* using iterative shift+subtract for remquo and fmod
* ld80 and ld128 are supported as well
Diffstat (limited to 'src/math/fmod.c')
| -rw-r--r-- | src/math/fmod.c | 179 | 
1 files changed, 51 insertions, 128 deletions
diff --git a/src/math/fmod.c b/src/math/fmod.c index 84a1b4ac..6849722b 100644 --- a/src/math/fmod.c +++ b/src/math/fmod.c @@ -1,145 +1,68 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_fmod.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* - * fmod(x,y) - * Return x mod y in exact arithmetic - * Method: shift and subtract - */ - -#include "libm.h" - -static const double Zero[] = {0.0, -0.0,}; +#include <math.h> +#include <stdint.h>  double fmod(double x, double y)  { -	int32_t n,hx,hy,hz,ix,iy,sx,i; -	uint32_t lx,ly,lz; +	union {double f; uint64_t i;} ux = {x}, uy = {y}; +	int ex = ux.i>>52 & 0x7ff; +	int ey = uy.i>>52 & 0x7ff; +	int sx = ux.i>>63; +	uint64_t i; -	EXTRACT_WORDS(hx, lx, x); -	EXTRACT_WORDS(hy, ly, y); -	sx = hx & 0x80000000;  /* sign of x */ -	hx ^= sx;              /* |x| */ -	hy &= 0x7fffffff;      /* |y| */ +	/* in the followings uxi should be ux.i, but then gcc wrongly adds */ +	/* float load/store to inner loops ruining performance and code size */ +	uint64_t uxi = ux.i; -	/* purge off exception values */ -	if ((hy|ly) == 0 || hx >= 0x7ff00000 ||  /* y=0,or x not finite */ -	    (hy|((ly|-ly)>>31)) > 0x7ff00000)    /* or y is NaN */ +	if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)  		return (x*y)/(x*y); -	if (hx <= hy) { -		if (hx < hy || lx < ly)  /* |x| < |y| */ -			return x; -		if (lx == ly)            /* |x| = |y|, return x*0 */ -			return Zero[(uint32_t)sx>>31]; +	if (uxi<<1 <= uy.i<<1) { +		if (uxi<<1 == uy.i<<1) +			return 0*x; +		return x;  	} -	/* determine ix = ilogb(x) */ -	if (hx < 0x00100000) {  /* subnormal x */ -		if (hx == 0) { -			for (ix = -1043, i = lx; i > 0; i <<= 1) -				ix -= 1; -		} else { -			for (ix = -1022, i = hx<<11; i > 0; i <<= 1) -				ix -= 1; -		} -	} else -		ix = (hx>>20) - 1023; - -	/* determine iy = ilogb(y) */ -	if (hy < 0x00100000) {  /* subnormal y */ -		if (hy == 0) { -			for (iy = -1043, i = ly; i > 0; i <<= 1) -				iy -= 1; -		} else { -			for (iy = -1022, i = hy<<11; i > 0; i <<= 1) -				iy -= 1; -		} -	} else -		iy = (hy>>20) - 1023; - -	/* set up {hx,lx}, {hy,ly} and align y to x */ -	if (ix >= -1022) -		hx = 0x00100000|(0x000fffff&hx); -	else {       /* subnormal x, shift x to normal */ -		n = -1022-ix; -		if (n <= 31) { -			hx = (hx<<n)|(lx>>(32-n)); -			lx <<= n; -		} else { -			hx = lx<<(n-32); -			lx = 0; -		} +	/* normalize x and y */ +	if (!ex) { +		for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1); +		uxi <<= -ex + 1; +	} else { +		uxi &= -1ULL >> 12; +		uxi |= 1ULL << 52;  	} -	if(iy >= -1022) -		hy = 0x00100000|(0x000fffff&hy); -	else {       /* subnormal y, shift y to normal */ -		n = -1022-iy; -		if (n <= 31) { -			hy = (hy<<n)|(ly>>(32-n)); -			ly <<= n; -		} else { -			hy = ly<<(n-32); -			ly = 0; -		} +	if (!ey) { +		for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1); +		uy.i <<= -ey + 1; +	} else { +		uy.i &= -1ULL >> 12; +		uy.i |= 1ULL << 52;  	} -	/* fix point fmod */ -	n = ix - iy; -	while (n--) { -		hz = hx-hy; -		lz = lx-ly; -		if (lx < ly) -			hz -= 1; -		if (hz < 0) { -			hx = hx+hx+(lx>>31); -			lx = lx+lx; -		} else { -			if ((hz|lz) == 0)   /* return sign(x)*0 */ -				return Zero[(uint32_t)sx>>31]; -			hx = hz+hz+(lz>>31); -			lx = lz+lz; +	/* x mod y */ +	for (; ex > ey; ex--) { +		i = uxi - uy.i; +		if (i >> 63 == 0) { +			if (i == 0) +				return 0*x; +			uxi = i;  		} +		uxi <<= 1;  	} -	hz = hx-hy; -	lz = lx-ly; -	if (lx < ly) -		hz -= 1; -	if (hz >= 0) { -		hx = hz; -		lx = lz; +	i = uxi - uy.i; +	if (i >> 63 == 0) { +		if (i == 0) +			return 0*x; +		uxi = i;  	} +	for (; uxi>>52 == 0; uxi <<= 1, ex--); -	/* convert back to floating value and restore the sign */ -	if ((hx|lx) == 0)          /* return sign(x)*0 */ -		return Zero[(uint32_t)sx>>31]; -	while (hx < 0x00100000) {  /* normalize x */ -		hx = hx+hx+(lx>>31); -		lx = lx+lx; -		iy -= 1; -	} -	if (iy >= -1022) {         /* normalize output */ -		hx = ((hx-0x00100000)|((iy+1023)<<20)); -		INSERT_WORDS(x, hx|sx, lx); -	} else {                   /* subnormal output */ -		n = -1022 - iy; -		if (n <= 20) { -			lx = (lx>>n)|((uint32_t)hx<<(32-n)); -			hx >>= n; -		} else if (n <= 31) { -			lx = (hx<<(32-n))|(lx>>n); -			hx = sx; -		} else { -			lx = hx>>(n-32); hx = sx; -		} -		INSERT_WORDS(x, hx|sx, lx); +	/* scale result */ +	if (ex > 0) { +		uxi -= 1ULL << 52; +		uxi |= (uint64_t)ex << 52; +	} else { +		uxi >>= -ex + 1;  	} -	return x;  /* exact output */ +	uxi |= (uint64_t)sx << 63; +	ux.i = uxi; +	return ux.f;  }  | 
