From ee2ee92d62c43f6658d37ddea4c316d2089d0fe9 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 3 Sep 2013 04:09:12 +0000 Subject: math: rewrite remainder functions (remainder, remquo, fmod, modf) * results are exact * modfl follows truncl (raises inexact flag spuriously now) * modf and modff only had cosmetic cleanup * remainder is just a wrapper around remquo now * using iterative shift+subtract for remquo and fmod * ld80 and ld128 are supported as well --- src/math/fmod.c | 179 ++++++++++++++++---------------------------------------- 1 file changed, 51 insertions(+), 128 deletions(-) (limited to 'src/math/fmod.c') diff --git a/src/math/fmod.c b/src/math/fmod.c index 84a1b4ac..6849722b 100644 --- a/src/math/fmod.c +++ b/src/math/fmod.c @@ -1,145 +1,68 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_fmod.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* - * fmod(x,y) - * Return x mod y in exact arithmetic - * Method: shift and subtract - */ - -#include "libm.h" - -static const double Zero[] = {0.0, -0.0,}; +#include +#include double fmod(double x, double y) { - int32_t n,hx,hy,hz,ix,iy,sx,i; - uint32_t lx,ly,lz; + union {double f; uint64_t i;} ux = {x}, uy = {y}; + int ex = ux.i>>52 & 0x7ff; + int ey = uy.i>>52 & 0x7ff; + int sx = ux.i>>63; + uint64_t i; - EXTRACT_WORDS(hx, lx, x); - EXTRACT_WORDS(hy, ly, y); - sx = hx & 0x80000000; /* sign of x */ - hx ^= sx; /* |x| */ - hy &= 0x7fffffff; /* |y| */ + /* in the followings uxi should be ux.i, but then gcc wrongly adds */ + /* float load/store to inner loops ruining performance and code size */ + uint64_t uxi = ux.i; - /* purge off exception values */ - if ((hy|ly) == 0 || hx >= 0x7ff00000 || /* y=0,or x not finite */ - (hy|((ly|-ly)>>31)) > 0x7ff00000) /* or y is NaN */ + if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff) return (x*y)/(x*y); - if (hx <= hy) { - if (hx < hy || lx < ly) /* |x| < |y| */ - return x; - if (lx == ly) /* |x| = |y|, return x*0 */ - return Zero[(uint32_t)sx>>31]; + if (uxi<<1 <= uy.i<<1) { + if (uxi<<1 == uy.i<<1) + return 0*x; + return x; } - /* determine ix = ilogb(x) */ - if (hx < 0x00100000) { /* subnormal x */ - if (hx == 0) { - for (ix = -1043, i = lx; i > 0; i <<= 1) - ix -= 1; - } else { - for (ix = -1022, i = hx<<11; i > 0; i <<= 1) - ix -= 1; - } - } else - ix = (hx>>20) - 1023; - - /* determine iy = ilogb(y) */ - if (hy < 0x00100000) { /* subnormal y */ - if (hy == 0) { - for (iy = -1043, i = ly; i > 0; i <<= 1) - iy -= 1; - } else { - for (iy = -1022, i = hy<<11; i > 0; i <<= 1) - iy -= 1; - } - } else - iy = (hy>>20) - 1023; - - /* set up {hx,lx}, {hy,ly} and align y to x */ - if (ix >= -1022) - hx = 0x00100000|(0x000fffff&hx); - else { /* subnormal x, shift x to normal */ - n = -1022-ix; - if (n <= 31) { - hx = (hx<>(32-n)); - lx <<= n; - } else { - hx = lx<<(n-32); - lx = 0; - } + /* normalize x and y */ + if (!ex) { + for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1); + uxi <<= -ex + 1; + } else { + uxi &= -1ULL >> 12; + uxi |= 1ULL << 52; } - if(iy >= -1022) - hy = 0x00100000|(0x000fffff&hy); - else { /* subnormal y, shift y to normal */ - n = -1022-iy; - if (n <= 31) { - hy = (hy<>(32-n)); - ly <<= n; - } else { - hy = ly<<(n-32); - ly = 0; - } + if (!ey) { + for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1); + uy.i <<= -ey + 1; + } else { + uy.i &= -1ULL >> 12; + uy.i |= 1ULL << 52; } - /* fix point fmod */ - n = ix - iy; - while (n--) { - hz = hx-hy; - lz = lx-ly; - if (lx < ly) - hz -= 1; - if (hz < 0) { - hx = hx+hx+(lx>>31); - lx = lx+lx; - } else { - if ((hz|lz) == 0) /* return sign(x)*0 */ - return Zero[(uint32_t)sx>>31]; - hx = hz+hz+(lz>>31); - lx = lz+lz; + /* x mod y */ + for (; ex > ey; ex--) { + i = uxi - uy.i; + if (i >> 63 == 0) { + if (i == 0) + return 0*x; + uxi = i; } + uxi <<= 1; } - hz = hx-hy; - lz = lx-ly; - if (lx < ly) - hz -= 1; - if (hz >= 0) { - hx = hz; - lx = lz; + i = uxi - uy.i; + if (i >> 63 == 0) { + if (i == 0) + return 0*x; + uxi = i; } + for (; uxi>>52 == 0; uxi <<= 1, ex--); - /* convert back to floating value and restore the sign */ - if ((hx|lx) == 0) /* return sign(x)*0 */ - return Zero[(uint32_t)sx>>31]; - while (hx < 0x00100000) { /* normalize x */ - hx = hx+hx+(lx>>31); - lx = lx+lx; - iy -= 1; - } - if (iy >= -1022) { /* normalize output */ - hx = ((hx-0x00100000)|((iy+1023)<<20)); - INSERT_WORDS(x, hx|sx, lx); - } else { /* subnormal output */ - n = -1022 - iy; - if (n <= 20) { - lx = (lx>>n)|((uint32_t)hx<<(32-n)); - hx >>= n; - } else if (n <= 31) { - lx = (hx<<(32-n))|(lx>>n); - hx = sx; - } else { - lx = hx>>(n-32); hx = sx; - } - INSERT_WORDS(x, hx|sx, lx); + /* scale result */ + if (ex > 0) { + uxi -= 1ULL << 52; + uxi |= (uint64_t)ex << 52; + } else { + uxi >>= -ex + 1; } - return x; /* exact output */ + uxi |= (uint64_t)sx << 63; + ux.i = uxi; + return ux.f; } -- cgit v1.2.1