summaryrefslogtreecommitdiff
path: root/src/math/fmodl.c
diff options
context:
space:
mode:
author Szabolcs Nagy <nsz@port70.net> 2013-09-03 04:09:12 +0000
committer Szabolcs Nagy <nsz@port70.net> 2013-09-05 11:30:07 +0000
commit ee2ee92d62c43f6658d37ddea4c316d2089d0fe9 (patch)
tree 1a3e5a63fd40fa763f87d5dd9efd021117ea1d11 /src/math/fmodl.c
parent d1a2ead878c27ac4ec600740320f8b76e1f961e9 (diff)
download musl-ee2ee92d62c43f6658d37ddea4c316d2089d0fe9.tar.gz
math: rewrite remainder functions (remainder, remquo, fmod, modf)
* results are exact
* modfl follows truncl (raises inexact flag spuriously now)
* modf and modff only had cosmetic cleanup
* remainder is just a wrapper around remquo now
* using iterative shift+subtract for remquo and fmod
* ld80 and ld128 are supported as well
Diffstat (limited to 'src/math/fmodl.c')
-rw-r--r--src/math/fmodl.c213
1 files changed, 80 insertions, 133 deletions
diff --git a/src/math/fmodl.c b/src/math/fmodl.c
index b930c49d..54af6a3f 100644
--- a/src/math/fmodl.c
+++ b/src/math/fmodl.c
@@ -1,15 +1,3 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_fmodl.c */
-/*-
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
#include "libm.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
@@ -18,141 +6,100 @@ long double fmodl(long double x, long double y)
return fmod(x, y);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-
-#define BIAS (LDBL_MAX_EXP - 1)
-
-#if LDBL_MANL_SIZE > 32
-typedef uint64_t manl_t;
-#else
-typedef uint32_t manl_t;
-#endif
-
-#if LDBL_MANH_SIZE > 32
-typedef uint64_t manh_t;
-#else
-typedef uint32_t manh_t;
-#endif
-
-/*
- * These macros add and remove an explicit integer bit in front of the
- * fractional mantissa, if the architecture doesn't have such a bit by
- * default already.
- */
-#ifdef LDBL_IMPLICIT_NBIT
-#define SET_NBIT(hx) ((hx) | (1ULL << LDBL_MANH_SIZE))
-#define HFRAC_BITS LDBL_MANH_SIZE
-#else
-#define SET_NBIT(hx) (hx)
-#define HFRAC_BITS (LDBL_MANH_SIZE - 1)
-#endif
-
-#define MANL_SHIFT (LDBL_MANL_SIZE - 1)
-
-static const long double Zero[] = {0.0, -0.0,};
-
-/*
- * fmodl(x,y)
- * Return x mod y in exact arithmetic
- * Method: shift and subtract
- *
- * Assumptions:
- * - The low part of the mantissa fits in a manl_t exactly.
- * - The high part of the mantissa fits in an int64_t with enough room
- * for an explicit integer bit in front of the fractional bits.
- */
long double fmodl(long double x, long double y)
{
- union IEEEl2bits ux, uy;
- int64_t hx,hz; /* We need a carry bit even if LDBL_MANH_SIZE is 32. */
- manh_t hy;
- manl_t lx,ly,lz;
- int ix,iy,n,sx;
-
- ux.e = x;
- uy.e = y;
- sx = ux.bits.sign;
+ union ldshape ux = {x}, uy = {y};
+ int ex = ux.i.se & 0x7fff; /* low 15 bits of se: biased exponent of x */
+ int ey = uy.i.se & 0x7fff; /* low 15 bits of se: biased exponent of y */
+ int sx = ux.i.se & 0x8000; /* bit 15 of se: sign of x, reapplied to the result */
- /* purge off exception values */
- if ((uy.bits.exp|uy.bits.manh|uy.bits.manl) == 0 || /* y=0 */
- ux.bits.exp == BIAS + LDBL_MAX_EXP || /* or x not finite */
- (uy.bits.exp == BIAS + LDBL_MAX_EXP &&
- ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl) != 0)) /* or y is NaN */
+ if (y == 0 || isnan(y) || ex == 0x7fff) /* y is 0 or NaN, or x is inf/NaN */
return (x*y)/(x*y);
- if (ux.bits.exp <= uy.bits.exp) {
- if (ux.bits.exp < uy.bits.exp ||
- (ux.bits.manh<=uy.bits.manh &&
- (ux.bits.manh<uy.bits.manh ||
- ux.bits.manl<uy.bits.manl))) /* |x|<|y| return x or x-y */
- return x;
- if (ux.bits.manh==uy.bits.manh && ux.bits.manl==uy.bits.manl)
- return Zero[sx]; /* |x| = |y| return x*0 */
+ ux.i.se = ex; /* drop the sign bit: ux.f is now |x| */
+ uy.i.se = ey; /* drop the sign bit: uy.f is now |y| */
+ if (ux.f <= uy.f) {
+ if (ux.f == uy.f)
+ return 0*x; /* |x| == |y|: zero carrying the sign of x */
+ return x; /* |x| < |y|: x is already the remainder */
}
- /* determine ix = ilogb(x) */
- if (ux.bits.exp == 0) { /* subnormal x */
- ux.e *= 0x1.0p512;
- ix = ux.bits.exp - (BIAS + 512);
- } else {
- ix = ux.bits.exp - BIAS;
+ /* normalize x and y */
+ if (!ex) {
+ ux.f *= 0x1p120f;
+ ex = ux.i.se - 120; /* undo the 2^120 scaling in the exponent only */
}
-
- /* determine iy = ilogb(y) */
- if (uy.bits.exp == 0) { /* subnormal y */
- uy.e *= 0x1.0p512;
- iy = uy.bits.exp - (BIAS + 512);
- } else {
- iy = uy.bits.exp - BIAS;
+ if (!ey) {
+ uy.f *= 0x1p120f;
+ ey = uy.i.se - 120; /* undo the 2^120 scaling in the exponent only */
}
- /* set up {hx,lx}, {hy,ly} and align y to x */
- hx = SET_NBIT(ux.bits.manh);
- hy = SET_NBIT(uy.bits.manh);
- lx = ux.bits.manl;
- ly = uy.bits.manl;
-
- /* fix point fmod */
- n = ix - iy;
-
- while (n--) {
- hz = hx-hy;
- lz = lx-ly;
- if (lx < ly)
- hz -= 1;
- if (hz < 0) {
- hx = hx+hx+(lx>>MANL_SHIFT);
- lx = lx+lx;
+ /* x mod y */
+#if LDBL_MANT_DIG == 64
+ uint64_t i, mx, my;
+ mx = ux.i.m;
+ my = uy.i.m;
+ for (; ex > ey; ex--) {
+ i = mx - my;
+ if (mx >= my) {
+ if (i == 0)
+ return 0*x;
+ mx = 2*i;
+ } else if (2*mx < mx) { /* doubling wraps: top bit of mx is set */
+ mx = 2*mx - my;
} else {
- if ((hz|lz)==0) /* return sign(x)*0 */
- return Zero[sx];
- hx = hz+hz+(lz>>MANL_SHIFT);
- lx = lz+lz;
+ mx = 2*mx;
}
}
- hz = hx-hy;
- lz = lx-ly;
- if (lx < ly)
- hz -= 1;
- if (hz >= 0) {
- hx = hz;
- lx = lz;
+ i = mx - my;
+ if (mx >= my) {
+ if (i == 0)
+ return 0*x;
+ mx = i;
}
-
- /* convert back to floating value and restore the sign */
- if ((hx|lx) == 0) /* return sign(x)*0 */
- return Zero[sx];
- while (hx < (1ULL<<HFRAC_BITS)) { /* normalize x */
- hx = hx+hx+(lx>>MANL_SHIFT);
- lx = lx+lx;
- iy -= 1;
+ for (; mx >> 63 == 0; mx *= 2, ex--); /* shift until bit 63 is set again */
+ ux.i.m = mx;
+#elif LDBL_MANT_DIG == 113
+ uint64_t hi, lo, xhi, xlo, yhi, ylo;
+ xhi = (ux.i2.hi & -1ULL>>16) | 1ULL<<48; /* low 48 bits of hi, with bit 48 set */
+ yhi = (uy.i2.hi & -1ULL>>16) | 1ULL<<48; /* low 48 bits of hi, with bit 48 set */
+ xlo = ux.i2.lo;
+ ylo = uy.i2.lo; /* low 64 mantissa bits of y */
+ for (; ex > ey; ex--) {
+ hi = xhi - yhi;
+ lo = xlo - ylo;
+ if (xlo < ylo)
+ hi -= 1; /* borrow from the low-word subtraction */
+ if (hi >> 63 == 0) { /* difference is non-negative */
+ if ((hi|lo) == 0)
+ return 0*x;
+ xhi = 2*hi + (lo>>63);
+ xlo = 2*lo;
+ } else {
+ xhi = 2*xhi + (xlo>>63);
+ xlo = 2*xlo;
+ }
}
- ux.bits.manh = hx; /* The mantissa is truncated here if needed. */
- ux.bits.manl = lx;
- if (iy < LDBL_MIN_EXP) {
- ux.bits.exp = iy + (BIAS + 512);
- ux.e *= 0x1p-512;
- } else {
- ux.bits.exp = iy + BIAS;
+ hi = xhi - yhi;
+ lo = xlo - ylo;
+ if (xlo < ylo)
+ hi -= 1;
+ if (hi >> 63 == 0) {
+ if ((hi|lo) == 0)
+ return 0*x;
+ xhi = hi;
+ xlo = lo;
}
- return ux.e; /* exact output */
+ for (; xhi >> 48 == 0; xhi = 2*xhi + (xlo>>63), xlo = 2*xlo, ex--); /* shift until bit 48 is set again */
+ ux.i2.hi = xhi;
+ ux.i2.lo = xlo;
+#endif
+
+ /* scale result */
+ if (ex <= 0) {
+ ux.i.se = (ex+120)|sx;
+ ux.f *= 0x1p-120f;
+ } else
+ ux.i.se = ex|sx;
+ return ux.f;
}
#endif