#include <math.h>
#include <stdint.h>

/*
 * fmod - floating-point remainder of x / y, computed on the raw bit patterns.
 *
 * The result has the sign of x and a magnitude smaller than |y|.
 *
 * Special cases handled explicitly below:
 *   - y is +-0, y is NaN, or x has an all-ones exponent field (x is Inf or
 *     NaN): returns (x*y)/(x*y), which evaluates to NaN.
 *   - |x| <  |y|: returns x unchanged.
 *   - |x| == |y|: returns 0*x, a zero carrying the sign of x.
 *
 * Both operands are treated as 64-bit words with a 1-bit sign, an 11-bit
 * exponent field and a 52-bit fraction field.
 */
double fmod(double x, double y)
{
	union {double f; uint64_t i;} ux = {x}, uy = {y};
	int ex = ux.i>>52 & 0x7ff;   /* exponent field of x */
	int ey = uy.i>>52 & 0x7ff;   /* exponent field of y */
	int sx = ux.i>>63;           /* sign bit of x, re-applied to the result */
	uint64_t i;

	/*
	 * In the following, uxi should be ux.i, but then gcc wrongly adds
	 * float load/store to the inner loops, ruining performance and
	 * code size.
	 */
	uint64_t uxi = ux.i;

	/* "<<1" drops the sign bit, so these tests compare magnitudes only. */
	if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff) {
		return (x*y)/(x*y);
	}
	if (uxi<<1 <= uy.i<<1) {
		if (uxi<<1 == uy.i<<1) {
			return 0*x;
		}
		return x;
	}

	/*
	 * Normalize x and y: bring each significand into the form
	 * 1.fraction with the leading 1 stored explicitly at bit 52.
	 */
	if (!ex) {
		/*
		 * Exponent field is zero: count the leading zeros of the
		 * fraction by decrementing ex, then shift the top set bit
		 * up to bit 52.
		 */
		for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1) {
		}
		uxi <<= -ex + 1;
	} else {
		/* Keep the 52 fraction bits and make the implicit 1 explicit. */
		uxi &= -1ULL >> 12;
		uxi |= 1ULL << 52;
	}
	if (!ey) {
		for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1) {
		}
		uy.i <<= -ey + 1;
	} else {
		uy.i &= -1ULL >> 12;
		uy.i |= 1ULL << 52;
	}

	/*
	 * x mod y: shift-and-subtract, one bit per iteration.  The
	 * subtraction is kept only when it does not go negative (bit 63 of
	 * the unsigned difference is clear).
	 */
	for (; ex > ey; ex--) {
		i = uxi - uy.i;
		if (i >> 63 == 0) {
			if (i == 0) {
				return 0*x;
			}
			uxi = i;
		}
		uxi <<= 1;
	}
	/* Final step at equal exponents (no trailing shift). */
	i = uxi - uy.i;
	if (i >> 63 == 0) {
		if (i == 0) {
			return 0*x;
		}
		uxi = i;
	}
	/* Renormalize the remainder so its leading 1 is back at bit 52. */
	for (; uxi>>52 == 0; uxi <<= 1, ex--) {
	}

	/* Scale result: re-encode the exponent, or shift down when ex <= 0. */
	if (ex > 0) {
		uxi -= 1ULL << 52;              /* drop the explicit leading 1 */
		uxi |= (uint64_t)ex << 52;
	} else {
		uxi >>= -ex + 1;
	}
	uxi |= (uint64_t)sx << 63;
	ux.i = uxi;
	return ux.f;
}