diff options
| -rw-r--r-- | src/math/fma.c | 16 | ||||
| -rw-r--r-- | src/math/fmaf.c | 36 | ||||
| -rw-r--r-- | src/math/fmal.c | 16 | 
3 files changed, 55 insertions, 13 deletions
| diff --git a/src/math/fma.c b/src/math/fma.c index 84868be5..02f5c865 100644 --- a/src/math/fma.c +++ b/src/math/fma.c @@ -431,12 +431,24 @@ double fma(double x, double y, double z)  		/*  		 * There is no need to worry about double rounding in directed  		 * rounding modes. -		 * TODO: underflow is not raised properly, example in downward rounding: +		 * But underflow may not be raised properly, example in downward rounding:  		 * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066)  		 */ +		double ret; +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) +		int e = fetestexcept(FE_INEXACT); +		feclearexcept(FE_INEXACT); +#endif  		fesetround(oround);  		adj = r.lo + xy.lo; -		return scalbn(r.hi + adj, spread); +		ret = scalbn(r.hi + adj, spread); +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) +		if (ilogb(ret) < -1022 && fetestexcept(FE_INEXACT)) +			feraiseexcept(FE_UNDERFLOW); +		else if (e) +			feraiseexcept(FE_INEXACT); +#endif +		return ret;  	}  	adj = add_adjusted(r.lo, xy.lo); diff --git a/src/math/fmaf.c b/src/math/fmaf.c index 745ee393..aa57feb6 100644 --- a/src/math/fmaf.c +++ b/src/math/fmaf.c @@ -26,7 +26,8 @@   */  #include <fenv.h> -#include "libm.h" +#include <math.h> +#include <stdint.h>  /*   * Fused multiply-add: Compute x * y + z with a single rounding error. @@ -39,21 +40,35 @@ float fmaf(float x, float y, float z)  {  	#pragma STDC FENV_ACCESS ON  	double xy, result; -	uint32_t hr, lr; +	union {double f; uint64_t i;} u; +	int e;  	xy = (double)x * y;  	result = xy + z; -	EXTRACT_WORDS(hr, lr, result); +	u.f = result; +	e = u.i>>52 & 0x7ff;  	/* Common case: The double precision result is fine. */ -	if ((lr & 0x1fffffff) != 0x10000000 ||  /* not a halfway case */ -		(hr & 0x7ff00000) == 0x7ff00000 ||  /* NaN */ +	if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */ +		e == 0x7ff ||                   /* NaN */  		result - xy == z ||                 /* exact */  		fegetround() != FE_TONEAREST)       /* not round-to-nearest */  	{  		/* -		TODO: underflow is not raised correctly, example in -		downward rouding: fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) +		underflow may not be raised correctly, example: +		fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)  		*/ +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) +		if (e < 0x3ff-126 && e >= 0x3ff-149 && fetestexcept(FE_INEXACT)) { +			feclearexcept(FE_INEXACT); +			/* TODO: gcc and clang bug workaround */ +			volatile float vz = z; +			result = xy + vz; +			if (fetestexcept(FE_INEXACT)) +				feraiseexcept(FE_UNDERFLOW); +			else +				feraiseexcept(FE_INEXACT); +		} +#endif  		z = result;  		return z;  	} @@ -68,8 +83,11 @@ float fmaf(float x, float y, float z)  	volatile double vxy = xy;  /* XXX work around gcc CSE bug */  	double adjusted_result = vxy + z;  	fesetround(FE_TONEAREST); -	if (result == adjusted_result) -		SET_LOW_WORD(adjusted_result, lr + 1); +	if (result == adjusted_result) { +		u.f = adjusted_result; +		u.i++; +		adjusted_result = u.f; +	}  	z = adjusted_result;  	return z;  } diff --git a/src/math/fmal.c b/src/math/fmal.c index c68db255..4506aac6 100644 --- a/src/math/fmal.c +++ b/src/math/fmal.c @@ -264,12 +264,24 @@ long double fmal(long double x, long double y, long double z)  		/*  		 * There is no need to worry about double rounding in directed  		 * rounding modes. -		 * TODO: underflow is not raised correctly, example in downward rounding: +		 * But underflow may not be raised correctly, example in downward rounding:  		 * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-16440L)  		 */ +		long double ret; +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) +		int e = fetestexcept(FE_INEXACT); +		feclearexcept(FE_INEXACT); +#endif  		fesetround(oround);  		adj = r.lo + xy.lo; -		return scalbnl(r.hi + adj, spread); +		ret = scalbnl(r.hi + adj, spread); +#if defined(FE_INEXACT) && defined(FE_UNDERFLOW) +		if (ilogbl(ret) < -16382 && fetestexcept(FE_INEXACT)) +			feraiseexcept(FE_UNDERFLOW); +		else if (e) +			feraiseexcept(FE_INEXACT); +#endif +		return ret;  	}  	adj = add_adjusted(r.lo, xy.lo); | 
