math: fix two fma issues (only affects non-nearest rounding mode, x86)

1) in downward rounding fma(1,1,-1) should be -0 but it was 0 with gcc, the code was correct but gcc does not support FENV_ACCESS ON so it used common subexpression elimination where it shouldn't have. now volatile memory access is used as a barrier after fesetround. 2) in directed rounding modes there is no double rounding issue so the complicated adjustments done for nearest rounding mode are not needed. the only exception to this rule is raising the underflow flag: assume "small" is an exactly representible subnormal value in double precision and "verysmall" is a much smaller value so that (long double)(small plus verysmall) == small then (double)(small plus verysmall) raises underflow because the result is an inexact subnormal, but (double)(long double)(small plus verysmall) does not because small is not a subnormal in long double precision and it is exact in double precision. now this problem is fixed by checking inexact using fenv when the result is subnormal
author: Szabolcs Nagy <nsz@port70.net> 2013-05-19 12:13:08 +0000
committer: Szabolcs Nagy <nsz@port70.net> 2013-05-19 12:13:08 +0000
commit: ffd8ac2dd50f99c3c83d7d9d845df9874ec3e7d5 (patch)
tree: 66808745c008d12935d2e46637457f623c547b22
parent: 83af1dd65a11f3da3f99ba861248a841c402ccaa (diff)
download: musl-ffd8ac2dd50f99c3c83d7d9d845df9874ec3e7d5.tar.gz
1 files changed, 38 insertions, 4 deletions
diff --git a/src/math/fma.c b/src/math/fma.c
index 17f1cdcc..89def795 100644
--- a/src/math/fma.c
+++ b/src/math/fma.c
@@ -119,9 +119,17 @@ double fma(double x, double y, double z)
 	} else if (ez > exy - 12) {
 		add(&hi, &lo2, xy, z);
 		if (hi == 0) {
+			/*
+			xy + z is 0, but it should be calculated with the
+			original rounding mode so the sign is correct, if the
+			compiler does not support FENV_ACCESS ON it does not
+			know about the changed rounding mode and eliminates
+			the xy + z below without the volatile memory access
+			*/
+			volatile double z_;
 			fesetround(round);
-			/* make sure that the sign of 0 is correct */
-			return (xy + z) + lo1;
+			z_ = z;
+			return (xy + z_) + lo1;
 		}
 	} else {
 		/*
@@ -135,10 +143,36 @@ double fma(double x, double y, double z)
 		hi = xy;
 		lo2 = z;
 	}
+	/*
+	the result is stored before return for correct precision and exceptions
+
+	one corner case is when the underflow flag should be raised because
+	the precise result is an inexact subnormal double, but the calculated
+	long double result is an exact subnormal double
+	(so rounding to double does not raise exceptions)
+
+	in nearest rounding mode dadd takes care of this: the last bit of the
+	result is adjusted so rounding sees an inexact value when it should
+
+	in non-nearest rounding mode fenv is used for the workaround
+	*/
 	fesetround(round);
 	if (round == FE_TONEAREST)
-		return dadd(hi, dadd(lo1, lo2));
-	return hi + (lo1 + lo2);
+		z = dadd(hi, dadd(lo1, lo2));
+	else {
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+		int e = fetestexcept(FE_INEXACT);
+		feclearexcept(FE_INEXACT);
+#endif
+		z = hi + (lo1 + lo2);
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+		if (getexp(z) < 0x3fff-1022 && fetestexcept(FE_INEXACT))
+			feraiseexcept(FE_UNDERFLOW);
+		else if (e)
+			feraiseexcept(FE_INEXACT);
+#endif
+	}
+	return z;
 }
 #else
 /* origin: FreeBSD /usr/src/lib/msun/src/s_fma.c */
author	Szabolcs Nagy <nsz@port70.net>	2013-05-19 12:13:08 +0000
committer	Szabolcs Nagy <nsz@port70.net>	2013-05-19 12:13:08 +0000
commit	ffd8ac2dd50f99c3c83d7d9d845df9874ec3e7d5 (patch)
tree	66808745c008d12935d2e46637457f623c547b22
parent	83af1dd65a11f3da3f99ba861248a841c402ccaa (diff)
download	musl-ffd8ac2dd50f99c3c83d7d9d845df9874ec3e7d5.tar.gz