From c2a0dfea629617a39af2f59bd400e1a3595d0783 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Tue, 3 Sep 2013 14:37:48 +0000
Subject: math: rewrite hypot

method: if there is a large difference between the scale of x and y
then the larger magnitude dominates, otherwise reduce x,y so the
argument of sqrt (x*x+y*y) does not overflow or underflow and calculate
the argument precisely using exact multiplication. If the argument
has less error than 1/sqrt(2) ~ 0.7 ulp, then the result has less error
than 1 ulp in nearest rounding mode.

the original fdlibm method was the same, except it used bit hacks
instead of dekker-veltkamp algorithm, which is problematic for long
double where different representations are supported. (the new hypot
and hypotl code should be smaller and faster on 32bit cpu archs with
fast fpu), the new code behaves differently in non-nearest rounding,
but the error should be still less than 2ulps.

ld80 and ld128 are supported
---
 src/math/hypotf.c | 107 ++++++++++++++----------------------------------------
 1 file changed, 28 insertions(+), 79 deletions(-)

(limited to 'src/math/hypotf.c')

diff --git a/src/math/hypotf.c b/src/math/hypotf.c
index 4d80178d..2fc214b7 100644
--- a/src/math/hypotf.c
+++ b/src/math/hypotf.c
@@ -1,86 +1,35 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_hypotf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#include "libm.h"
+#include <math.h>
+#include <stdint.h>
 
 float hypotf(float x, float y)
 {
-	float a,b,t1,t2,y1,y2,w;
-	int32_t j,k,ha,hb;
+	union {float f; uint32_t i;} ux = {x}, uy = {y}, ut;
+	float_t z;
 
-	GET_FLOAT_WORD(ha,x);
-	ha &= 0x7fffffff;
-	GET_FLOAT_WORD(hb,y);
-	hb &= 0x7fffffff;
-	if (hb > ha) {
-		a = y;
-		b = x;
-		j=ha; ha=hb; hb=j;
-	} else {
-		a = x;
-		b = y;
-	}
-	a = fabsf(a);
-	b = fabsf(b);
-	if (ha - hb > 0xf000000)  /* x/y > 2**30 */
-		return a+b;
-	k = 0;
-	if (ha > 0x58800000) {    /* a > 2**50 */
-		if(ha >= 0x7f800000) {  /* Inf or NaN */
-			/* Use original arg order iff result is NaN; quieten sNaNs. */
-			w = fabsf(x+0.0f) - fabsf(y+0.0f);
-			if (ha == 0x7f800000) w = a;
-			if (hb == 0x7f800000) w = b;
-			return w;
-		}
-		/* scale a and b by 2**-68 */
-		ha -= 0x22000000; hb -= 0x22000000; k += 68;
-		SET_FLOAT_WORD(a, ha);
-		SET_FLOAT_WORD(b, hb);
-	}
-	if (hb < 0x26800000) {    /* b < 2**-50 */
-		if (hb <= 0x007fffff) {  /* subnormal b or 0 */
-			if (hb == 0)
-				return a;
-			SET_FLOAT_WORD(t1, 0x7e800000);  /* t1 = 2^126 */
-			b *= t1;
-			a *= t1;
-			k -= 126;
-		} else {   /* scale a and b by 2^68 */
-			ha += 0x22000000;  /* a *= 2^68 */
-			hb += 0x22000000;  /* b *= 2^68 */
-			k -= 68;
-			SET_FLOAT_WORD(a, ha);
-			SET_FLOAT_WORD(b, hb);
-		}
+	ux.i &= -1U>>1;
+	uy.i &= -1U>>1;
+	if (ux.i < uy.i) {
+		ut = ux;
+		ux = uy;
+		uy = ut;
 	}
-	/* medium size a and b */
-	w = a - b;
-	if (w > b) {
-		SET_FLOAT_WORD(t1, ha&0xfffff000);
-		t2 = a - t1;
-		w  = sqrtf(t1*t1-(b*(-b)-t2*(a+t1)));
-	} else {
-		a  = a + a;
-		SET_FLOAT_WORD(y1, hb&0xfffff000);
-		y2 = b - y1;
-		SET_FLOAT_WORD(t1,(ha+0x00800000)&0xfffff000);
-		t2 = a - t1;
-		w  = sqrtf(t1*y1-(w*(-w)-(t1*y2+t2*b)));
+
+	x = ux.f;
+	y = uy.f;
+	if (uy.i == 0xff<<23)
+		return y;
+	if (ux.i >= 0xff<<23 || uy.i == 0 || ux.i - uy.i >= 25<<23)
+		return x + y;
+
+	z = 1;
+	if (ux.i >= (0x7f+60)<<23) {
+		z = 0x1p90f;
+		x *= 0x1p-90f;
+		y *= 0x1p-90f;
+	} else if (uy.i < (0x7f-60)<<23) {
+		z = 0x1p-90f;
+		x *= 0x1p90f;
+		y *= 0x1p90f;
 	}
-	if (k)
-		w = scalbnf(w, k);
-	return w;
+	return z*sqrtf((double)x*x + (double)y*y);
 }
-- 
cgit v1.2.1