math: fix x86 asin, atan, exp, log1p to raise underflow

underflow is raised by an inexact subnormal float store. since subnormal operations are slow, first check the underflow flag and skip the store if it is already raised.
author: Szabolcs Nagy <nsz@port70.net> 2013-08-15 10:56:57 +0000
committer: Szabolcs Nagy <nsz@port70.net> 2013-08-15 10:56:57 +0000
commit: 31c5fb80b9eae86f801be4f46025bc6532a554c5 (patch)
tree: c5d0912699727ebd96bb5194334ee4dd33dc9c5b /src
parent: 1b3973fb43fbef80dab1dfc9c788783e78ab5043 (diff)
download: musl-31c5fb80b9eae86f801be4f46025bc6532a554c5.tar.gz
6 files changed, 98 insertions, 3 deletions
diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s
index 932c7542..a9f691bf 100644
--- a/src/math/i386/asin.s
+++ b/src/math/i386/asin.s
@@ -2,7 +2,18 @@
 .type asinf,@function
 asinf:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret
 
 .global asinl
 .type asinl,@function
@@ -14,6 +25,16 @@ asinl:
 .type asin,@function
 asin:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
 1:	fld %st(0)
 	fld1
 	fsub %st(0),%st(1)
diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s
index 7e28b395..d73137b2 100644
--- a/src/math/i386/atan.s
+++ b/src/math/i386/atan.s
@@ -2,6 +2,16 @@
 .type atan,@function
 atan:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jb 1f
 	fld1
 	fpatan
 	ret
+		# subnormal x, return x with underflow
+1:	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s
index 3cd40233..8caddefa 100644
--- a/src/math/i386/atanf.s
+++ b/src/math/i386/atanf.s
@@ -2,6 +2,18 @@
 .type atanf,@function
 atanf:
 	flds 4(%esp)
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jb 1f
 	fld1
 	fpatan
 	ret
+		# subnormal x, return x with underflow
+1:	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index e3b42af5..e5f54588 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -2,7 +2,18 @@
 .type expm1f,@function
 expm1f:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fld %st(0)
+	fmul %st(1)
+	fstps 4(%esp)
+2:	ret
 
 .global expm1l
 .type expm1l,@function
@@ -14,10 +25,32 @@ expm1l:
 .type expm1,@function
 expm1:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	fsts 4(%esp)
+2:	ret
 1:	fldl2e
 	fmulp
+	mov $0xc2820000,%eax
+	push %eax
+	flds (%esp)
+	pop %eax
+	fucomp %st(1)
+	fnstsw %ax
+	sahf
 	fld1
-	fld %st(1)
+	jb 1f
+		# x*log2e < -65, return -1 without underflow
+	fstp %st(1)
+	fchs
+	ret
+1:	fld %st(1)
 	fabs
 	fucom %st(1)
 	fnstsw %ax
diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s
index 9971e53c..6b6929c7 100644
--- a/src/math/i386/log1p.s
+++ b/src/math/i386/log1p.s
@@ -7,9 +7,18 @@ log1p:
 	fldl 4(%esp)
 	cmp $0x3fd28f00,%eax
 	ja 1f
+	cmp $0x00100000,%eax
+	jb 2f
 	fyl2xp1
 	ret
 1:	fld1
 	faddp
 	fyl2x
 	ret
+		# subnormal x, return x with underflow
+2:	fnstsw %ax
+	and $16,%ax
+	jnz 1f
+	fsts 4(%esp)
+	fstp %st(1)
+1:	ret
diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s
index 2680a8a6..c0bcd30f 100644
--- a/src/math/i386/log1pf.s
+++ b/src/math/i386/log1pf.s
@@ -7,9 +7,19 @@ log1pf:
 	flds 4(%esp)
 	cmp $0x3e940000,%eax
 	ja 1f
+	cmp $0x00800000,%eax
+	jb 2f
 	fyl2xp1
 	ret
 1:	fld1
 	faddp
 	fyl2x
 	ret
+		# subnormal x, return x with underflow
+2:	fnstsw %ax
+	and $16,%ax
+	jnz 1f
+	fxch
+	fmul %st(1)
+	fstps 4(%esp)
+1:	ret
author	Szabolcs Nagy <nsz@port70.net>	2013-08-15 10:56:57 +0000
committer	Szabolcs Nagy <nsz@port70.net>	2013-08-15 10:56:57 +0000
commit	31c5fb80b9eae86f801be4f46025bc6532a554c5 (patch)
tree	c5d0912699727ebd96bb5194334ee4dd33dc9c5b /src
parent	1b3973fb43fbef80dab1dfc9c788783e78ab5043 (diff)
download	musl-31c5fb80b9eae86f801be4f46025bc6532a554c5.tar.gz