From 31c5fb80b9eae86f801be4f46025bc6532a554c5 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Thu, 15 Aug 2013 10:56:57 +0000 Subject: math: fix x86 asin, atan, exp, log1p to raise underflow underflow is raised by an inexact subnormal float store, since subnormal operations are slow, check the underflow flag and skip the store if it's already raised --- src/math/i386/exp.s | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'src/math/i386/exp.s') diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index e3b42af5..e5f54588 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -2,7 +2,18 @@ .type expm1f,@function expm1f: flds 4(%esp) - jmp 1f + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + fld %st(0) + fmul %st(1) + fstps 4(%esp) +2: ret .global expm1l .type expm1l,@function @@ -14,10 +25,32 @@ expm1l: .type expm1,@function expm1: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + fsts 4(%esp) +2: ret 1: fldl2e fmulp + mov $0xc2820000,%eax + push %eax + flds (%esp) + pop %eax + fucomp %st(1) + fnstsw %ax + sahf fld1 - fld %st(1) + jb 1f + # x*log2e < -65, return -1 without underflow + fstp %st(1) + fchs + ret +1: fld %st(1) fabs fucom %st(1) fnstsw %ax -- cgit v1.2.1