diff options
author | Szabolcs Nagy <nsz@port70.net> | 2013-08-15 10:56:57 +0000 |
---|---|---|
committer | Szabolcs Nagy <nsz@port70.net> | 2013-08-15 10:56:57 +0000 |
commit | 31c5fb80b9eae86f801be4f46025bc6532a554c5 (patch) | |
tree | c5d0912699727ebd96bb5194334ee4dd33dc9c5b | |
parent | 1b3973fb43fbef80dab1dfc9c788783e78ab5043 (diff) | |
download | musl-31c5fb80b9eae86f801be4f46025bc6532a554c5.tar.gz |
math: fix x86 asin, atan, exp, log1p to raise underflow
underflow is raised by an inexact subnormal float store,
since subnormal operations are slow, check the underflow
flag and skip the store if it's already raised
-rw-r--r-- | src/math/i386/asin.s | 23 | ||||
-rw-r--r-- | src/math/i386/atan.s | 10 | ||||
-rw-r--r-- | src/math/i386/atanf.s | 12 | ||||
-rw-r--r-- | src/math/i386/exp.s | 37 | ||||
-rw-r--r-- | src/math/i386/log1p.s | 9 | ||||
-rw-r--r-- | src/math/i386/log1pf.s | 10 |
6 files changed, 98 insertions, 3 deletions
diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s index 932c7542..a9f691bf 100644 --- a/src/math/i386/asin.s +++ b/src/math/i386/asin.s @@ -2,7 +2,18 @@ .type asinf,@function asinf: flds 4(%esp) - jmp 1f + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + fld %st(0) + fmul %st(1) + fstps 4(%esp) +2: ret .global asinl .type asinl,@function @@ -14,6 +25,16 @@ asinl: .type asin,@function asin: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + fsts 4(%esp) +2: ret 1: fld %st(0) fld1 fsub %st(0),%st(1) diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s index 7e28b395..d73137b2 100644 --- a/src/math/i386/atan.s +++ b/src/math/i386/atan.s @@ -2,6 +2,16 @@ .type atan,@function atan: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jb 1f fld1 fpatan ret + # subnormal x, return x with underflow +1: fnstsw %ax + and $16,%ax + jnz 2f + fsts 4(%esp) +2: ret diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s index 3cd40233..8caddefa 100644 --- a/src/math/i386/atanf.s +++ b/src/math/i386/atanf.s @@ -2,6 +2,18 @@ .type atanf,@function atanf: flds 4(%esp) + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jb 1f fld1 fpatan ret + # subnormal x, return x with underflow +1: fnstsw %ax + and $16,%ax + jnz 2f + fld %st(0) + fmul %st(1) + fstps 4(%esp) +2: ret diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index e3b42af5..e5f54588 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -2,7 +2,18 @@ .type expm1f,@function expm1f: flds 4(%esp) - jmp 1f + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + fld %st(0) + fmul %st(1) + fstps 4(%esp) +2: ret .global expm1l .type expm1l,@function @@ -14,10 +25,32 @@ expm1l: .type expm1,@function expm1: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + fsts 4(%esp) +2: ret 1: fldl2e fmulp + mov $0xc2820000,%eax + push %eax + flds (%esp) + pop %eax + fucomp %st(1) + fnstsw %ax + sahf fld1 - fld %st(1) + jb 1f + # x*log2e < -65, return -1 without underflow + fstp %st(1) + fchs + ret +1: fld %st(1) fabs fucom %st(1) fnstsw %ax diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s index 9971e53c..6b6929c7 100644 --- a/src/math/i386/log1p.s +++ b/src/math/i386/log1p.s @@ -7,9 +7,18 @@ log1p: fldl 4(%esp) cmp $0x3fd28f00,%eax ja 1f + cmp $0x00100000,%eax + jb 2f fyl2xp1 ret 1: fld1 faddp fyl2x ret + # subnormal x, return x with underflow +2: fnstsw %ax + and $16,%ax + jnz 1f + fsts 4(%esp) + fstp %st(1) +1: ret diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s index 2680a8a6..c0bcd30f 100644 --- a/src/math/i386/log1pf.s +++ b/src/math/i386/log1pf.s @@ -7,9 +7,19 @@ log1pf: flds 4(%esp) cmp $0x3e940000,%eax ja 1f + cmp $0x00800000,%eax + jb 2f fyl2xp1 ret 1: fld1 faddp fyl2x ret + # subnormal x, return x with underflow +2: fnstsw %ax + and $16,%ax + jnz 1f + fxch + fmul %st(1) + fstps 4(%esp) +1: ret |