diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2013-08-15 10:56:57 +0000 | 
|---|---|---|
| committer | Szabolcs Nagy <nsz@port70.net> | 2013-08-15 10:56:57 +0000 | 
| commit | 31c5fb80b9eae86f801be4f46025bc6532a554c5 (patch) | |
| tree | c5d0912699727ebd96bb5194334ee4dd33dc9c5b /src | |
| parent | 1b3973fb43fbef80dab1dfc9c788783e78ab5043 (diff) | |
| download | musl-31c5fb80b9eae86f801be4f46025bc6532a554c5.tar.gz | |
math: fix x86 asin, atan, exp, log1p to raise underflow
Underflow is raised by an inexact subnormal float store.
Since subnormal operations are slow, check the underflow
flag and skip the store if it is already raised.
Diffstat (limited to 'src')
| -rw-r--r-- | src/math/i386/asin.s | 23 | ||||
| -rw-r--r-- | src/math/i386/atan.s | 10 | ||||
| -rw-r--r-- | src/math/i386/atanf.s | 12 | ||||
| -rw-r--r-- | src/math/i386/exp.s | 37 | ||||
| -rw-r--r-- | src/math/i386/log1p.s | 9 | ||||
| -rw-r--r-- | src/math/i386/log1pf.s | 10 | 
6 files changed, 98 insertions, 3 deletions
diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s index 932c7542..a9f691bf 100644 --- a/src/math/i386/asin.s +++ b/src/math/i386/asin.s @@ -2,7 +2,18 @@  .type asinf,@function  asinf:  	flds 4(%esp) -	jmp 1f +	mov 4(%esp),%eax +	add %eax,%eax +	cmp $0x01000000,%eax +	jae 1f +		# subnormal x, return x with underflow +	fnstsw %ax +	and $16,%ax +	jnz 2f +	fld %st(0) +	fmul %st(1) +	fstps 4(%esp) +2:	ret  .global asinl  .type asinl,@function @@ -14,6 +25,16 @@ asinl:  .type asin,@function  asin:  	fldl 4(%esp) +	mov 8(%esp),%eax +	add %eax,%eax +	cmp $0x00200000,%eax +	jae 1f +		# subnormal x, return x with underflow +	fnstsw %ax +	and $16,%ax +	jnz 2f +	fsts 4(%esp) +2:	ret  1:	fld %st(0)  	fld1  	fsub %st(0),%st(1) diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s index 7e28b395..d73137b2 100644 --- a/src/math/i386/atan.s +++ b/src/math/i386/atan.s @@ -2,6 +2,16 @@  .type atan,@function  atan:  	fldl 4(%esp) +	mov 8(%esp),%eax +	add %eax,%eax +	cmp $0x00200000,%eax +	jb 1f  	fld1  	fpatan  	ret +		# subnormal x, return x with underflow +1:	fnstsw %ax +	and $16,%ax +	jnz 2f +	fsts 4(%esp) +2:	ret diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s index 3cd40233..8caddefa 100644 --- a/src/math/i386/atanf.s +++ b/src/math/i386/atanf.s @@ -2,6 +2,18 @@  .type atanf,@function  atanf:  	flds 4(%esp) +	mov 4(%esp),%eax +	add %eax,%eax +	cmp $0x01000000,%eax +	jb 1f  	fld1  	fpatan  	ret +		# subnormal x, return x with underflow +1:	fnstsw %ax +	and $16,%ax +	jnz 2f +	fld %st(0) +	fmul %st(1) +	fstps 4(%esp) +2:	ret diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index e3b42af5..e5f54588 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -2,7 +2,18 @@  .type expm1f,@function  expm1f:  	flds 4(%esp) -	jmp 1f +	mov 4(%esp),%eax +	add %eax,%eax +	cmp $0x01000000,%eax +	jae 1f +		# subnormal x, return x with underflow +	fnstsw %ax +	and $16,%ax +	jnz 2f +	fld %st(0) +	fmul %st(1) +	fstps 4(%esp) +2:	ret  .global expm1l  .type 
expm1l,@function @@ -14,10 +25,32 @@ expm1l:  .type expm1,@function  expm1:  	fldl 4(%esp) +	mov 8(%esp),%eax +	add %eax,%eax +	cmp $0x00200000,%eax +	jae 1f +		# subnormal x, return x with underflow +	fnstsw %ax +	and $16,%ax +	jnz 2f +	fsts 4(%esp) +2:	ret  1:	fldl2e  	fmulp +	mov $0xc2820000,%eax +	push %eax +	flds (%esp) +	pop %eax +	fucomp %st(1) +	fnstsw %ax +	sahf  	fld1 -	fld %st(1) +	jb 1f +		# x*log2e < -65, return -1 without underflow +	fstp %st(1) +	fchs +	ret +1:	fld %st(1)  	fabs  	fucom %st(1)  	fnstsw %ax diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s index 9971e53c..6b6929c7 100644 --- a/src/math/i386/log1p.s +++ b/src/math/i386/log1p.s @@ -7,9 +7,18 @@ log1p:  	fldl 4(%esp)  	cmp $0x3fd28f00,%eax  	ja 1f +	cmp $0x00100000,%eax +	jb 2f  	fyl2xp1  	ret  1:	fld1  	faddp  	fyl2x  	ret +		# subnormal x, return x with underflow +2:	fnstsw %ax +	and $16,%ax +	jnz 1f +	fsts 4(%esp) +	fstp %st(1) +1:	ret diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s index 2680a8a6..c0bcd30f 100644 --- a/src/math/i386/log1pf.s +++ b/src/math/i386/log1pf.s @@ -7,9 +7,19 @@ log1pf:  	flds 4(%esp)  	cmp $0x3e940000,%eax  	ja 1f +	cmp $0x00800000,%eax +	jb 2f  	fyl2xp1  	ret  1:	fld1  	faddp  	fyl2x  	ret +		# subnormal x, return x with underflow +2:	fnstsw %ax +	and $16,%ax +	jnz 1f +	fxch +	fmul %st(1) +	fstps 4(%esp) +1:	ret  | 
