diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2014-11-05 21:40:29 +0100 | 
|---|---|---|
| committer | Szabolcs Nagy <nsz@port70.net> | 2014-11-05 21:40:29 +0100 | 
| commit | a732e80d33b4fd6f510f7cec4f5573ef5d89bc4e (patch) | |
| tree | 66494d38f5374808c3816c83395d5f5850abb607 | |
| parent | de2b9c21d94e0b76b629fec0060d043f535eef01 (diff) | |
| download | musl-a732e80d33b4fd6f510f7cec4f5573ef5d89bc4e.tar.gz | |
math: fix x86_64 and x32 asm not to use sahf instruction
Some early x86_64 CPUs (released before 2006) did not support the sahf/lahf
instructions in 64-bit mode, so they should be avoided (the Intel manual says
they are only valid there if CPUID.80000001H:ECX.LAHF-SAHF[bit 0] = 1).
The workaround simplifies exp2l and expm1l because fucomip, which sets EFLAGS
directly, can be used instead of the fucomp;fnstsw;sahf sequence copied from i386.
In fmodl and remainderl, sahf;jp is replaced by a simple bit test of the C2
flag in the x87 status word (testb $4,%ah; jnz).
| -rw-r--r-- | src/math/x32/exp2l.s | 13 | ||||
| -rw-r--r-- | src/math/x32/fmodl.s | 4 | ||||
| -rw-r--r-- | src/math/x32/remainderl.s | 4 | ||||
| -rw-r--r-- | src/math/x86_64/exp2l.s | 13 | ||||
| -rw-r--r-- | src/math/x86_64/fmodl.s | 4 | ||||
| -rw-r--r-- | src/math/x86_64/remainderl.s | 4 | 
6 files changed, 14 insertions, 28 deletions
| diff --git a/src/math/x32/exp2l.s b/src/math/x32/exp2l.s index d9f4d6ed..dfb2bc7c 100644 --- a/src/math/x32/exp2l.s +++ b/src/math/x32/exp2l.s @@ -6,9 +6,7 @@ expm1l:  	fmulp  	movl $0xc2820000,-4(%esp)  	flds -4(%esp) -	fucomp %st(1) -	fnstsw %ax -	sahf +	fucomip %st(1)  	fld1  	jb 1f  		# x*log2e <= -65, return -1 without underflow @@ -17,11 +15,8 @@ expm1l:  	ret  1:	fld %st(1)  	fabs -	fucom %st(1) -	fnstsw %ax +	fucomip %st(1)  	fstp %st(0) -	fstp %st(0) -	sahf  	ja 1f  	f2xm1  	ret @@ -53,9 +48,7 @@ exp2l:  	fld %st(1)  	fsub %st(1)  	faddp -	fucomp %st(1) -	fnstsw -	sahf +	fucomip %st(1)  	je 2f             # x - 0x1p63 + 0x1p63 == x  	movl $1,(%esp)  	flds (%esp)       # 0x1p-149 diff --git a/src/math/x32/fmodl.s b/src/math/x32/fmodl.s index 9e4378ab..b9513204 100644 --- a/src/math/x32/fmodl.s +++ b/src/math/x32/fmodl.s @@ -5,7 +5,7 @@ fmodl:  	fldt 8(%esp)  1:	fprem  	fstsw %ax -	sahf -	jp 1b +	testb $4,%ah +	jnz 1b  	fstp %st(1)  	ret diff --git a/src/math/x32/remainderl.s b/src/math/x32/remainderl.s index c97f68ad..79bf4feb 100644 --- a/src/math/x32/remainderl.s +++ b/src/math/x32/remainderl.s @@ -5,7 +5,7 @@ remainderl:  	fldt 8(%esp)  1:	fprem1  	fstsw %ax -	sahf -	jp 1b +	testb $4,%ah +	jnz 1b  	fstp %st(1)  	ret diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s index 0d6cd563..0e9bdf9f 100644 --- a/src/math/x86_64/exp2l.s +++ b/src/math/x86_64/exp2l.s @@ -6,9 +6,7 @@ expm1l:  	fmulp  	movl $0xc2820000,-4(%rsp)  	flds -4(%rsp) -	fucomp %st(1) -	fnstsw %ax -	sahf +	fucomip %st(1)  	fld1  	jb 1f  		# x*log2e <= -65, return -1 without underflow @@ -17,11 +15,8 @@ expm1l:  	ret  1:	fld %st(1)  	fabs -	fucom %st(1) -	fnstsw %ax +	fucomip %st(1)  	fstp %st(0) -	fstp %st(0) -	sahf  	ja 1f  	f2xm1  	ret @@ -53,9 +48,7 @@ exp2l:  	fld %st(1)  	fsub %st(1)  	faddp -	fucomp %st(1) -	fnstsw -	sahf +	fucomip %st(1)  	je 2f             # x - 0x1p63 + 0x1p63 == x  	movl $1,(%rsp)  	flds (%rsp)       # 0x1p-149 diff --git a/src/math/x86_64/fmodl.s 
b/src/math/x86_64/fmodl.s index ca81e60c..cd8d2b7c 100644 --- a/src/math/x86_64/fmodl.s +++ b/src/math/x86_64/fmodl.s @@ -5,7 +5,7 @@ fmodl:  	fldt 8(%rsp)  1:	fprem  	fstsw %ax -	sahf -	jp 1b +	testb $4,%ah +	jnz 1b  	fstp %st(1)  	ret diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s index 75c12374..2c337cf5 100644 --- a/src/math/x86_64/remainderl.s +++ b/src/math/x86_64/remainderl.s @@ -5,7 +5,7 @@ remainderl:  	fldt 8(%rsp)  1:	fprem1  	fstsw %ax -	sahf -	jp 1b +	testb $4,%ah +	jnz 1b  	fstp %st(1)  	ret | 
