summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Szabolcs Nagy <nsz@port70.net> 2014-11-05 21:40:29 +0100
committer: Szabolcs Nagy <nsz@port70.net> 2014-11-05 21:40:29 +0100
commit: a732e80d33b4fd6f510f7cec4f5573ef5d89bc4e (patch)
tree: 66494d38f5374808c3816c83395d5f5850abb607
parent: de2b9c21d94e0b76b629fec0060d043f535eef01 (diff)
download: musl-a732e80d33b4fd6f510f7cec4f5573ef5d89bc4e.tar.gz
math: fix x86_64 and x32 asm not to use sahf instruction
Some early x86_64 CPUs (released before 2006) did not support the sahf/lahf instructions in 64-bit mode, so they should be avoided (the Intel manual says they are only supported in 64-bit mode if CPUID.80000001H:ECX.LAHF-SAHF[bit 0] = 1). The workaround simplifies exp2l and expm1l, because fucomip can be used instead of the fucomp; fnstsw; sahf sequence copied from i386. In fmodl and remainderl, sahf is replaced by a simple bit test of the C2 flag in %ah.
-rw-r--r--  src/math/x32/exp2l.s          13
-rw-r--r--  src/math/x32/fmodl.s           4
-rw-r--r--  src/math/x32/remainderl.s      4
-rw-r--r--  src/math/x86_64/exp2l.s       13
-rw-r--r--  src/math/x86_64/fmodl.s        4
-rw-r--r--  src/math/x86_64/remainderl.s   4
6 files changed, 14 insertions, 28 deletions
diff --git a/src/math/x32/exp2l.s b/src/math/x32/exp2l.s
index d9f4d6ed..dfb2bc7c 100644
--- a/src/math/x32/exp2l.s
+++ b/src/math/x32/exp2l.s
@@ -6,9 +6,7 @@ expm1l:
fmulp
movl $0xc2820000,-4(%esp)
flds -4(%esp)
- fucomp %st(1)
- fnstsw %ax
- sahf
+ fucomip %st(1)
fld1
jb 1f
# x*log2e <= -65, return -1 without underflow
@@ -17,11 +15,8 @@ expm1l:
ret
1: fld %st(1)
fabs
- fucom %st(1)
- fnstsw %ax
+ fucomip %st(1)
fstp %st(0)
- fstp %st(0)
- sahf
ja 1f
f2xm1
ret
@@ -53,9 +48,7 @@ exp2l:
fld %st(1)
fsub %st(1)
faddp
- fucomp %st(1)
- fnstsw
- sahf
+ fucomip %st(1)
je 2f # x - 0x1p63 + 0x1p63 == x
movl $1,(%esp)
flds (%esp) # 0x1p-149
diff --git a/src/math/x32/fmodl.s b/src/math/x32/fmodl.s
index 9e4378ab..b9513204 100644
--- a/src/math/x32/fmodl.s
+++ b/src/math/x32/fmodl.s
@@ -5,7 +5,7 @@ fmodl:
fldt 8(%esp)
1: fprem
fstsw %ax
- sahf
- jp 1b
+ testb $4,%ah
+ jnz 1b
fstp %st(1)
ret
diff --git a/src/math/x32/remainderl.s b/src/math/x32/remainderl.s
index c97f68ad..79bf4feb 100644
--- a/src/math/x32/remainderl.s
+++ b/src/math/x32/remainderl.s
@@ -5,7 +5,7 @@ remainderl:
fldt 8(%esp)
1: fprem1
fstsw %ax
- sahf
- jp 1b
+ testb $4,%ah
+ jnz 1b
fstp %st(1)
ret
diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s
index 0d6cd563..0e9bdf9f 100644
--- a/src/math/x86_64/exp2l.s
+++ b/src/math/x86_64/exp2l.s
@@ -6,9 +6,7 @@ expm1l:
fmulp
movl $0xc2820000,-4(%rsp)
flds -4(%rsp)
- fucomp %st(1)
- fnstsw %ax
- sahf
+ fucomip %st(1)
fld1
jb 1f
# x*log2e <= -65, return -1 without underflow
@@ -17,11 +15,8 @@ expm1l:
ret
1: fld %st(1)
fabs
- fucom %st(1)
- fnstsw %ax
+ fucomip %st(1)
fstp %st(0)
- fstp %st(0)
- sahf
ja 1f
f2xm1
ret
@@ -53,9 +48,7 @@ exp2l:
fld %st(1)
fsub %st(1)
faddp
- fucomp %st(1)
- fnstsw
- sahf
+ fucomip %st(1)
je 2f # x - 0x1p63 + 0x1p63 == x
movl $1,(%rsp)
flds (%rsp) # 0x1p-149
diff --git a/src/math/x86_64/fmodl.s b/src/math/x86_64/fmodl.s
index ca81e60c..cd8d2b7c 100644
--- a/src/math/x86_64/fmodl.s
+++ b/src/math/x86_64/fmodl.s
@@ -5,7 +5,7 @@ fmodl:
fldt 8(%rsp)
1: fprem
fstsw %ax
- sahf
- jp 1b
+ testb $4,%ah
+ jnz 1b
fstp %st(1)
ret
diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s
index 75c12374..2c337cf5 100644
--- a/src/math/x86_64/remainderl.s
+++ b/src/math/x86_64/remainderl.s
@@ -5,7 +5,7 @@ remainderl:
fldt 8(%rsp)
1: fprem1
fstsw %ax
- sahf
- jp 1b
+ testb $4,%ah
+ jnz 1b
fstp %st(1)
ret