From a732e80d33b4fd6f510f7cec4f5573ef5d89bc4e Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Wed, 5 Nov 2014 21:40:29 +0100
Subject: math: fix x86_64 and x32 asm not to use sahf instruction

Some early x86_64 CPUs (released before 2006) did not support the
sahf/lahf instructions in 64-bit mode, so they should be avoided (the
Intel manual says they are only supported there if
CPUID.80000001H:ECX.LAHF-SAHF[bit 0] = 1).

The workaround simplifies exp2l and expm1l because fucomip, which sets
the CPU flags directly, can be used instead of the fucom(p);fnstsw;sahf
sequences copied from i386.

In fmodl and remainderl, sahf is replaced by a simple bit test of the
C2 flag (bit 2 of %ah after fstsw %ax).
---
 src/math/x32/exp2l.s         | 13 +++----------
 src/math/x32/fmodl.s         |  4 ++--
 src/math/x32/remainderl.s    |  4 ++--
 src/math/x86_64/exp2l.s      | 13 +++----------
 src/math/x86_64/fmodl.s      |  4 ++--
 src/math/x86_64/remainderl.s |  4 ++--
 6 files changed, 14 insertions(+), 28 deletions(-)

(limited to 'src')

diff --git a/src/math/x32/exp2l.s b/src/math/x32/exp2l.s
index d9f4d6ed..dfb2bc7c 100644
--- a/src/math/x32/exp2l.s
+++ b/src/math/x32/exp2l.s
@@ -6,9 +6,7 @@ expm1l:
 	fmulp
 	movl $0xc2820000,-4(%esp)
 	flds -4(%esp)
-	fucomp %st(1)
-	fnstsw %ax
-	sahf
+	fucomip %st(1)
 	fld1
 	jb 1f
 		# x*log2e <= -65, return -1 without underflow
@@ -17,11 +15,8 @@ expm1l:
 	ret
 1:	fld %st(1)
 	fabs
-	fucom %st(1)
-	fnstsw %ax
+	fucomip %st(1)
 	fstp %st(0)
-	fstp %st(0)
-	sahf
 	ja 1f
 	f2xm1
 	ret
@@ -53,9 +48,7 @@ exp2l:
 	fld %st(1)
 	fsub %st(1)
 	faddp
-	fucomp %st(1)
-	fnstsw
-	sahf
+	fucomip %st(1)
 	je 2f             # x - 0x1p63 + 0x1p63 == x
 	movl $1,(%esp)
 	flds (%esp)       # 0x1p-149
diff --git a/src/math/x32/fmodl.s b/src/math/x32/fmodl.s
index 9e4378ab..b9513204 100644
--- a/src/math/x32/fmodl.s
+++ b/src/math/x32/fmodl.s
@@ -5,7 +5,7 @@ fmodl:
 	fldt 8(%esp)
 1:	fprem
 	fstsw %ax
-	sahf
-	jp 1b
+	testb $4,%ah
+	jnz 1b
 	fstp %st(1)
 	ret
diff --git a/src/math/x32/remainderl.s b/src/math/x32/remainderl.s
index c97f68ad..79bf4feb 100644
--- a/src/math/x32/remainderl.s
+++ b/src/math/x32/remainderl.s
@@ -5,7 +5,7 @@ remainderl:
 	fldt 8(%esp)
 1:	fprem1
 	fstsw %ax
-	sahf
-	jp 1b
+	testb $4,%ah
+	jnz 1b
 	fstp %st(1)
 	ret
diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s
index 0d6cd563..0e9bdf9f 100644
--- a/src/math/x86_64/exp2l.s
+++ b/src/math/x86_64/exp2l.s
@@ -6,9 +6,7 @@ expm1l:
 	fmulp
 	movl $0xc2820000,-4(%rsp)
 	flds -4(%rsp)
-	fucomp %st(1)
-	fnstsw %ax
-	sahf
+	fucomip %st(1)
 	fld1
 	jb 1f
 		# x*log2e <= -65, return -1 without underflow
@@ -17,11 +15,8 @@ expm1l:
 	ret
 1:	fld %st(1)
 	fabs
-	fucom %st(1)
-	fnstsw %ax
+	fucomip %st(1)
 	fstp %st(0)
-	fstp %st(0)
-	sahf
 	ja 1f
 	f2xm1
 	ret
@@ -53,9 +48,7 @@ exp2l:
 	fld %st(1)
 	fsub %st(1)
 	faddp
-	fucomp %st(1)
-	fnstsw
-	sahf
+	fucomip %st(1)
 	je 2f             # x - 0x1p63 + 0x1p63 == x
 	movl $1,(%rsp)
 	flds (%rsp)       # 0x1p-149
diff --git a/src/math/x86_64/fmodl.s b/src/math/x86_64/fmodl.s
index ca81e60c..cd8d2b7c 100644
--- a/src/math/x86_64/fmodl.s
+++ b/src/math/x86_64/fmodl.s
@@ -5,7 +5,7 @@ fmodl:
 	fldt 8(%rsp)
 1:	fprem
 	fstsw %ax
-	sahf
-	jp 1b
+	testb $4,%ah
+	jnz 1b
 	fstp %st(1)
 	ret
diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s
index 75c12374..2c337cf5 100644
--- a/src/math/x86_64/remainderl.s
+++ b/src/math/x86_64/remainderl.s
@@ -5,7 +5,7 @@ remainderl:
 	fldt 8(%rsp)
 1:	fprem1
 	fstsw %ax
-	sahf
-	jp 1b
+	testb $4,%ah
+	jnz 1b
 	fstp %st(1)
 	ret
-- 
cgit v1.2.1