From b173e4262f65800ea488b4494125284779a61547 Mon Sep 17 00:00:00 2001
From: Alexander Monakov <amonakov@ispras.ru>
Date: Tue, 14 Jan 2020 23:36:44 +0300
Subject: math: move x87-family remainder functions to C with inline asm

---
 src/math/i386/remainder.c    | 12 ++++++++++++
 src/math/i386/remainder.s    | 14 --------------
 src/math/i386/remainderf.c   | 12 ++++++++++++
 src/math/i386/remainderf.s   | 14 --------------
 src/math/i386/remainderl.c   |  9 +++++++++
 src/math/i386/remainderl.s   | 11 -----------
 src/math/x86_64/remainderl.c |  9 +++++++++
 src/math/x86_64/remainderl.s | 11 -----------
 8 files changed, 42 insertions(+), 50 deletions(-)
 create mode 100644 src/math/i386/remainder.c
 delete mode 100644 src/math/i386/remainder.s
 create mode 100644 src/math/i386/remainderf.c
 delete mode 100644 src/math/i386/remainderf.s
 create mode 100644 src/math/i386/remainderl.c
 delete mode 100644 src/math/i386/remainderl.s
 create mode 100644 src/math/x86_64/remainderl.c
 delete mode 100644 src/math/x86_64/remainderl.s

diff --git a/src/math/i386/remainder.c b/src/math/i386/remainder.c
new file mode 100644
index 00000000..c083df90
--- /dev/null
+++ b/src/math/i386/remainder.c
@@ -0,0 +1,12 @@
+#include <math.h>
+
+double remainder(double x, double y)
+{
+	unsigned short fpsr;
+	// fprem1 does not introduce excess precision into x
+	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
+
+weak_alias(remainder, drem);
diff --git a/src/math/i386/remainder.s b/src/math/i386/remainder.s
deleted file mode 100644
index ab1da95d..00000000
--- a/src/math/i386/remainder.s
+++ /dev/null
@@ -1,14 +0,0 @@
-.global remainder
-.type remainder,@function
-remainder:
-.weak drem
-.type drem,@function
-drem:
-	fldl 12(%esp)
-	fldl 4(%esp)
-1:	fprem1
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/i386/remainderf.c b/src/math/i386/remainderf.c
new file mode 100644
index 00000000..280207d2
--- /dev/null
+++ b/src/math/i386/remainderf.c
@@ -0,0 +1,12 @@
+#include <math.h>
+
+float remainderf(float x, float y)
+{
+	unsigned short fpsr;
+	// fprem1 does not introduce excess precision into x
+	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
+
+weak_alias(remainderf, dremf);
diff --git a/src/math/i386/remainderf.s b/src/math/i386/remainderf.s
deleted file mode 100644
index 6a7378a3..00000000
--- a/src/math/i386/remainderf.s
+++ /dev/null
@@ -1,14 +0,0 @@
-.global remainderf
-.type remainderf,@function
-remainderf:
-.weak dremf
-.type dremf,@function
-dremf:
-	flds 8(%esp)
-	flds 4(%esp)
-1:	fprem1
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/i386/remainderl.c b/src/math/i386/remainderl.c
new file mode 100644
index 00000000..8cf75071
--- /dev/null
+++ b/src/math/i386/remainderl.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+long double remainderl(long double x, long double y)
+{
+	unsigned short fpsr;
+	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
diff --git a/src/math/i386/remainderl.s b/src/math/i386/remainderl.s
deleted file mode 100644
index b41518ed..00000000
--- a/src/math/i386/remainderl.s
+++ /dev/null
@@ -1,11 +0,0 @@
-.global remainderl
-.type remainderl,@function
-remainderl:
-	fldt 16(%esp)
-	fldt 4(%esp)
-1:	fprem1
-	fnstsw %ax
-	sahf
-	jp 1b
-	fstp %st(1)
-	ret
diff --git a/src/math/x86_64/remainderl.c b/src/math/x86_64/remainderl.c
new file mode 100644
index 00000000..8cf75071
--- /dev/null
+++ b/src/math/x86_64/remainderl.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+long double remainderl(long double x, long double y)
+{
+	unsigned short fpsr;
+	do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
+	while (fpsr & 0x400);
+	return x;
+}
diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s
deleted file mode 100644
index cb3857b4..00000000
--- a/src/math/x86_64/remainderl.s
+++ /dev/null
@@ -1,11 +0,0 @@
-.global remainderl
-.type remainderl,@function
-remainderl:
-	fldt 24(%rsp)
-	fldt 8(%rsp)
-1:	fprem1
-	fnstsw %ax
-	testb $4,%ah
-	jnz 1b
-	fstp %st(1)
-	ret
-- 
cgit v1.2.1