diff options
Diffstat (limited to 'src/math/x86_64')
31 files changed, 153 insertions, 97 deletions
diff --git a/src/math/x86_64/fabs.c b/src/math/x86_64/fabs.c new file mode 100644 index 00000000..16562477 --- /dev/null +++ b/src/math/x86_64/fabs.c @@ -0,0 +1,10 @@ +#include <math.h> + +double fabs(double x) +{ + double t; + __asm__ ("pcmpeqd %0, %0" : "=x"(t)); // t = ~0 + __asm__ ("psrlq $1, %0" : "+x"(t)); // t >>= 1 + __asm__ ("andps %1, %0" : "+x"(x) : "x"(t)); // x &= t + return x; +} diff --git a/src/math/x86_64/fabs.s b/src/math/x86_64/fabs.s deleted file mode 100644 index 5715005e..00000000 --- a/src/math/x86_64/fabs.s +++ /dev/null @@ -1,9 +0,0 @@ -.global fabs -.type fabs,@function -fabs: - xor %eax,%eax - dec %rax - shr %rax - movq %rax,%xmm1 - andpd %xmm1,%xmm0 - ret diff --git a/src/math/x86_64/fabsf.c b/src/math/x86_64/fabsf.c new file mode 100644 index 00000000..36ea7481 --- /dev/null +++ b/src/math/x86_64/fabsf.c @@ -0,0 +1,10 @@ +#include <math.h> + +float fabsf(float x) +{ + float t; + __asm__ ("pcmpeqd %0, %0" : "=x"(t)); // t = ~0 + __asm__ ("psrld $1, %0" : "+x"(t)); // t >>= 1 + __asm__ ("andps %1, %0" : "+x"(x) : "x"(t)); // x &= t + return x; +} diff --git a/src/math/x86_64/fabsf.s b/src/math/x86_64/fabsf.s deleted file mode 100644 index 501a1f17..00000000 --- a/src/math/x86_64/fabsf.s +++ /dev/null @@ -1,7 +0,0 @@ -.global fabsf -.type fabsf,@function -fabsf: - mov $0x7fffffff,%eax - movq %rax,%xmm1 - andps %xmm1,%xmm0 - ret diff --git a/src/math/x86_64/fabsl.c b/src/math/x86_64/fabsl.c new file mode 100644 index 00000000..cc1c9ed9 --- /dev/null +++ b/src/math/x86_64/fabsl.c @@ -0,0 +1,7 @@ +#include <math.h> + +long double fabsl(long double x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/src/math/x86_64/fabsl.s b/src/math/x86_64/fabsl.s deleted file mode 100644 index 4e7ab525..00000000 --- a/src/math/x86_64/fabsl.s +++ /dev/null @@ -1,6 +0,0 @@ -.global fabsl -.type fabsl,@function -fabsl: - fldt 8(%rsp) - fabs - ret diff --git a/src/math/x86_64/fmodl.c b/src/math/x86_64/fmodl.c new file mode 100644 index 00000000..3daeab06 --- /dev/null +++ b/src/math/x86_64/fmodl.c @@ -0,0 +1,9 @@ +#include <math.h> + +long double fmodl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/x86_64/fmodl.s b/src/math/x86_64/fmodl.s deleted file mode 100644 index ea07b402..00000000 --- a/src/math/x86_64/fmodl.s +++ /dev/null @@ -1,11 +0,0 @@ -.global fmodl -.type fmodl,@function -fmodl: - fldt 24(%rsp) - fldt 8(%rsp) -1: fprem - fnstsw %ax - testb $4,%ah - jnz 1b - fstp %st(1) - ret diff --git a/src/math/x86_64/llrint.c b/src/math/x86_64/llrint.c new file mode 100644 index 00000000..dd38a722 --- /dev/null +++ b/src/math/x86_64/llrint.c @@ -0,0 +1,8 @@ +#include <math.h> + +long long llrint(double x) +{ + long long r; + __asm__ ("cvtsd2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86_64/llrint.s b/src/math/x86_64/llrint.s deleted file mode 100644 index bf476498..00000000 --- a/src/math/x86_64/llrint.s +++ /dev/null @@ -1,5 +0,0 @@ -.global llrint -.type llrint,@function -llrint: - cvtsd2si %xmm0,%rax - ret diff --git a/src/math/x86_64/llrintf.c b/src/math/x86_64/llrintf.c new file mode 100644 index 00000000..fc8625e8 --- /dev/null +++ b/src/math/x86_64/llrintf.c @@ -0,0 +1,8 @@ +#include <math.h> + +long long llrintf(float x) +{ + long long r; + __asm__ ("cvtss2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86_64/llrintf.s b/src/math/x86_64/llrintf.s deleted file mode 100644 index d7204ac0..00000000 --- a/src/math/x86_64/llrintf.s +++ /dev/null @@ -1,5 +0,0 @@ -.global llrintf -.type llrintf,@function -llrintf: - cvtss2si %xmm0,%rax - ret diff --git a/src/math/x86_64/llrintl.c b/src/math/x86_64/llrintl.c new file mode 100644 index 00000000..c439ef28 --- /dev/null +++ b/src/math/x86_64/llrintl.c @@ -0,0 +1,8 @@ +#include <math.h> + +long long llrintl(long double x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86_64/llrintl.s b/src/math/x86_64/llrintl.s deleted file mode 100644 index 1ec0817d..00000000 --- a/src/math/x86_64/llrintl.s +++ /dev/null @@ -1,7 +0,0 @@ -.global llrintl -.type llrintl,@function -llrintl: - fldt 8(%rsp) - fistpll 8(%rsp) - mov 8(%rsp),%rax - ret diff --git a/src/math/x86_64/lrint.c b/src/math/x86_64/lrint.c new file mode 100644 index 00000000..a742fec6 --- /dev/null +++ b/src/math/x86_64/lrint.c @@ -0,0 +1,8 @@ +#include <math.h> + +long lrint(double x) +{ + long r; + __asm__ ("cvtsd2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86_64/lrint.s b/src/math/x86_64/lrint.s deleted file mode 100644 index 15fc2454..00000000 --- a/src/math/x86_64/lrint.s +++ /dev/null @@ -1,5 +0,0 @@ -.global lrint -.type lrint,@function -lrint: - cvtsd2si %xmm0,%rax - ret diff --git a/src/math/x86_64/lrintf.c b/src/math/x86_64/lrintf.c new file mode 100644 index 00000000..2ba5639d --- /dev/null +++ b/src/math/x86_64/lrintf.c @@ -0,0 +1,8 @@ +#include <math.h> + +long lrintf(float x) +{ + long r; + __asm__ ("cvtss2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86_64/lrintf.s b/src/math/x86_64/lrintf.s deleted file mode 100644 index 488423d2..00000000 --- a/src/math/x86_64/lrintf.s +++ /dev/null @@ -1,5 +0,0 @@ -.global lrintf -.type lrintf,@function -lrintf: - cvtss2si %xmm0,%rax - ret diff --git a/src/math/x86_64/lrintl.c b/src/math/x86_64/lrintl.c new file mode 100644 index 00000000..068e2e4d --- /dev/null +++ b/src/math/x86_64/lrintl.c @@ -0,0 +1,8 @@ +#include <math.h> + +long lrintl(long double x) +{ + long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86_64/lrintl.s b/src/math/x86_64/lrintl.s deleted file mode 100644 index d587b12b..00000000 --- a/src/math/x86_64/lrintl.s +++ /dev/null @@ -1,7 +0,0 @@ -.global lrintl -.type lrintl,@function -lrintl: - fldt 8(%rsp) - fistpll 8(%rsp) - mov 8(%rsp),%rax - ret diff --git a/src/math/x86_64/remainderl.c b/src/math/x86_64/remainderl.c new file mode 100644 index 00000000..8cf75071 --- /dev/null +++ b/src/math/x86_64/remainderl.c @@ -0,0 +1,9 @@ +#include <math.h> + +long double remainderl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s deleted file mode 100644 index cb3857b4..00000000 --- a/src/math/x86_64/remainderl.s +++ /dev/null @@ -1,11 +0,0 @@ -.global remainderl -.type remainderl,@function -remainderl: - fldt 24(%rsp) - fldt 8(%rsp) -1: fprem1 - fnstsw %ax - testb $4,%ah - jnz 1b - fstp %st(1) - ret diff --git a/src/math/x86_64/remquol.c b/src/math/x86_64/remquol.c new file mode 100644 index 00000000..60eef089 --- /dev/null +++ b/src/math/x86_64/remquol.c @@ -0,0 +1,32 @@ +#include <math.h> + +long double remquol(long double x, long double y, int *quo) +{ + signed char *cx = (void *)&x, *cy = (void *)&y; + /* By ensuring that addresses of x and y cannot be discarded, + * this empty asm guides GCC into representing extraction of + * their sign bits as memory loads rather than making x and y + * not-address-taken internally and using bitfield operations, + * which in the end wouldn't work out, as extraction from FPU + * registers needs to go through memory anyway. This way GCC + * should manage to use incoming stack slots without spills. */ + __asm__ ("" :: "X"(cx), "X"(cy)); + + long double t = x; + unsigned fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(t), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + /* C0, C1, C3 flags in x87 status word carry low bits of quotient: + * 15 14 13 12 11 10 9 8 + * . C3 . . . C2 C1 C0 + * . b1 . . . 0 b0 b2 */ + unsigned char i = fpsr >> 8; + i = i>>4 | i<<4; + /* i[5:2] is now {b0 b2 ? b1}. Retrieve {0 b2 b1 b0} via + * in-register table lookup. */ + unsigned qbits = 0x7575313164642020 >> (i & 60); + qbits &= 7; + + *quo = (cx[9]^cy[9]) < 0 ? -qbits : qbits; + return t; +} diff --git a/src/math/x86_64/rintl.c b/src/math/x86_64/rintl.c new file mode 100644 index 00000000..e1a92077 --- /dev/null +++ b/src/math/x86_64/rintl.c @@ -0,0 +1,7 @@ +#include <math.h> + +long double rintl(long double x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/src/math/x86_64/rintl.s b/src/math/x86_64/rintl.s deleted file mode 100644 index 64e663cd..00000000 --- a/src/math/x86_64/rintl.s +++ /dev/null @@ -1,6 +0,0 @@ -.global rintl -.type rintl,@function -rintl: - fldt 8(%rsp) - frndint - ret diff --git a/src/math/x86_64/sqrt.c b/src/math/x86_64/sqrt.c new file mode 100644 index 00000000..657e09e3 --- /dev/null +++ b/src/math/x86_64/sqrt.c @@ -0,0 +1,7 @@ +#include <math.h> + +double sqrt(double x) +{ + __asm__ ("sqrtsd %1, %0" : "=x"(x) : "x"(x)); + return x; +} diff --git a/src/math/x86_64/sqrt.s b/src/math/x86_64/sqrt.s deleted file mode 100644 index d3c609f9..00000000 --- a/src/math/x86_64/sqrt.s +++ /dev/null @@ -1,4 +0,0 @@ -.global sqrt -.type sqrt,@function -sqrt: sqrtsd %xmm0, %xmm0 - ret diff --git a/src/math/x86_64/sqrtf.c b/src/math/x86_64/sqrtf.c new file mode 100644 index 00000000..720baec6 --- /dev/null +++ b/src/math/x86_64/sqrtf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float sqrtf(float x) +{ + __asm__ ("sqrtss %1, %0" : "=x"(x) : "x"(x)); + return x; +} diff --git a/src/math/x86_64/sqrtf.s b/src/math/x86_64/sqrtf.s deleted file mode 100644 index eec48c60..00000000 --- a/src/math/x86_64/sqrtf.s +++ /dev/null @@ -1,4 +0,0 @@ -.global sqrtf -.type sqrtf,@function -sqrtf: sqrtss %xmm0, %xmm0 - ret diff --git a/src/math/x86_64/sqrtl.c b/src/math/x86_64/sqrtl.c new file mode 100644 index 00000000..864cfcc4 --- /dev/null +++ b/src/math/x86_64/sqrtl.c @@ -0,0 +1,7 @@ +#include <math.h> + +long double sqrtl(long double x) +{ + __asm__ ("fsqrt" : "+t"(x)); + return x; +} diff --git a/src/math/x86_64/sqrtl.s b/src/math/x86_64/sqrtl.s deleted file mode 100644 index 23cd687d..00000000 --- a/src/math/x86_64/sqrtl.s +++ /dev/null @@ -1,5 +0,0 @@ -.global sqrtl -.type sqrtl,@function -sqrtl: fldt 8(%rsp) - fsqrt - ret |