summaryrefslogtreecommitdiff
path: root/src/math/i386
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2012-03-19 09:00:30 -0400
committer Rich Felker <dalias@aerifal.cx> 2012-03-19 09:00:30 -0400
commit 02db27d9deaee71b244c91e720ec819c74dab150 (patch)
tree ef2543fd54a6fdbca8839cb14c71bb10ffdaa8f1 /src/math/i386
parent da7458a602a6f0bdea25d6b9b613372048a974e6 (diff)
download musl-02db27d9deaee71b244c91e720ec819c74dab150.tar.gz
optimize exponential asm for i386
up to 30% faster exp2 by avoiding the slow frndint and fscale instructions. expm1 also takes a much more direct path for small arguments (the expected use case).
Diffstat (limited to 'src/math/i386')
-rw-r--r-- src/math/i386/exp.s 87
-rw-r--r-- src/math/i386/expm1.s 48
2 files changed, 77 insertions, 58 deletions
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index f4769d59..76ab4d64 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -1,3 +1,37 @@
+.global expm1f
+.type expm1f,@function
+expm1f:
+ flds 4(%esp) # push the single-precision argument onto the x87 stack
+ jmp 1f # join the shared body at label 1 inside expm1
+
+.global expm1l
+.type expm1l,@function
+expm1l:
+ fldt 4(%esp) # push the 80-bit extended-precision argument
+ jmp 1f # join the shared body at label 1 inside expm1
+
+.global expm1
+.type expm1,@function
+expm1:
+ fldl 4(%esp) # push the double-precision argument
+1: fldl2e # shared body: st0 = log2(e), st1 = x
+ fmulp # st0 = y = x*log2(e), so that e^x = 2^y
+ fld1 # st0 = 1, st1 = y
+ fld %st(1) # st0 = y, st1 = 1, st2 = y
+ fabs # st0 = |y|
+ fucom %st(1) # compare |y| with 1; a NaN compares unordered without trapping
+ fnstsw %ax # capture the comparison result before the pops below
+ fstp %st(0) # discard |y|
+ fstp %st(0) # discard 1; st0 = y again
+ sahf # condition codes -> EFLAGS so an integer branch can test them
+ ja 1f # |y| > 1: f2xm1 cannot be applied directly
+ f2xm1 # |y| <= 1 (or unordered/NaN): st0 = 2^y - 1 = e^x - 1
+ ret
+1: call 1f # next "1:" label further down the file (not adjacent in this hunk; presumably the 2^x core) - returns with 2^y in st0
+ fld1 # st0 = 1, st1 = 2^y
+ fsubrp # st0 = 2^y - 1 (operand-less AT&T fsubrp yields st1 - st0, then pops)
+ ret
+
.global exp2f
.type exp2f,@function
exp2f:
@@ -34,22 +68,53 @@ exp:
.type exp2,@function
exp2:
fldl 4(%esp)
-1: fxam
- fnstsw %ax
+1: mov $0x47000000,%eax
+ push %eax
+ flds (%esp)
+ shl $7,%eax
+ push %eax
+ add %eax,%eax
+ push %eax
+ fld %st(1)
+ fabs
+ fucom %st(1)
+ fnstsw
sahf
- jnp 1f
- jnc 1f
- fstps 4(%esp)
- mov $0xfe,%al
- and %al,7(%esp)
- flds 4(%esp)
-1: fld %st(0)
- frndint
+ ja 2f
+ fstp %st(0)
+ fstp %st(0)
+ fld %st(0)
+ fistpl 8(%esp)
+ fildl 8(%esp)
fxch %st(1)
fsub %st(1)
+ mov $0x3fff,%eax
+ add %eax,8(%esp)
f2xm1
fld1
faddp
- fscale
+ fldt (%esp)
+ fmulp
fstp %st(1)
+ add $12,%esp
+ ret
+
+2: fstp %st(0)
+ fstp %st(0)
+ fsts 8(%esp)
+ mov 8(%esp),%eax
+ lea (%eax,%eax),%ecx
+ cmp $0xff000000,%ecx
+ ja 2f
+ fstp %st(0)
+ xor %ecx,%ecx
+ inc %ecx
+ add %eax,%eax
+ jc 1f
+ mov $0x7ffe,%ecx
+1: mov %ecx,8(%esp)
+ fldt (%esp)
+ fld %st(0)
+ fmulp
+2: add $12,%esp
ret
diff --git a/src/math/i386/expm1.s b/src/math/i386/expm1.s
index bbb5d12e..f335a3e5 100644
--- a/src/math/i386/expm1.s
+++ b/src/math/i386/expm1.s
@@ -1,47 +1 @@
-.global expm1f
-.type expm1f,@function
-expm1f:
- flds 4(%esp)
- jmp 1f
-
-.global expm1l
-.type expm1l,@function
-expm1l:
- fldt 4(%esp)
- jmp 1f
-
-.global expm1
-.type expm1,@function
-expm1:
- fldl 4(%esp)
-1: fxam
- fnstsw %ax
- sahf
- jnp 1f
- jnc 1f
- fstps 4(%esp)
- mov $0xfe,%al
- and %al,7(%esp)
- flds 4(%esp)
-1: fldl2e
- fmulp
- fld %st(0)
- frndint
- fldz
- fcomp
- fnstsw %ax
- sahf
- jnz 1f
- fstp %st(0)
- f2xm1
- ret
-1: fxch %st(1)
- fsub %st(1)
- f2xm1
- fld1
- faddp
- fscale
- fld1
- fsubrp
- fstp %st(1)
- ret
+# see exp.s