summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/fenv/i386/fenv.s67
-rw-r--r--src/fenv/x86_64/fenv.s3
2 files changed, 61 insertions, 9 deletions
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 471d2af8..9bba40a5 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -1,14 +1,26 @@
+.hidden __hwcap
+
.global feclearexcept
.type feclearexcept,@function
feclearexcept:
mov 4(%esp),%ecx
not %ecx
- test $0x3f,%ecx
+ # consider sse fenv as well if the cpu has XMM capability
+ call 1f
+1: addl $__hwcap-1b,(%esp)
+ pop %edx
+ testl $0x02000000,(%edx)
+ jz 1f
+ stmxcsr 4(%esp)
+ and %ecx,4(%esp)
+ ldmxcsr 4(%esp)
+1: test $0x3f,%ecx
jnz 2f
1: fnclex
xor %eax,%eax
ret
2: fnstsw %ax
+ # TODO: only load/store fenv if exceptions arent clear yet
and %ecx,%eax
jz 1b
sub $32,%esp
@@ -41,7 +53,18 @@ fesetround:
andb $0xf3,1(%esp)
or %ch,1(%esp)
fldcw (%esp)
- pop %ecx
+ # consider sse fenv as well if the cpu has XMM capability
+ call 1f
+1: addl $__hwcap-1b,(%esp)
+ pop %edx
+ testl $0x02000000,(%edx)
+ jmp 1f
+ stmxcsr (%esp)
+ shl $3,%ch
+ andb $0x9f,1(%esp)
+ or %ch,1(%esp)
+ ldmxcsr (%esp)
+1: pop %ecx
ret
.global fegetround
@@ -59,7 +82,18 @@ fegetenv:
mov 4(%esp),%ecx
xor %eax,%eax
fnstenv (%ecx)
- ret
+ # consider sse fenv as well if the cpu has XMM capability
+ call 1f
+1: addl $__hwcap-1b,(%esp)
+ pop %edx
+ testl $0x02000000,(%edx)
+ jz 1f
+ push %eax
+ stmxcsr (%esp)
+ pop %edx
+ and $0x3f,%edx
+ or %edx,4(%ecx)
+1: ret
.global fesetenv
.type fesetenv,@function
@@ -69,7 +103,8 @@ fesetenv:
inc %ecx
jz 1f
fldenv -1(%ecx)
- ret
+ movl -1(%ecx),%ecx
+ jmp 2f
1: push %eax
push %eax
push %eax
@@ -79,12 +114,32 @@ fesetenv:
pushl $0x37f
fldenv (%esp)
add $28,%esp
- ret
+ # consider sse fenv as well if the cpu has XMM capability
+2: call 1f
+1: addl $__hwcap-1b,(%esp)
+ pop %edx
+ testl $0x02000000,(%edx)
+ jz 1f
+ # mxcsr := same rounding mode, cleared exceptions, default mask
+ and $0xc00,%ecx
+ shl $3,%ecx
+ or $0x1f80,%ecx
+ mov %ecx,4(%esp)
+ ldmxcsr 4(%esp)
+1: ret
.global fetestexcept
.type fetestexcept,@function
fetestexcept:
mov 4(%esp),%ecx
fnstsw %ax
- and %ecx,%eax
+ # consider sse fenv as well if the cpu has XMM capability
+ call 1f
+1: addl $__hwcap-1b,(%esp)
+ pop %edx
+ testl $0x02000000,(%edx)
+ jz 1f
+ stmxcsr 4(%esp)
+ or 4(%esp),%eax
+1: and %ecx,%eax
ret
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index 443e35a2..c48dade3 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -28,9 +28,6 @@ feraiseexcept:
stmxcsr -8(%rsp)
or %edi,-8(%rsp)
ldmxcsr -8(%rsp)
- fnstenv -32(%rsp)
- or %edi,-28(%rsp)
- fldenv -32(%rsp)
xor %eax,%eax
ret