diff options
| -rw-r--r-- | src/fenv/i386/fenv.s | 39 | ||||
| -rw-r--r-- | src/fenv/x86_64/fenv.s | 26 | 
2 files changed, 38 insertions, 27 deletions
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 9bba40a5..a8540add 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -4,26 +4,41 @@
 .type feclearexcept,@function
 feclearexcept:	
 	mov 4(%esp),%ecx
-	not %ecx
+	fnstsw %ax
 		# consider sse fenv as well if the cpu has XMM capability
 	call 1f
 1:	addl $__hwcap-1b,(%esp)
 	pop %edx
 	testl $0x02000000,(%edx)
+	jz 2f
+		# maintain exceptions in the sse mxcsr, clear x87 exceptions
+	test %eax,%ecx
 	jz 1f
-	stmxcsr 4(%esp)
-	and %ecx,4(%esp)
-	ldmxcsr 4(%esp)
-1:	test $0x3f,%ecx
-	jnz 2f
-1:	fnclex
-	xor %eax,%eax
+	fnclex
+1:	push %edx
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%eax
+	or %eax,%edx
+	test %edx,%ecx
+	jz 1f
+	not %ecx
+	and %ecx,%edx
+	push %edx
+	ldmxcsr (%esp)
+	pop %edx
+1:	xor %eax,%eax
 	ret
-2:	fnstsw %ax
-		# TODO: only load/store fenv if exceptions arent clear yet
-	and %ecx,%eax
+		# only do the expensive x87 fenv load/store when needed
+2:	test %eax,%ecx
 	jz 1b
-	sub $32,%esp
+	not %ecx
+	and %ecx,%eax
+	test $0x3f,%eax
+	jz 1f
+	fnclex
+	jmp 1b
+1:	sub $32,%esp
 	fnstenv (%esp)
 	mov %al,4(%esp)
 	fldenv (%esp)
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index c48dade3..dda6b61a 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -1,25 +1,21 @@
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:
+		# maintain exceptions in the sse mxcsr, clear x87 exceptions
 	mov %edi,%ecx
+	fnstsw %ax
+	test %eax,%ecx
+	jz 1f
+	fnclex
+1:	stmxcsr -8(%rsp)
+	and $0x3f,%eax
+	or %eax,-8(%rsp)
+	test %ecx,-8(%rsp)
+	jz 1f
 	not %ecx
-	stmxcsr -8(%rsp)
 	and %ecx,-8(%rsp)
 	ldmxcsr -8(%rsp)
-	test $0x3f,%ecx
-	jnz 2f
-1:	fnclex
-	xor %eax,%eax
-	ret
-2:	fnstsw %ax
-	and %ecx,%eax
-	jz 1b
-	sub $32,%rsp
-	fnstenv (%rsp)
-	mov %al,4(%rsp)
-	fldenv (%rsp)
-	add $32,%rsp
-	xor %eax,%eax
+1:	xor %eax,%eax
 	ret
 
 .global feraiseexcept
