diff options
| -rw-r--r-- | src/fenv/i386/fenv.s | 67 | ||||
| -rw-r--r-- | src/fenv/x86_64/fenv.s | 3 | 
2 files changed, 61 insertions, 9 deletions
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 471d2af8..9bba40a5 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -1,14 +1,26 @@
+.hidden __hwcap
+
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:	
 	mov 4(%esp),%ecx
 	not %ecx
-	test $0x3f,%ecx
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)	# pushed return address + offset = &__hwcap
+	pop %edx
+	testl $0x02000000,(%edx)	# hwcap bit 25 clear: skip the mxcsr update
+	jz 1f
+	stmxcsr 4(%esp)		# argument slot is free now, reuse it as scratch
+	and %ecx,4(%esp)		# %ecx = ~excepts: clears the requested bits
+	ldmxcsr 4(%esp)
+1:	test $0x3f,%ecx
 	jnz 2f
 1:	fnclex
 	xor %eax,%eax
 	ret
 2:	fnstsw %ax
+		# TODO: only load/store fenv if exceptions aren't clear yet
 	and %ecx,%eax
 	jz 1b
 	sub $32,%esp
@@ -41,7 +53,18 @@ fesetround:
 	andb $0xf3,1(%esp)
 	or %ch,1(%esp)
 	fldcw (%esp)
-	pop %ecx
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)	# pushed return address + offset = &__hwcap
+	pop %edx
+	testl $0x02000000,(%edx)	# hwcap bit 25 clear: skip the mxcsr update
+	jz 1f			# must be conditional, else the code below is dead
+	stmxcsr (%esp)
+	shl $3,%ch		# control word bits 10-11 -> mxcsr bits 13-14
+	andb $0x9f,1(%esp)	# clear mxcsr bits 13-14 (high byte bits 5-6)
+	or %ch,1(%esp)
+	ldmxcsr (%esp)
+1:	pop %ecx
 	ret
 
 .global fegetround
@@ -59,7 +82,18 @@ fegetenv:
 	mov 4(%esp),%ecx
 	xor %eax,%eax
 	fnstenv (%ecx)
-	ret
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)	# pushed return address + offset = &__hwcap
+	pop %edx
+	testl $0x02000000,(%edx)	# hwcap bit 25 clear: skip the mxcsr read
+	jz 1f
+	push %eax		# scratch slot for stmxcsr
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%edx		# keep only the low six mxcsr bits
+	or %edx,4(%ecx)		# merge them into the saved env at offset 4
+1:	ret
 
 .global fesetenv
 .type fesetenv,@function
@@ -69,7 +103,8 @@ fesetenv:
 	inc %ecx
 	jz 1f
 	fldenv -1(%ecx)
-	ret
+	movl -1(%ecx),%ecx	# first env word, used below for the mxcsr value
+	jmp 2f
 1:	push %eax
 	push %eax
 	push %eax
@@ -79,12 +114,32 @@ fesetenv:
 	pushl $0x37f
 	fldenv (%esp)
 	add $28,%esp
-	ret
+		# consider sse fenv as well if the cpu has XMM capability
+2:	call 1f
+1:	addl $__hwcap-1b,(%esp)	# pushed return address + offset = &__hwcap
+	pop %edx
+	testl $0x02000000,(%edx)	# hwcap bit 25 clear: skip the mxcsr update
+	jz 1f
+		# mxcsr := same rounding mode, cleared exceptions, default mask
+	and $0xc00,%ecx		# keep bits 10-11 only (%ecx is 0 on the default path)
+	shl $3,%ecx		# move them to bits 13-14
+	or $0x1f80,%ecx
+	mov %ecx,4(%esp)		# argument slot reused as scratch
+	ldmxcsr 4(%esp)
+1:	ret
 
 .global fetestexcept
 .type fetestexcept,@function
 fetestexcept:
 	mov 4(%esp),%ecx
 	fnstsw %ax
-	and %ecx,%eax
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)	# pushed return address + offset = &__hwcap
+	pop %edx
+	testl $0x02000000,(%edx)	# hwcap bit 25 clear: x87 status only
+	jz 1f
+	stmxcsr 4(%esp)		# argument already in %ecx, slot reused as scratch
+	or 4(%esp),%eax		# combine mxcsr with the x87 status before masking
+1:	and %ecx,%eax
 	ret
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index 443e35a2..c48dade3 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -28,9 +28,6 @@ feraiseexcept:
 	stmxcsr -8(%rsp)
 	or %edi,-8(%rsp)
 	ldmxcsr -8(%rsp)
-	fnstenv -32(%rsp)
-	or %edi,-28(%rsp)
-	fldenv -32(%rsp)
 	xor %eax,%eax
 	ret
 
