diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2013-08-17 02:40:44 +0000 | 
|---|---|---|
| committer | Szabolcs Nagy <nsz@port70.net> | 2013-08-18 16:27:20 +0000 | 
| commit | ebc10fa17634a6ddb87a3aedd71b7d9617d12c19 (patch) | |
| tree | 8e3f1ffffe8c0ed8d768ac7bbdb156038d12556e /src | |
| parent | d6841499109fc397cd3a57a726304fec9b08f510 (diff) | |
| download | musl-ebc10fa17634a6ddb87a3aedd71b7d9617d12c19.tar.gz | |
add sse fenv support on i386 through hwcap
the sse and x87 rounding modes should always be the same, and the
visible exception flags are the bitwise or of the two fenv states,
so it is enough to query the rounding mode from, or raise exceptions
on, only one of the two fenv states.
Diffstat (limited to 'src')
| -rw-r--r-- | src/fenv/i386/fenv.s | 67 | ||||
| -rw-r--r-- | src/fenv/x86_64/fenv.s | 3 | 
2 files changed, 61 insertions, 9 deletions
| diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s index 471d2af8..9bba40a5 100644 --- a/src/fenv/i386/fenv.s +++ b/src/fenv/i386/fenv.s @@ -1,14 +1,26 @@ +.hidden __hwcap +  .global feclearexcept  .type feclearexcept,@function  feclearexcept:	  	mov 4(%esp),%ecx  	not %ecx -	test $0x3f,%ecx +		# consider sse fenv as well if the cpu has XMM capability +	call 1f +1:	addl $__hwcap-1b,(%esp) +	pop %edx +	testl $0x02000000,(%edx) +	jz 1f +	stmxcsr 4(%esp) +	and %ecx,4(%esp) +	ldmxcsr 4(%esp) +1:	test $0x3f,%ecx  	jnz 2f  1:	fnclex  	xor %eax,%eax  	ret  2:	fnstsw %ax +		# TODO: only load/store fenv if exceptions arent clear yet  	and %ecx,%eax  	jz 1b  	sub $32,%esp @@ -41,7 +53,18 @@ fesetround:  	andb $0xf3,1(%esp)  	or %ch,1(%esp)  	fldcw (%esp) -	pop %ecx +		# consider sse fenv as well if the cpu has XMM capability +	call 1f +1:	addl $__hwcap-1b,(%esp) +	pop %edx +	testl $0x02000000,(%edx) +	jmp 1f +	stmxcsr (%esp) +	shl $3,%ch +	andb $0x9f,1(%esp) +	or %ch,1(%esp) +	ldmxcsr (%esp) +1:	pop %ecx  	ret  .global fegetround @@ -59,7 +82,18 @@ fegetenv:  	mov 4(%esp),%ecx  	xor %eax,%eax  	fnstenv (%ecx) -	ret +		# consider sse fenv as well if the cpu has XMM capability +	call 1f +1:	addl $__hwcap-1b,(%esp) +	pop %edx +	testl $0x02000000,(%edx) +	jz 1f +	push %eax +	stmxcsr (%esp) +	pop %edx +	and $0x3f,%edx +	or %edx,4(%ecx) +1:	ret  .global fesetenv  .type fesetenv,@function @@ -69,7 +103,8 @@ fesetenv:  	inc %ecx  	jz 1f  	fldenv -1(%ecx) -	ret +	movl -1(%ecx),%ecx +	jmp 2f  1:	push %eax  	push %eax  	push %eax @@ -79,12 +114,32 @@ fesetenv:  	pushl $0x37f  	fldenv (%esp)  	add $28,%esp -	ret +		# consider sse fenv as well if the cpu has XMM capability +2:	call 1f +1:	addl $__hwcap-1b,(%esp) +	pop %edx +	testl $0x02000000,(%edx) +	jz 1f +		# mxcsr := same rounding mode, cleared exceptions, default mask +	and $0xc00,%ecx +	shl $3,%ecx +	or $0x1f80,%ecx +	mov %ecx,4(%esp) +	ldmxcsr 4(%esp) +1:	ret  .global fetestexcept  .type fetestexcept,@function  
fetestexcept:  	mov 4(%esp),%ecx  	fnstsw %ax -	and %ecx,%eax +		# consider sse fenv as well if the cpu has XMM capability +	call 1f +1:	addl $__hwcap-1b,(%esp) +	pop %edx +	testl $0x02000000,(%edx) +	jz 1f +	stmxcsr 4(%esp) +	or 4(%esp),%eax +1:	and %ecx,%eax  	ret diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s index 443e35a2..c48dade3 100644 --- a/src/fenv/x86_64/fenv.s +++ b/src/fenv/x86_64/fenv.s @@ -28,9 +28,6 @@ feraiseexcept:  	stmxcsr -8(%rsp)  	or %edi,-8(%rsp)  	ldmxcsr -8(%rsp) -	fnstenv -32(%rsp) -	or %edi,-28(%rsp) -	fldenv -32(%rsp)  	xor %eax,%eax  	ret | 
