diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2013-08-18 15:34:07 +0000 | 
|---|---|---|
| committer | Szabolcs Nagy <nsz@port70.net> | 2013-08-18 16:27:21 +0000 | 
| commit | d8764bf84022397ff9d22310f78fcd78d801e2bf (patch) | |
| tree | 289574d06451582fd67b5cde646cd6bfd368fa66 | |
| parent | baba2630c9cf778b41e218dfa980fcfdbec808e6 (diff) | |
| download | musl-d8764bf84022397ff9d22310f78fcd78d801e2bf.tar.gz | |
optimize x86 feclearexcept: only use save/restore x87 fenv if needed

the x87 exception summary (ES) and stack fault (SF) flags may be
spuriously cleared by feclearexcept using the fnclex instruction,
but these flags are not observable through libc, hence maintaining
their state is not critical.
| -rw-r--r-- | src/fenv/i386/fenv.s | 39 | ||||
| -rw-r--r-- | src/fenv/x86_64/fenv.s | 26 | 
2 files changed, 38 insertions, 27 deletions
```diff
diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s
index 9bba40a5..a8540add 100644
--- a/src/fenv/i386/fenv.s
+++ b/src/fenv/i386/fenv.s
@@ -4,26 +4,41 @@
 .type feclearexcept,@function
 feclearexcept:	
 	mov 4(%esp),%ecx
-	not %ecx
+	fnstsw %ax
 		# consider sse fenv as well if the cpu has XMM capability
 	call 1f
 1:	addl $__hwcap-1b,(%esp)
 	pop %edx
 	testl $0x02000000,(%edx)
+	jz 2f
+		# maintain exceptions in the sse mxcsr, clear x87 exceptions
+	test %eax,%ecx
 	jz 1f
-	stmxcsr 4(%esp)
-	and %ecx,4(%esp)
-	ldmxcsr 4(%esp)
-1:	test $0x3f,%ecx
-	jnz 2f
-1:	fnclex
-	xor %eax,%eax
+	fnclex
+1:	push %edx
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%eax
+	or %eax,%edx
+	test %edx,%ecx
+	jz 1f
+	not %ecx
+	and %ecx,%edx
+	push %edx
+	ldmxcsr (%esp)
+	pop %edx
+1:	xor %eax,%eax
 	ret
-2:	fnstsw %ax
-		# TODO: only load/store fenv if exceptions arent clear yet
-	and %ecx,%eax
+		# only do the expensive x87 fenv load/store when needed
+2:	test %eax,%ecx
 	jz 1b
-	sub $32,%esp
+	not %ecx
+	and %ecx,%eax
+	test $0x3f,%eax
+	jz 1f
+	fnclex
+	jmp 1b
+1:	sub $32,%esp
 	fnstenv (%esp)
 	mov %al,4(%esp)
 	fldenv (%esp)
diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s
index c48dade3..dda6b61a 100644
--- a/src/fenv/x86_64/fenv.s
+++ b/src/fenv/x86_64/fenv.s
@@ -1,25 +1,21 @@
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:
+		# maintain exceptions in the sse mxcsr, clear x87 exceptions
 	mov %edi,%ecx
+	fnstsw %ax
+	test %eax,%ecx
+	jz 1f
+	fnclex
+1:	stmxcsr -8(%rsp)
+	and $0x3f,%eax
+	or %eax,-8(%rsp)
+	test %ecx,-8(%rsp)
+	jz 1f
 	not %ecx
-	stmxcsr -8(%rsp)
 	and %ecx,-8(%rsp)
 	ldmxcsr -8(%rsp)
-	test $0x3f,%ecx
-	jnz 2f
-1:	fnclex
-	xor %eax,%eax
-	ret
-2:	fnstsw %ax
-	and %ecx,%eax
-	jz 1b
-	sub $32,%rsp
-	fnstenv (%rsp)
-	mov %al,4(%rsp)
-	fldenv (%rsp)
-	add $32,%rsp
-	xor %eax,%eax
+1:	xor %eax,%eax
 	ret
 
 .global feraiseexcept
```
