diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2013-09-05 10:58:48 +0000 | 
|---|---|---|
| committer | Szabolcs Nagy <nsz@port70.net> | 2013-09-05 11:30:09 +0000 | 
| commit | 07039ed8563b850624146c938ae201a1099d2f75 (patch) | |
| tree | 2a751e8684596943be0bca4391289759b718f2b4 | |
| parent | 8dba5486288e719ed290cccefcd932ed32756d7c (diff) | |
| download | musl-07039ed8563b850624146c938ae201a1099d2f75.tar.gz | |
math: fix exp2l asm on x86 (raise underflow correctly)
there were two problems:
* omitted underflow on subnormal results: exp2l(-16383.5) was calculated
as sqrt(2)*2^-16384, the last bits of sqrt(2) are zero so the down scaling
does not underflow eventhough the result is in subnormal range
* spurious underflow for subnormal inputs: exp2l(0x1p-16400) was evaluated
as f2xm1(x)+1 and f2xm1 raised underflow (because inexact subnormal result)
the first issue is fixed by raising underflow manually if x is in
(-32768,-16382] and not integer (x-0x1p63+0x1p63 != x)
the second issue is fixed by treating x in (-0x1p64,0x1p64) specially
for these fixes the special case handling was completely rewritten
| -rw-r--r-- | src/math/i386/exp.s | 70 | ||||
| -rw-r--r-- | src/math/x86_64/exp2l.s | 75 | 
2 files changed, 78 insertions, 67 deletions
| diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index e5f54588..abb90369 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -95,42 +95,32 @@ exp:  .type exp2,@function  exp2:  	fldl 4(%esp) -1:	pushl $0x467ff000 -	flds (%esp)       # 16380 -	xorl %eax,%eax -	pushl $0x80000000 -	push %eax -	fld %st(1) -	fabs -	fucomp %st(1) -	fnstsw -	fstp %st(0) -	sahf -	ja 3f             # |x| > 16380 -	jp 2f             # x is nan (avoid invalid except in fistp) +1:	sub $12,%esp  	fld %st(0) -	fistpl 8(%esp) -	fildl 8(%esp) -	fxch %st(1) -	fsub %st(1) -	mov $0x3fff,%eax -	add %eax,8(%esp) -	f2xm1 -	fld1 -	faddp             # 2^(x-rint(x)) -	fldt (%esp)       # 2^rint(x) -	fmulp -	fstp %st(1) -2:	add $12,%esp -	ret - -3:	fld %st(0)  	fstpt (%esp) -	fld1  	mov 8(%esp),%ax  	and $0x7fff,%ax -	cmp $0x7fff,%ax -	je 1f             # x = +-inf +	cmp $0x3fff+13,%ax +	jb 4f             # |x| < 8192 +	cmp $0x3fff+15,%ax +	jae 3f            # |x| >= 32768 +	fsts (%esp) +	cmpl $0xc67ff800,(%esp) +	jb 2f             # x > -16382 +	movl $0x5f000000,(%esp) +	flds (%esp)       # 0x1p63 +	fld %st(1) +	fsub %st(1) +	faddp +	fucomp %st(1) +	fnstsw +	sahf +	je 2f             # x - 0x1p63 + 0x1p63 == x +	movl $1,(%esp) +	flds (%esp)       # 0x1p-149 +	fdiv %st(1) +	fstps (%esp)      # raise underflow +2:	fld1  	fld %st(1)  	frndint  	fxch %st(2) @@ -141,3 +131,19 @@ exp2:  	fstp %st(1)  	add $12,%esp  	ret +3:	xor %eax,%eax +4:	cmp $0x3fff-64,%ax +	fld1 +	jb 1b             # |x| < 0x1p-64 +	fstpt (%esp) +	fistl 8(%esp) +	fildl 8(%esp) +	fsubrp %st(1) +	addl $0x3fff,8(%esp) +	f2xm1 +	fld1 +	faddp             # 2^(x-rint(x)) +	fldt (%esp)       # 2^rint(x) +	fmulp +	add $12,%esp +	ret diff --git a/src/math/x86_64/exp2l.s b/src/math/x86_64/exp2l.s index 1f8ed7bb..e7145881 100644 --- a/src/math/x86_64/exp2l.s +++ b/src/math/x86_64/exp2l.s @@ -26,44 +26,32 @@ expm1l:  .type exp2l,@function  exp2l:  	fldt 8(%rsp) -1:	mov $0x467ff000,%eax -	mov %eax,-16(%rsp) -	mov $0x80000000,%eax -	mov %eax,-20(%rsp) -	xor %eax,%eax -	mov %eax,-24(%rsp) -	flds -16(%rsp)    # 16380 +1:	fld %st(0) +	sub $16,%rsp +	fstpt (%rsp) +	mov 8(%rsp),%ax +	and $0x7fff,%ax +	cmp $0x3fff+13,%ax +	jb 4f             # |x| < 8192 +	cmp $0x3fff+15,%ax +	jae 3f            # |x| >= 32768 +	fsts (%rsp) +	cmpl $0xc67ff800,(%rsp) +	jb 2f             # x > -16382 +	movl $0x5f000000,(%rsp) +	flds (%rsp)       # 0x1p63  	fld %st(1) -	fabs -	fucom %st(1) +	fsub %st(1) +	faddp +	fucomp %st(1)  	fnstsw -	fstp %st(0) -	fstp %st(0)  	sahf -	ja 3f             # |x| > 16380 -	jp 2f             # x is nan (avoid invalid except in fistp) -	fld %st(0) -	fistpl -16(%rsp) -	fildl -16(%rsp) -	fxch %st(1) -	fsub %st(1) -	mov $0x3fff,%eax -	add %eax,-16(%rsp) -	f2xm1 -	fld1 -	faddp             # 2^(x-rint(x)) -	fldt -24(%rsp)    # 2^rint(x) -	fmulp -2:	fstp %st(1) -	ret - -3:	fld %st(0) -	fstpt -24(%rsp) -	fld1 -	mov -15(%rsp),%ax -	and $0x7fff,%ax -	cmp $0x7fff,%ax -	je 1f             # x = +-inf +	je 2f             # x - 0x1p63 + 0x1p63 == x +	movl $1,(%rsp) +	flds (%rsp)       # 0x1p-149 +	fdiv %st(1) +	fstps (%rsp)      # raise underflow +2:	fld1  	fld %st(1)  	frndint  	fxch %st(2) @@ -72,4 +60,21 @@ exp2l:  	faddp             # 2^(x-rint(x))  1:	fscale  	fstp %st(1) +	add $16,%rsp +	ret +3:	xor %eax,%eax +4:	cmp $0x3fff-64,%ax +	fld1 +	jb 1b             # |x| < 0x1p-64 +	fstpt (%rsp) +	fistl 8(%rsp) +	fildl 8(%rsp) +	fsubrp %st(1) +	addl $0x3fff,8(%rsp) +	f2xm1 +	fld1 +	faddp             # 2^(x-rint(x)) +	fldt (%rsp)       # 2^rint(x) +	fmulp +	add $16,%rsp  	ret | 
