From f697d66b81912af59128ac1b96bc0e2a4514b758 Mon Sep 17 00:00:00 2001 From: nsz Date: Sat, 5 May 2012 01:11:56 +0200 Subject: math: change the formula used for acos.s old: 2*atan2(sqrt(1-x),sqrt(1+x)) new: atan2(fabs(sqrt((1-x)*(1+x))),x) improvements: * all edge cases are fixed (sign of zero in downward rounding) * a bit faster (here a single call is about 131ns vs 162ns) * a bit more precise (at most 1ulp error on 1M uniform random samples in [0,1), the old formula gave some 2ulp errors as well) --- src/math/i386/acos.s | 18 ++++++++---------- src/math/x86_64/acosl.s | 18 ++++++++---------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/src/math/i386/acos.s b/src/math/i386/acos.s index bfff0c5c..47f365ef 100644 --- a/src/math/i386/acos.s +++ b/src/math/i386/acos.s @@ -1,3 +1,5 @@ +# use acos(x) = atan2(fabs(sqrt((1-x)*(1+x))), x) + .global acosf .type acosf,@function acosf: @@ -14,17 +16,13 @@ acosl: .type acos,@function acos: fldl 4(%esp) -1: fld1 - fld %st(1) +1: fld %st(0) fld1 - fsubp - fsqrt - fxch %st(2) - faddp + fsub %st(0),%st(1) + fadd %st(2) + fmulp fsqrt + fabs # fix sign of zero (matters in downward rounding mode) + fxch %st(1) fpatan - fld1 - fld1 - faddp - fmulp ret diff --git a/src/math/x86_64/acosl.s b/src/math/x86_64/acosl.s index db68d2de..88e01b49 100644 --- a/src/math/x86_64/acosl.s +++ b/src/math/x86_64/acosl.s @@ -1,18 +1,16 @@ +# see ../i386/acos.s + .global acosl .type acosl,@function acosl: fldt 8(%rsp) +1: fld %st(0) fld1 - fld %st(1) - fld1 - fsubp - fsqrt - fxch %st(2) - faddp + fsub %st(0),%st(1) + fadd %st(2) + fmulp fsqrt + fabs + fxch %st(1) fpatan - fld1 - fld1 - faddp - fmulp ret -- cgit v1.2.1