diff options
| author | Szabolcs Nagy <nsz@port70.net> | 2013-05-15 23:08:52 +0000 | 
|---|---|---|
| committer | Szabolcs Nagy <nsz@port70.net> | 2013-05-15 23:08:52 +0000 | 
| commit | e216951f509b71da193da2fc63e25b998740d58b (patch) | |
| tree | 391381eafd8b0b7eb4fdbb693608072df5c181d5 /src | |
| parent | 2c184264eae3797de028403ed1e86c1f7ae5b813 (diff) | |
| download | musl-e216951f509b71da193da2fc63e25b998740d58b.tar.gz | |
math: use double_t for temporaries to avoid stores on i386
When FLT_EVAL_METHOD!=0 (only i386 with x87 fp), the excess
precision of an expression must be removed in an assignment.
(gcc needs -fexcess-precision=standard or -std=c99 for this.)
This is done by extra load/store instructions, which add code
bloat when a lot of temporaries are used and make the result
less precise in many cases.
Using double_t and float_t avoids these issues on i386 and
it makes no difference on other archs.
For now only a few functions are modified where the excess
precision is clearly beneficial (mostly polynomial evaluations
with temporaries).
object size differences on i386, gcc-4.8:
             old   new
__cosdf.o    123    95
__cos.o      199   169
__sindf.o    131    95
__sin.o      225   203
__tandf.o    207   151
__tan.o      605   499
erff.o      1470  1416
erf.o       1703  1649
j0f.o       1779  1745
j0.o        2308  2274
j1f.o       1602  1568
j1.o        2286  2252
tgamma.o    1431  1424
math/*.o   64164 63635
Diffstat (limited to 'src')
| -rw-r--r-- | src/math/__cos.c | 2 | ||||
| -rw-r--r-- | src/math/__cosdf.c | 2 | ||||
| -rw-r--r-- | src/math/__log1p.h | 2 | ||||
| -rw-r--r-- | src/math/__log1pf.h | 2 | ||||
| -rw-r--r-- | src/math/__sin.c | 2 | ||||
| -rw-r--r-- | src/math/__sindf.c | 2 | ||||
| -rw-r--r-- | src/math/__tan.c | 5 | ||||
| -rw-r--r-- | src/math/__tandf.c | 2 | ||||
| -rw-r--r-- | src/math/acos.c | 2 | ||||
| -rw-r--r-- | src/math/acosf.c | 2 | ||||
| -rw-r--r-- | src/math/asin.c | 2 | ||||
| -rw-r--r-- | src/math/asinf.c | 2 | ||||
| -rw-r--r-- | src/math/atan.c | 2 | ||||
| -rw-r--r-- | src/math/atanf.c | 2 | ||||
| -rw-r--r-- | src/math/erf.c | 5 | ||||
| -rw-r--r-- | src/math/erff.c | 5 | ||||
| -rw-r--r-- | src/math/j0.c | 4 | ||||
| -rw-r--r-- | src/math/j0f.c | 4 | ||||
| -rw-r--r-- | src/math/j1.c | 4 | ||||
| -rw-r--r-- | src/math/j1f.c | 4 | ||||
| -rw-r--r-- | src/math/tgamma.c | 2 | 
21 files changed, 31 insertions, 28 deletions
| diff --git a/src/math/__cos.c b/src/math/__cos.c index 8699c1d5..46cefb38 100644 --- a/src/math/__cos.c +++ b/src/math/__cos.c @@ -60,7 +60,7 @@ C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */  double __cos(double x, double y)  { -	double hz,z,r,w; +	double_t hz,z,r,w;  	z  = x*x;  	w  = z*z; diff --git a/src/math/__cosdf.c b/src/math/__cosdf.c index a65f7f21..2124989b 100644 --- a/src/math/__cosdf.c +++ b/src/math/__cosdf.c @@ -25,7 +25,7 @@ C3  =  0x199342e0ee5069.0p-68; /*  0.0000243904487962774090654 */  float __cosdf(double x)  { -	double r, w, z; +	double_t r, w, z;  	/* Try to optimize for parallel evaluation as in __tandf.c. */  	z = x*x; diff --git a/src/math/__log1p.h b/src/math/__log1p.h index ec2c77b9..57187115 100644 --- a/src/math/__log1p.h +++ b/src/math/__log1p.h @@ -81,7 +81,7 @@ Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */   */  static inline double __log1p(double f)  { -	double hfsq,s,z,R,w,t1,t2; +	double_t hfsq,s,z,R,w,t1,t2;  	s = f/(2.0+f);  	z = s*s; diff --git a/src/math/__log1pf.h b/src/math/__log1pf.h index 99492c5a..f2fbef29 100644 --- a/src/math/__log1pf.h +++ b/src/math/__log1pf.h @@ -22,7 +22,7 @@ Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */  static inline float __log1pf(float f)  { -	float hfsq,s,z,R,w,t1,t2; +	float_t hfsq,s,z,R,w,t1,t2;  	s = f/(2.0f + f);  	z = s*s; diff --git a/src/math/__sin.c b/src/math/__sin.c index 9aead04b..40309496 100644 --- a/src/math/__sin.c +++ b/src/math/__sin.c @@ -51,7 +51,7 @@ S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */  double __sin(double x, double y, int iy)  { -	double z,r,v,w; +	double_t z,r,v,w;  	z = x*x;  	w = z*z; diff --git a/src/math/__sindf.c b/src/math/__sindf.c index 83c0d7a5..8fec2a3f 100644 --- a/src/math/__sindf.c +++ b/src/math/__sindf.c @@ -25,7 +25,7 @@ S4 =  0x16cd878c3b46a7.0p-71; /*  0.0000027183114939898219064 */  float __sindf(double x)  { -	double r, s, w, z; +	double_t r, s, w, z;  	/* Try to optimize for parallel evaluation as 
in __tandf.c. */  	z = x*x; diff --git a/src/math/__tan.c b/src/math/__tan.c index 01e3fe48..fc739f95 100644 --- a/src/math/__tan.c +++ b/src/math/__tan.c @@ -65,7 +65,7 @@ pio4lo =     3.06161699786838301793e-17; /* 3C81A626, 33145C07 */  double __tan(double x, double y, int iy)  { -	double z, r, v, w, s, sign; +	double_t z, r, v, w, s, sign;  	int32_t ix, hx;  	GET_HIGH_WORD(hx,x); @@ -106,7 +106,8 @@ double __tan(double x, double y, int iy)  		 * -1.0 / (x+r) here  		 */  		/* compute -1.0 / (x+r) accurately */ -		double a, t; +		double_t a; +		double z, t;  		z = w;  		SET_LOW_WORD(z,0);  		v = r - (z - x);        /* z+v = r+x */ diff --git a/src/math/__tandf.c b/src/math/__tandf.c index 36a8214e..3e632fdf 100644 --- a/src/math/__tandf.c +++ b/src/math/__tandf.c @@ -27,7 +27,7 @@ static const double T[] = {  float __tandf(double x, int iy)  { -	double z,r,w,s,t,u; +	double_t z,r,w,s,t,u;  	z = x*x;  	/* diff --git a/src/math/acos.c b/src/math/acos.c index cd5d06a6..ea9c87bf 100644 --- a/src/math/acos.c +++ b/src/math/acos.c @@ -51,7 +51,7 @@ qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */  static double R(double z)  { -	double p, q; +	double_t p, q;  	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));  	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));  	return p/q; diff --git a/src/math/acosf.c b/src/math/acosf.c index 5d7c0270..8ee1a71d 100644 --- a/src/math/acosf.c +++ b/src/math/acosf.c @@ -25,7 +25,7 @@ qS1 = -7.0662963390e-01;  static float R(float z)  { -	float p, q; +	float_t p, q;  	p = z*(pS0+z*(pS1+z*pS2));  	q = 1.0f+z*qS1;  	return p/q; diff --git a/src/math/asin.c b/src/math/asin.c index d61c04b4..3e8f99ed 100644 --- a/src/math/asin.c +++ b/src/math/asin.c @@ -58,7 +58,7 @@ qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */  static double R(double z)  { -	double p, q; +	double_t p, q;  	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));  	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));  	return p/q; diff --git a/src/math/asinf.c 
b/src/math/asinf.c index 462bf043..51fe6c61 100644 --- a/src/math/asinf.c +++ b/src/math/asinf.c @@ -26,7 +26,7 @@ qS1 = -7.0662963390e-01;  static float R(float z)  { -	float p, q; +	float_t p, q;  	p = z*(pS0+z*(pS1+z*pS2));  	q = 1.0f+z*qS1;  	return p/q; diff --git a/src/math/atan.c b/src/math/atan.c index 3c9a59ff..5a1d33e6 100644 --- a/src/math/atan.c +++ b/src/math/atan.c @@ -62,7 +62,7 @@ static const double aT[] = {  double atan(double x)  { -	double w,s1,s2,z; +	double_t w,s1,s2,z;  	uint32_t ix,sign;  	int id; diff --git a/src/math/atanf.c b/src/math/atanf.c index 4b59509a..ac8bfd06 100644 --- a/src/math/atanf.c +++ b/src/math/atanf.c @@ -40,7 +40,7 @@ static const float aT[] = {  float atanf(float x)  { -	float w,s1,s2,z; +	float_t w,s1,s2,z;  	uint32_t ix,sign;  	int id; diff --git a/src/math/erf.c b/src/math/erf.c index c0fc41db..2f30a298 100644 --- a/src/math/erf.c +++ b/src/math/erf.c @@ -176,7 +176,7 @@ sb7  = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */  static double erfc1(double x)  { -	double s,P,Q; +	double_t s,P,Q;  	s = fabs(x) - 1;  	P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); @@ -186,7 +186,8 @@ static double erfc1(double x)  static double erfc2(uint32_t ix, double x)  { -	double s,z,R,S; +	double_t s,R,S; +	double z;  	if (ix < 0x3ff40000)  /* |x| < 1.25 */  		return erfc1(x); diff --git a/src/math/erff.c b/src/math/erff.c index e2cfc984..ed5f3975 100644 --- a/src/math/erff.c +++ b/src/math/erff.c @@ -86,7 +86,7 @@ sb7  = -2.2440952301e+01; /* 0xc1b38712 */  static float erfc1(float x)  { -	float s,P,Q; +	float_t s,P,Q;  	s = fabsf(x) - 1;  	P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); @@ -96,7 +96,8 @@ static float erfc1(float x)  static float erfc2(uint32_t ix, float x)  { -	float s,z,R,S; +	float_t s,R,S; +	float z;  	if (ix < 0x3fa00000)  /* |x| < 1.25 */  		return erfc1(x); diff --git a/src/math/j0.c b/src/math/j0.c index b281e136..d722d942 100644 --- a/src/math/j0.c +++ b/src/math/j0.c @@ -263,7 
+263,7 @@ static const double pS2[5] = {  static double pzero(double x)  {  	const double *p,*q; -	double z,r,s; +	double_t z,r,s;  	uint32_t ix;  	GET_HIGH_WORD(ix, x); @@ -359,7 +359,7 @@ static const double qS2[6] = {  static double qzero(double x)  {  	const double *p,*q; -	double s,r,z; +	double_t s,r,z;  	uint32_t ix;  	GET_HIGH_WORD(ix, x); diff --git a/src/math/j0f.c b/src/math/j0f.c index 79bab62a..4b0ee3b7 100644 --- a/src/math/j0f.c +++ b/src/math/j0f.c @@ -201,7 +201,7 @@ static const float pS2[5] = {  static float pzerof(float x)  {  	const float *p,*q; -	float z,r,s; +	float_t z,r,s;  	uint32_t ix;  	GET_FLOAT_WORD(ix, x); @@ -297,7 +297,7 @@ static const float qS2[6] = {  static float qzerof(float x)  {  	const float *p,*q; -	float s,r,z; +	float_t s,r,z;  	uint32_t ix;  	GET_FLOAT_WORD(ix, x); diff --git a/src/math/j1.c b/src/math/j1.c index ac7bb1eb..df724d17 100644 --- a/src/math/j1.c +++ b/src/math/j1.c @@ -250,7 +250,7 @@ static const double ps2[5] = {  static double pone(double x)  {  	const double *p,*q; -	double z,r,s; +	double_t z,r,s;  	uint32_t ix;  	GET_HIGH_WORD(ix, x); @@ -346,7 +346,7 @@ static const double qs2[6] = {  static double qone(double x)  {  	const double *p,*q; -	double  s,r,z; +	double_t s,r,z;  	uint32_t ix;  	GET_HIGH_WORD(ix, x); diff --git a/src/math/j1f.c b/src/math/j1f.c index 5a760f71..6abde349 100644 --- a/src/math/j1f.c +++ b/src/math/j1f.c @@ -198,7 +198,7 @@ static const float ps2[5] = {  static float ponef(float x)  {  	const float *p,*q; -	float z,r,s; +	float_t z,r,s;  	uint32_t ix;  	GET_FLOAT_WORD(ix, x); @@ -294,7 +294,7 @@ static const float qs2[6] = {  static float qonef(float x)  {  	const float *p,*q; -	float s,r,z; +	float_t s,r,z;  	uint32_t ix;  	GET_FLOAT_WORD(ix, x); diff --git a/src/math/tgamma.c b/src/math/tgamma.c index a3f203c1..691e86a4 100644 --- a/src/math/tgamma.c +++ b/src/math/tgamma.c @@ -89,7 +89,7 @@ static const double fact[] = {  /* S(x) rational function for positive x */  static 
double S(double x)  { -	double num = 0, den = 0; +	double_t num = 0, den = 0;  	int i;  	/* to avoid overflow handle large x differently */ | 
