math: add a non-dummy tgamma implementation

uses the lanczos approximation method with the usual tweaks. same parameters were selected as in boost and python. (avoides some extra work and special casing found in boost so the precision is not that good: measured error is <5ulp for positive x and <10ulp for negative) an alternative lgamma_r implementation is also given in the same file which is simpler and smaller than the current one, but less precise so it's ifdefed out for now.
author: Szabolcs Nagy <nsz@port70.net> 2012-12-12 01:43:43 +0100
committer: Szabolcs Nagy <nsz@port70.net> 2012-12-12 01:43:43 +0100
commit: 0f53c1a4266ad4cca28115e2c3bcfdc86337d8ca (patch)
tree: 4dbe18341f662914d8fd916f1f38549444478589 /src
parent: 14cc9c7f38c80094c05353fcb11fe9e441340583 (diff)
download: musl-0f53c1a4266ad4cca28115e2c3bcfdc86337d8ca.tar.gz
2 files changed, 215 insertions, 20 deletions
diff --git a/src/math/tgamma.c b/src/math/tgamma.c
index f3bbe370..a3f203c1 100644
--- a/src/math/tgamma.c
+++ b/src/math/tgamma.c
@@ -1,16 +1,221 @@
-#include <math.h>
+/*
+"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964)
+"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001)
+"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004)
 
-// FIXME: use lanczos approximation
+approximation method:
 
-double __lgamma_r(double, int *);
+                        (x - 0.5)         S(x)
+Gamma(x) = (x + g - 0.5)         *  ----------------
+                                    exp(x + g - 0.5)
+
+with
+                 a1      a2      a3            aN
+S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ]
+               x + 1   x + 2   x + 3         x + N
+
+with a0, a1, a2, a3,.. aN constants which depend on g.
+
+for x < 0 the following reflection formula is used:
+
+Gamma(x)*Gamma(-x) = -pi/(x sin(pi x))
+
+most ideas and constants are from boost and python
+*/
+#include "libm.h"
+
+static const double pi = 3.141592653589793238462643383279502884;
+
+/* sin(pi x) with x > 0 && isnormal(x) assumption */
+static double sinpi(double x)
+{
+	int n;
+
+	/* argument reduction: x = |x| mod 2 */
+	/* spurious inexact when x is odd int */
+	x = x * 0.5;
+	x = 2 * (x - floor(x));
+
+	/* reduce x into [-.25,.25] */
+	n = 4 * x;
+	n = (n+1)/2;
+	x -= n * 0.5;
+
+	x *= pi;
+	switch (n) {
+	default: /* case 4 */
+	case 0:
+		return __sin(x, 0, 0);
+	case 1:
+		return __cos(x, 0);
+	case 2:
+		/* sin(0-x) and -sin(x) have different sign at 0 */
+		return __sin(0-x, 0, 0);
+	case 3:
+		return -__cos(x, 0);
+	}
+}
+
+#define N 12
+//static const double g = 6.024680040776729583740234375;
+static const double gmhalf = 5.524680040776729583740234375;
+static const double Snum[N+1] = {
+	23531376880.410759688572007674451636754734846804940,
+	42919803642.649098768957899047001988850926355848959,
+	35711959237.355668049440185451547166705960488635843,
+	17921034426.037209699919755754458931112671403265390,
+	6039542586.3520280050642916443072979210699388420708,
+	1439720407.3117216736632230727949123939715485786772,
+	248874557.86205415651146038641322942321632125127801,
+	31426415.585400194380614231628318205362874684987640,
+	2876370.6289353724412254090516208496135991145378768,
+	186056.26539522349504029498971604569928220784236328,
+	8071.6720023658162106380029022722506138218516325024,
+	210.82427775157934587250973392071336271166969580291,
+	2.5066282746310002701649081771338373386264310793408,
+};
+static const double Sden[N+1] = {
+	0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
+	2637558, 357423, 32670, 1925, 66, 1,
+};
+/* n! for small integer n */
+static const double fact[] = {
+	1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
+	479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
+	355687428096000.0, 6402373705728000.0, 121645100408832000.0,
+	2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
+};
+
+/* S(x) rational function for positive x */
+static double S(double x)
+{
+	double num = 0, den = 0;
+	int i;
+
+	/* to avoid overflow handle large x differently */
+	if (x < 8)
+		for (i = N; i >= 0; i--) {
+			num = num * x + Snum[i];
+			den = den * x + Sden[i];
+		}
+	else
+		for (i = 0; i <= N; i++) {
+			num = num / x + Snum[i];
+			den = den / x + Sden[i];
+		}
+	return num/den;
+}
 
 double tgamma(double x)
 {
-	int sign;
-	double y;
+	double absx, y, dy, z, r;
 
-	y = exp(__lgamma_r(x, &sign));
-	if (sign < 0)
-		y = -y;
-	return y;
+	/* special cases */
+	if (!isfinite(x))
+		/* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
+		return x + INFINITY;
+
+	/* integer arguments */
+	/* raise inexact when non-integer */
+	if (x == floor(x)) {
+		if (x == 0)
+			/* tgamma(+-0)=+-inf with divide-by-zero */
+			return 1/x;
+		if (x < 0)
+			return 0/0.0;
+		if (x <= sizeof fact/sizeof *fact)
+			return fact[(int)x - 1];
+	}
+
+	absx = fabs(x);
+
+	/* x ~ 0: tgamma(x) ~ 1/x */
+	if (absx < 0x1p-54)
+		return 1/x;
+
+	/* x >= 172: tgamma(x)=inf with overflow */
+	/* x =< -184: tgamma(x)=+-0 with underflow */
+	if (absx >= 184) {
+		if (x < 0) {
+			if (floor(x) * 0.5 == floor(x * 0.5))
+				return 0;
+			return -0.0;
+		}
+		x *= 0x1p1023;
+		return x;
+	}
+
+	/* handle the error of x + g - 0.5 */
+	y = absx + gmhalf;
+	if (absx > gmhalf) {
+		dy = y - absx;
+		dy -= gmhalf;
+	} else {
+		dy = y - gmhalf;
+		dy -= absx;
+	}
+
+	z = absx - 0.5;
+	r = S(absx) * exp(-y);
+	if (x < 0) {
+		/* reflection formula for negative x */
+		r = -pi / (sinpi(absx) * absx * r);
+		dy = -dy;
+		z = -z;
+	}
+	r += dy * (gmhalf+0.5) * r / y;
+	z = pow(y, 0.5*z);
+	r = r * z * z;
+	return r;
 }
+
+#if 0
+double __lgamma_r(double x, int *sign)
+{
+	double r, absx, z, zz, w;
+
+	*sign = 1;
+
+	/* special cases */
+	if (!isfinite(x))
+		/* lgamma(nan)=nan, lgamma(+-inf)=inf */
+		return x*x;
+
+	/* integer arguments */
+	if (x == floor(x) && x <= 2) {
+		/* n <= 0: lgamma(n)=inf with divbyzero */
+		/* n == 1,2: lgamma(n)=0 */
+		if (x <= 0)
+			return 1/0.0;
+		return 0;
+	}
+
+	absx = fabs(x);
+
+	/* lgamma(x) ~ -log(|x|) for tiny |x| */
+	if (absx < 0x1p-54) {
+		*sign = 1 - 2*!!signbit(x);
+		return -log(absx);
+	}
+
+	/* use tgamma for smaller |x| */
+	if (absx < 128) {
+		x = tgamma(x);
+		*sign = 1 - 2*!!signbit(x);
+		return log(fabs(x));
+	}
+
+	/* second term (log(S)-g) could be more precise here.. */
+	/* or with stirling: (|x|-0.5)*(log(|x|)-1) + poly(1/|x|) */
+	r = (absx-0.5)*(log(absx+gmhalf)-1) + (log(S(absx)) - (gmhalf+0.5));
+	if (x < 0) {
+		/* reflection formula for negative x */
+		x = sinpi(absx);
+		*sign = 2*!!signbit(x) - 1;
+		r = log(pi/(fabs(x)*absx)) - r;
+	}
+	return r;
+}
+
+weak_alias(__lgamma_r, lgamma_r);
+#endif
diff --git a/src/math/tgammaf.c b/src/math/tgammaf.c
index 16df8076..b4ca51c9 100644
--- a/src/math/tgammaf.c
+++ b/src/math/tgammaf.c
@@ -1,16 +1,6 @@
 #include <math.h>
 
-// FIXME: use lanczos approximation
-
-float __lgammaf_r(float, int *);
-
 float tgammaf(float x)
 {
-	int sign;
-	float y;
-
-	y = exp(__lgammaf_r(x, &sign));
-	if (sign < 0)
-		y = -y;
-	return y;
+	return tgamma(x);
 }
author	Szabolcs Nagy <nsz@port70.net>	2012-12-12 01:43:43 +0100
committer	Szabolcs Nagy <nsz@port70.net>	2012-12-12 01:43:43 +0100
commit	0f53c1a4266ad4cca28115e2c3bcfdc86337d8ca (patch)
tree	4dbe18341f662914d8fd916f1f38549444478589 /src
parent	14cc9c7f38c80094c05353fcb11fe9e441340583 (diff)
download	musl-0f53c1a4266ad4cca28115e2c3bcfdc86337d8ca.tar.gz