[Rd] pgamma discontinuity (PR#7307)
terra at gnome.org
terra at gnome.org
Tue Jan 11 19:57:45 CET 2005
FYI,
this version of pgamma appears to be quite good and certainly a vast
improvement over current R code. (I last looked at 2.0.1) Apart from
type naming, this is what I have been using for Gnumeric 1.4.1.
This could be included into R as-is, but you might want to benefit from
making logcf, log1pmx, lgamma1p, and possibly logspace_add/logspace_sub
available to other parts of R.
Comments welcome.
Morten
#ifndef GNUMERIC_VERSION
/* Gnumeric already has these. */
#define SQR(x) ((x)*(x))
static const double scalefactor = SQR(SQR(SQR(4294967296.0)));
#undef SQR
static double
logcf (double x, double i, double d)
{
double c1 = 2 * d;
double c2 = i + d;
double c4 = c2 + d;
double a1 = c2;
double b1 = i * (c2 - i * x);
double b2 = d * d * x;
double a2 = c4 * c2 - b2;
const double cfVSmall = 1.0e-14;
#if 0
assert (i > 0);
assert (d >= 0);
#endif
b2 = c4 * b1 - i * b2;
while (fabs (a2 * b1 - a1 * b2) > fabs (cfVSmall * b1 * b2)) {
double c3 = c2*c2*x;
c2 += d;
c4 += d;
a1 = c4 * a2 - c3 * a1;
b1 = c4 * b2 - c3 * b1;
c3 = c1 * c1 * x;
c1 += d;
c4 += d;
a2 = c4 * a1 - c3 * a2;
b2 = c4 * b1 - c3 * b2;
if (fabs (b2) > scalefactor) {
a1 *= 1 / scalefactor;
b1 *= 1 / scalefactor;
a2 *= 1 / scalefactor;
b2 *= 1 / scalefactor;
} else if (fabs (b2) < 1 / scalefactor) {
a1 *= scalefactor;
b1 *= scalefactor;
a2 *= scalefactor;
b2 *= scalefactor;
}
}
return a2 / b2;
}
/* Accurate calculation of log(1+x)-x, particularly for small x. */
static double
log1pmx (double x)
{
static const double minLog1Value = -0.79149064;
static const double two = 2;
if (fabs (x) < 1.0e-2) {
double term = x / (2 + x);
double y = term * term;
return term * ((((two / 9 * y + two / 7) * y + two / 5) * y + two / 3) * y - x);
} else if (x < minLog1Value || x > 1) {
return log1p (x) - x;
} else {
double term = x / (2 + x);
double y = term * term;
return term * (2 * y * logcf (y, 3, 2) - x);
}
}
/* Calculates log(gamma(a+1) accurately for for small a (0 < a & a < 0.5). */
static double
lgamma1p (double a)
{
const double eulers_const = 0.5772156649015328606065120900824024;
/* coeffs[i] holds (zeta(i+2)-1)/(i+2) */
static const double coeffs[40] = {
0.3224670334241132182362075833230126e-0,
0.6735230105319809513324605383715000e-1,
0.2058080842778454787900092413529198e-1,
0.7385551028673985266273097291406834e-2,
0.2890510330741523285752988298486755e-2,
0.1192753911703260977113935692828109e-2,
0.5096695247430424223356548135815582e-3,
0.2231547584535793797614188036013401e-3,
0.9945751278180853371459589003190170e-4,
0.4492623673813314170020750240635786e-4,
0.2050721277567069155316650397830591e-4,
0.9439488275268395903987425104415055e-5,
0.4374866789907487804181793223952411e-5,
0.2039215753801366236781900709670839e-5,
0.9551412130407419832857179772951265e-6,
0.4492469198764566043294290331193655e-6,
0.2120718480555466586923135901077628e-6,
0.1004322482396809960872083050053344e-6,
0.4769810169363980565760193417246730e-7,
0.2271109460894316491031998116062124e-7,
0.1083865921489695409107491757968159e-7,
0.5183475041970046655121248647057669e-8,
0.2483674543802478317185008663991718e-8,
0.1192140140586091207442548202774640e-8,
0.5731367241678862013330194857961011e-9,
0.2759522885124233145178149692816341e-9,
0.1330476437424448948149715720858008e-9,
0.6422964563838100022082448087644648e-10,
0.3104424774732227276239215783404066e-10,
0.1502138408075414217093301048780668e-10,
0.7275974480239079662504549924814047e-11,
0.3527742476575915083615072228655483e-11,
0.1711991790559617908601084114443031e-11,
0.8315385841420284819798357793954418e-12,
0.4042200525289440065536008957032895e-12,
0.1966475631096616490411045679010286e-12,
0.9573630387838555763782200936508615e-13,
0.4664076026428374224576492565974577e-13,
0.2273736960065972320633279596737272e-13,
0.1109139947083452201658320007192334e-13
};
const int N = sizeof (coeffs) / sizeof (coeffs[0]);
const double c = 0.2273736845824652515226821577978691e-12; /* zeta(N+2)-1 */
double lgam;
int i;
if (fabs (a) >= 0.5)
return lgammafn (a + 1);
/* Abramowitz & Stegun 6.1.33 */
/* http://functions.wolfram.com/06.11.06.0008.01 */
lgam = c * logcf (-a / 2, N + 2, 1);
for (i = N - 1; i >= 0; i--)
lgam = coeffs[i] - a * lgam;
return (a * lgam - eulers_const) * a - log1pmx (a);
}
#endif /* GNUMERIC_VERSION */
/*
* Compute the log of a sum from logs of terms, i.e.,
*
* log (exp (logx) + exp (logy))
*
* without causing overflows and without throwing away large handfuls
* of accuracy.
*/
static double
logspace_add (double logx, double logy)
{
return fmax2 (logx, logy) + log1p (exp (-fabs (logx - logy)));
}
/*
* Compute the log of a difference from logs of terms, i.e.,
*
* log (exp (logx) - exp (logy))
*
* without causing overflows and without throwing away large handfuls
* of accuracy.
*/
static double
logspace_sub (double logx, double logy)
{
return logx + log1p (-exp (logy - logx));
}
static double
dpois_wrap (double x_plus_1, double lambda, int give_log)
{
#if 0
printf ("x+1=%.14g lambda=%.14g\n", x_plus_1, lambda);
#endif
if (x_plus_1 > 1)
return dpois_raw (x_plus_1 - 1, lambda, give_log);
else {
double d = dpois_raw (x_plus_1, lambda, give_log);
return give_log
? d + log (x_plus_1 / lambda)
: d * (x_plus_1 / lambda);
}
}
/*
* Abramowitz and Stegun 6.5.29 [right]
*/
static double
pgamma_smallx (double x, double alph, int lower_tail, int log_p)
{
double sum = 0, c = alph, n = 0, term;
#if 0
printf ("x:%.14g alph:%.14g\n", x, alph);
#endif
/*
* Relative to 6.5.29 all terms have been multiplied by alph
* and the first, thus being 1, is omitted.
*/
do {
n++;
c *= -x / n;
term = c / (alph + n);
sum += term;
} while (fabs (term) > DBL_EPSILON * fabs (sum));
if (lower_tail) {
double f1 = log_p ? log1p (sum) : 1 + sum;
double f2;
if (alph > 1) {
f2 = dpois_raw (alph, x, log_p);
f2 = log_p ? f2 + x : f2 * exp (x);
} else if (log_p)
f2 = alph * log (x) - lgamma1p (alph);
else
f2 = pow (x, alph) / exp (lgamma1p (alph));
return log_p ? f1 + f2 : f1 * f2;
} else {
double lf2 = alph * log (x) - lgamma1p (alph);
#if 0
printf ("1:%.14g 2:%.14g\n", alph * log (x), lgamma1p (alph));
printf ("sum=%.14g log(1+sum)=%.14g lf2=%.14g\n", sum, log1p (sum), lf2);
#endif
if (log_p)
return R_Log1_Exp (log1p (sum) + lf2);
else {
double f1m1 = sum;
double f2m1 = expm1 (lf2);
return -(f1m1 + f2m1 + f1m1 * f2m1);
}
}
}
static double
pd_upper_series (double x, double y, int log_p)
{
double term = x / y;
double sum = term;
do {
y++;
term *= x / y;
sum += term;
} while (term > sum * DBL_EPSILON);
return log_p ? log (sum) : sum;
}
static double
pd_lower_cf (double i, double d)
{
double f = 0, of;
double a1 = 0;
double b1 = 1;
double a2 = i;
double b2 = d;
double c1 = 0;
double c2 = a2;
double c3;
double c4 = b2;
while (1) {
c1++;
c2--;
c3 = c1 * c2;
c4 += 2;
a1 = c4 * a2 + c3 * a1;
b1 = c4 * b2 + c3 * b1;
c1++;
c2--;
c3 = c1 * c2;
c4 += 2;
a2 = c4 * a1 + c3 * a2;
b2 = c4 * b1 + c3 * b2;
if (b2 > scalefactor) {
a1 = a1 / scalefactor;
b1 = b1 / scalefactor;
a2 = a2 / scalefactor;
b2 = b2 / scalefactor;
}
if (b2 != 0) {
of = f;
f = a2 / b2;
if (fabs (f - of) <= DBL_EPSILON * fmin2 (1.0, fabs (f)))
return f;
}
}
}
static double
pd_lower_series (double lambda, double y)
{
double term = 1, sum = 0;
while (y >= 1 && term > sum * DBL_EPSILON) {
term *= y / lambda;
sum += term;
y--;
}
if (y != floor (y)) {
/*
* The series does not converge as the terms start getting
* bigger (besides flipping sign) for y < -lambda.
*/
double f = pd_lower_cf (y, lambda + 1 - y);
sum += term * f;
}
return sum;
}
/*
* Asymptotic expansion to calculate the probability that poisson variate
* has value <= x.
*/
static double
ppois_asymp (double x, double lambda,
int lower_tail, int log_p)
{
static const double coef15 = 1/12.0;
static const double coef25 = 1/288.0;
static const double coef35 = -139/51840.0;
static const double coef45 = -571/2488320.0;
static const double coef55 = 163879/209018880.0;
static const double coef65 = 5246819/75246796800.0;
static const double coef75 = -534703531/902961561600.0;
static const double coef1 = 2/3.0;
static const double coef2 = -4/135.0;
static const double coef3 = 8/2835.0;
static const double coef4 = 16/8505.0;
static const double coef5 = -8992/12629925.0;
static const double coef6 = -334144/492567075.0;
static const double coef7 = 698752/1477701225.0;
static const double two = 2;
double dfm, pt,s2pt,res1,res2,elfb,term;
double ig2,ig3,ig4,ig5,ig6,ig7,ig25,ig35,ig45,ig55,ig65,ig75;
double f, np, nd;
dfm = lambda - x;
pt = -x * log1pmx (dfm / x);
s2pt = sqrt (2 * pt);
if (dfm < 0) s2pt = -s2pt;
ig2 = 1.0 + pt;
term = pt * pt * 0.5;
ig3 = ig2 + term;
term *= pt / 3;
ig4 = ig3 + term;
term *= pt / 4;
ig5 = ig4 + term;
term *= pt / 5;
ig6 = ig5 + term;
term *= pt / 6;
ig7 = ig6 + term;
term = pt * (two / 3);
ig25 = 1.0 + term;
term *= pt * (two / 5);
ig35 = ig25 + term;
term *= pt * (two / 7);
ig45 = ig35 + term;
term *= pt * (two / 9);
ig55 = ig45 + term;
term *= pt * (two / 11);
ig65 = ig55 + term;
term *= pt * (two / 13);
ig75 = ig65 + term;
elfb = ((((((coef75/x + coef65)/x + coef55)/x + coef45)/x + coef35)/x + coef25)/x + coef15) + x;
res1 = ((((((ig7*coef7/x + ig6*coef6)/x + ig5*coef5)/x + ig4*coef4)/x + ig3*coef3)/x + ig2*coef2)/x + coef1)*sqrt(x);
res2 = ((((((ig75*coef75/x + ig65*coef65)/x + ig55*coef55)/x + ig45*coef45)/x + ig35*coef35)/x + ig25*coef25)/x + coef15)*s2pt;
if (!lower_tail) elfb = -elfb;
f = (res1 + res2) / elfb;
np = pnorm (s2pt, 0.0, 1.0, !lower_tail, log_p);
nd = dnorm (s2pt, 0.0, 1.0, log_p);
#if 0
printf ("f=%.14g np=%.14g nd=%.14g f*nd=%.14g\n", f, np, nd, f * nd);
#endif
if (log_p)
return (f >= 0)
? logspace_add (np, log (fabs (f)) + nd)
: logspace_sub (np, log (fabs (f)) + nd);
else
return np + f * nd;
}
static double
pgamma_raw (double x, double alph, int lower_tail, int log_p)
{
double res;
#if 0
printf ("x=%.14g alph=%.14g low=%d log=%d\n", x, alph, lower_tail, log_p);
#endif
if (x < 1) {
res = pgamma_smallx (x, alph, lower_tail, log_p);
} else if (x <= alph - 1 && x < 0.8 * (alph + 50)) {
double sum = pd_upper_series (x, alph, log_p);
double d = dpois_wrap (alph, x, log_p);
if (!lower_tail)
res = log_p
? R_Log1_Exp (d + sum)
: 1 - d * sum;
else
res = log_p ? sum + d : sum * d;
} else if (alph - 1 < x && alph < 0.8 * (x + 50)) {
double sum;
double d = dpois_wrap (alph, x, log_p);
if (alph < 1) {
double f = pd_lower_cf (alph, x - (alph - 1))
* x / alph;
sum = log_p ? log (f) : f;
} else {
sum = pd_lower_series (x, alph - 1);
sum = log_p ? log1p (sum) : 1 + sum;
}
if (!lower_tail)
res = log_p ? sum + d : sum * d;
else
res = log_p
? R_Log1_Exp (d + sum)
: 1 - d * sum;
} else {
res = ppois_asymp (alph - 1, x, !lower_tail, log_p);
}
/*
* We lose a fair amount of accuracy to underflow in the cases
* where the final result is very close to DBL_MIN. In those
* cases, simply redo via log space.
*/
if (!log_p && res < DBL_MIN / DBL_EPSILON)
return exp (pgamma_raw (x, alph, lower_tail, 1));
else
return res;
}
double pgamma(double x, double alph, double scale, int lower_tail, int log_p)
{
#ifdef IEEE_754
if (ISNAN(x) || ISNAN(alph) || ISNAN(scale))
return x + alph + scale;
#endif
if(alph <= 0. || scale <= 0.)
ML_ERR_return_NAN;
if (x <= 0.)
return R_DT_0;
x /= scale;
#ifdef IEEE_754
if (ISNAN(x)) /* eg. original x = scale = +Inf */
return x;
#endif
return pgamma_raw (x, alph, lower_tail, log_p);
}
More information about the R-devel
mailing list