htswanalysis/MACS/lib/gsl/gsl-1.11/randist/binomial_tpe.c

   1 /* randist/binomial_tpe.c
   2  *
   3  * Copyright (C) 1996, 2003, 2007 James Theiler, Brian Gough
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 3 of the License, or (at
   8  * your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  */
  19
  20 #include <config.h>
  21 #include <math.h>
  22 #include <gsl/gsl_rng.h>
  23 #include <gsl/gsl_randist.h>
  24 #include <gsl/gsl_pow_int.h>
  25 #include <gsl/gsl_sf_gamma.h>
  26
  27 /* The binomial distribution has the form,
  28
  29    f(x) =  n!/(x!(n-x)!) * p^x (1-p)^(n-x) for integer 0 <= x <= n
  30         =  0                               otherwise
  31
  32    This implementation follows the public domain ranlib function
  33    "ignbin", the bulk of which is the BTPE (Binomial Triangle
  34    Parallelogram Exponential) algorithm introduced in
  35    Kachitvichyanukul and Schmeiser[1].  It has been translated to use
  36    modern C coding standards.
  37
  38    If n is small and/or p is near 0 or near 1 (specifically, if
  39    n*min(p,1-p) < SMALL_MEAN), then a different algorithm, called
  40    BINV, is used which has an average runtime that scales linearly
  41    with n*min(p,1-p).
  42
  43    But for larger problems, the BTPE algorithm takes the form of two
  44    functions b(x) and t(x) -- "bottom" and "top" -- for which b(x) <
  45    f(x)/f(M) < t(x), with M = floor(n*p+p).  b(x) defines a triangular
  46    region, and t(x) includes a parallelogram and two tails.  Details
  47    (including a nice drawing) are in the paper.
  48
  49    [1] Kachitvichyanukul, V. and Schmeiser, B. W.  Binomial Random
  50    Variate Generation.  Communications of the ACM, 31, 2 (February,
  51    1988) 216.
  52
  53    Note, Bruce Schmeiser (personal communication) points out that if
  54    you want very fast binomial deviates, and you are happy with
  55    approximate results, and/or n and n*p are both large, then you can
  56    just use gaussian estimates: mean=n*p, variance=n*p*(1-p).
  57
  58    This implementation by James Theiler, April 2003, after obtaining
  59    permission -- and some good advice -- from Drs. Kachitvichyanukul
  60    and Schmeiser to use their code as a starting point, and then doing
  61    a little bit of tweaking.
  62
  63    Additional polishing for GSL coding standards by Brian Gough.  */
  64
  65 #define SMALL_MEAN 14           /* If n*p < SMALL_MEAN then use BINV
  66                                    algorithm. The ranlib
  67                                    implementation used cutoff=30; but
  68                                    on my computer 14 works better */
  69
  70 #define BINV_CUTOFF 110         /* In BINV, do not permit ix too large */
  71
  72 #define FAR_FROM_MEAN 20        /* If ix-n*p is larger than this, then
  73                                    use the "squeeze" algorithm.
  74                                    Ranlib used 20, and this seems to
  75                                    be the best choice on my machine as
  76                                    well */
  77
  78 #define LNFACT(x) gsl_sf_lnfact(x)
  79
  80 inline static double
  81 Stirling (double y1)
  82 {
  83   double y2 = y1 * y1;
  84   double s =
  85     (13860.0 -
  86      (462.0 - (132.0 - (99.0 - 140.0 / y2) / y2) / y2) / y2) / y1 / 166320.0;
  87   return s;
  88 }
  89
  90 unsigned int
  91 gsl_ran_binomial_tpe (const gsl_rng * rng, double p, unsigned int n)
  92 {
  93   return gsl_ran_binomial (rng, p, n);
  94 }
  95
  96 unsigned int
  97 gsl_ran_binomial (const gsl_rng * rng, double p, unsigned int n)
  98 {
  99   int ix;                       /* return value */
 100   int flipped = 0;
 101   double q, s, np;
 102
 103   if (n == 0)
 104     return 0;
 105
 106   if (p > 0.5)
 107     {
 108       p = 1.0 - p;              /* work with small p */
 109       flipped = 1;
 110     }
 111
 112   q = 1 - p;
 113   s = p / q;
 114   np = n * p;
 115
 116   /* Inverse cdf logic for small mean (BINV in K+S) */
 117
 118   if (np < SMALL_MEAN)
 119     {
 120       double f0 = gsl_pow_int (q, n);   /* f(x), starting with x=0 */
 121
 122       while (1)
 123         {
 124           /* This while(1) loop will almost certainly only loop once; but
 125            * if u=1 to within a few epsilons of machine precision, then it
 126            * is possible for roundoff to prevent the main loop over ix to
 127            * achieve its proper value.  following the ranlib implementation,
 128            * we introduce a check for that situation, and when it occurs,
 129            * we just try again.
 130            */
 131
 132           double f = f0;
 133           double u = gsl_rng_uniform (rng);
 134
 135           for (ix = 0; ix <= BINV_CUTOFF; ++ix)
 136             {
 137               if (u < f)
 138                 goto Finish;
 139               u -= f;
 140               /* Use recursion f(x+1) = f(x)*[(n-x)/(x+1)]*[p/(1-p)] */
 141               f *= s * (n - ix) / (ix + 1);
 142             }
 143
 144           /* It should be the case that the 'goto Finish' was encountered
 145            * before this point was ever reached.  But if we have reached
 146            * this point, then roundoff has prevented u from decreasing
 147            * all the way to zero.  This can happen only if the initial u
 148            * was very nearly equal to 1, which is a rare situation.  In
 149            * that rare situation, we just try again.
 150            *
 151            * Note, following the ranlib implementation, we loop ix only to
 152            * a hardcoded value of SMALL_MEAN_LARGE_N=110; we could have
 153            * looped to n, and 99.99...% of the time it won't matter.  This
 154            * choice, I think is a little more robust against the rare
 155            * roundoff error.  If n>LARGE_N, then it is technically
 156            * possible for ix>LARGE_N, but it is astronomically rare, and
 157            * if ix is that large, it is more likely due to roundoff than
 158            * probability, so better to nip it at LARGE_N than to take a
 159            * chance that roundoff will somehow conspire to produce an even
 160            * larger (and more improbable) ix.  If n<LARGE_N, then once
 161            * ix=n, f=0, and the loop will continue until ix=LARGE_N.
 162            */
 163         }
 164     }
 165   else
 166     {
 167       /* For n >= SMALL_MEAN, we invoke the BTPE algorithm */
 168
 169       int k;
 170
 171       double ffm = np + p;      /* ffm = n*p+p             */
 172       int m = (int) ffm;        /* m = int floor[n*p+p]    */
 173       double fm = m;            /* fm = double m;          */
 174       double xm = fm + 0.5;     /* xm = half integer mean (tip of triangle)  */
 175       double npq = np * q;      /* npq = n*p*q            */
 176
 177       /* Compute cumulative area of tri, para, exp tails */
 178
 179       /* p1: radius of triangle region; since height=1, also: area of region */
 180       /* p2: p1 + area of parallelogram region */
 181       /* p3: p2 + area of left tail */
 182       /* p4: p3 + area of right tail */
 183       /* pi/p4: probability of i'th area (i=1,2,3,4) */
 184
 185       /* Note: magic numbers 2.195, 4.6, 0.134, 20.5, 15.3 */
 186       /* These magic numbers are not adjustable...at least not easily! */
 187
 188       double p1 = floor (2.195 * sqrt (npq) - 4.6 * q) + 0.5;
 189
 190       /* xl, xr: left and right edges of triangle */
 191       double xl = xm - p1;
 192       double xr = xm + p1;
 193
 194       /* Parameter of exponential tails */
 195       /* Left tail:  t(x) = c*exp(-lambda_l*[xl - (x+0.5)]) */
 196       /* Right tail: t(x) = c*exp(-lambda_r*[(x+0.5) - xr]) */
 197
 198       double c = 0.134 + 20.5 / (15.3 + fm);
 199       double p2 = p1 * (1.0 + c + c);
 200
 201       double al = (ffm - xl) / (ffm - xl * p);
 202       double lambda_l = al * (1.0 + 0.5 * al);
 203       double ar = (xr - ffm) / (xr * q);
 204       double lambda_r = ar * (1.0 + 0.5 * ar);
 205       double p3 = p2 + c / lambda_l;
 206       double p4 = p3 + c / lambda_r;
 207
 208       double var, accept;
 209       double u, v;              /* random variates */
 210
 211     TryAgain:
 212
 213       /* generate random variates, u specifies which region: Tri, Par, Tail */
 214       u = gsl_rng_uniform (rng) * p4;
 215       v = gsl_rng_uniform (rng);
 216
 217       if (u <= p1)
 218         {
 219           /* Triangular region */
 220           ix = (int) (xm - p1 * v + u);
 221           goto Finish;
 222         }
 223       else if (u <= p2)
 224         {
 225           /* Parallelogram region */
 226           double x = xl + (u - p1) / c;
 227           v = v * c + 1.0 - fabs (x - xm) / p1;
 228           if (v > 1.0 || v <= 0.0)
 229             goto TryAgain;
 230           ix = (int) x;
 231         }
 232       else if (u <= p3)
 233         {
 234           /* Left tail */
 235           ix = (int) (xl + log (v) / lambda_l);
 236           if (ix < 0)
 237             goto TryAgain;
 238           v *= ((u - p2) * lambda_l);
 239         }
 240       else
 241         {
 242           /* Right tail */
 243           ix = (int) (xr - log (v) / lambda_r);
 244           if (ix > (double) n)
 245             goto TryAgain;
 246           v *= ((u - p3) * lambda_r);
 247         }
 248
 249       /* At this point, the goal is to test whether v <= f(x)/f(m)
 250        *
 251        *  v <= f(x)/f(m) = (m!(n-m)! / (x!(n-x)!)) * (p/q)^{x-m}
 252        *
 253        */
 254
 255       /* Here is a direct test using logarithms.  It is a little
 256        * slower than the various "squeezing" computations below, but
 257        * if things are working, it should give exactly the same answer
 258        * (given the same random number seed).  */
 259
 260 #ifdef DIRECT
 261       var = log (v);
 262
 263       accept =
 264         LNFACT (m) + LNFACT (n - m) - LNFACT (ix) - LNFACT (n - ix)
 265         + (ix - m) * log (p / q);
 266
 267 #else /* SQUEEZE METHOD */
 268
 269       /* More efficient determination of whether v < f(x)/f(M) */
 270
 271       k = abs (ix - m);
 272
 273       if (k <= FAR_FROM_MEAN)
 274         {
 275           /*
 276            * If ix near m (ie, |ix-m|<FAR_FROM_MEAN), then do
 277            * explicit evaluation using recursion relation for f(x)
 278            */
 279           double g = (n + 1) * s;
 280           double f = 1.0;
 281
 282           var = v;
 283
 284           if (m < ix)
 285             {
 286               int i;
 287               for (i = m + 1; i <= ix; i++)
 288                 {
 289                   f *= (g / i - s);
 290                 }
 291             }
 292           else if (m > ix)
 293             {
 294               int i;
 295               for (i = ix + 1; i <= m; i++)
 296                 {
 297                   f /= (g / i - s);
 298                 }
 299             }
 300
 301           accept = f;
 302         }
 303       else
 304         {
 305           /* If ix is far from the mean m: k=ABS(ix-m) large */
 306
 307           var = log (v);
 308
 309           if (k < npq / 2 - 1)
 310             {
 311               /* "Squeeze" using upper and lower bounds on
 312                * log(f(x)) The squeeze condition was derived
 313                * under the condition k < npq/2-1 */
 314               double amaxp =
 315                 k / npq * ((k * (k / 3.0 + 0.625) + (1.0 / 6.0)) / npq + 0.5);
 316               double ynorm = -(k * k / (2.0 * npq));
 317               if (var < ynorm - amaxp)
 318                 goto Finish;
 319               if (var > ynorm + amaxp)
 320                 goto TryAgain;
 321             }
 322
 323           /* Now, again: do the test log(v) vs. log f(x)/f(M) */
 324
 325 #if USE_EXACT
 326           /* This is equivalent to the above, but is a little (~20%) slower */
 327           /* There are five log's vs three above, maybe that's it? */
 328
 329           accept = LNFACT (m) + LNFACT (n - m)
 330             - LNFACT (ix) - LNFACT (n - ix) + (ix - m) * log (p / q);
 331
 332 #else /* USE STIRLING */
 333           /* The "#define Stirling" above corresponds to the first five
 334            * terms in asymptoic formula for
 335            * log Gamma (y) - (y-0.5)log(y) + y - 0.5 log(2*pi);
 336            * See Abramowitz and Stegun, eq 6.1.40
 337            */
 338
 339           /* Note below: two Stirling's are added, and two are
 340            * subtracted.  In both K+S, and in the ranlib
 341            * implementation, all four are added.  I (jt) believe that
 342            * is a mistake -- this has been confirmed by personal
 343            * correspondence w/ Dr. Kachitvichyanukul.  Note, however,
 344            * the corrections are so small, that I couldn't find an
 345            * example where it made a difference that could be
 346            * observed, let alone tested.  In fact, define'ing Stirling
 347            * to be zero gave identical results!!  In practice, alv is
 348            * O(1), ranging 0 to -10 or so, while the Stirling
 349            * correction is typically O(10^{-5}) ...setting the
 350            * correction to zero gives about a 2% performance boost;
 351            * might as well keep it just to be pendantic.  */
 352
 353           {
 354             double x1 = ix + 1.0;
 355             double w1 = n - ix + 1.0;
 356             double f1 = fm + 1.0;
 357             double z1 = n + 1.0 - fm;
 358
 359             accept = xm * log (f1 / x1) + (n - m + 0.5) * log (z1 / w1)
 360               + (ix - m) * log (w1 * p / (x1 * q))
 361               + Stirling (f1) + Stirling (z1) - Stirling (x1) - Stirling (w1);
 362           }
 363 #endif
 364 #endif
 365         }
 366
 367
 368       if (var <= accept)
 369         {
 370           goto Finish;
 371         }
 372       else
 373         {
 374           goto TryAgain;
 375         }
 376     }
 377
 378 Finish:
 379
 380   return (flipped) ? (n - ix) : (unsigned int)ix;
 381 }