1 /* randist/binomial_tpe.c
3 * Copyright (C) 1996, 2003, 2007 James Theiler, Brian Gough
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or (at
8 * your option) any later version.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 #include <gsl/gsl_rng.h>
23 #include <gsl/gsl_randist.h>
24 #include <gsl/gsl_pow_int.h>
25 #include <gsl/gsl_sf_gamma.h>
27 /* The binomial distribution has the form,
29 f(x) = n!/(x!(n-x)!) * p^x (1-p)^(n-x) for integer 0 <= x <= n
32 This implementation follows the public domain ranlib function
33 "ignbin", the bulk of which is the BTPE (Binomial Triangle
34 Parallelogram Exponential) algorithm introduced in
35 Kachitvichyanukul and Schmeiser[1]. It has been translated to use
36 modern C coding standards.
38 If n is small and/or p is near 0 or near 1 (specifically, if
39 n*min(p,1-p) < SMALL_MEAN), then a different algorithm, called
40 BINV, is used which has an average runtime that scales linearly
43 But for larger problems, the BTPE algorithm takes the form of two
44 functions b(x) and t(x) -- "bottom" and "top" -- for which b(x) <
45 f(x)/f(M) < t(x), with M = floor(n*p+p). b(x) defines a triangular
46 region, and t(x) includes a parallelogram and two tails. Details
47 (including a nice drawing) are in the paper.
49 [1] Kachitvichyanukul, V. and Schmeiser, B. W. Binomial Random
50 Variate Generation. Communications of the ACM, 31, 2 (February,
53 Note, Bruce Schmeiser (personal communication) points out that if
54 you want very fast binomial deviates, and you are happy with
55 approximate results, and/or n and n*p are both large, then you can
56 just use gaussian estimates: mean=n*p, variance=n*p*(1-p).
58 This implementation by James Theiler, April 2003, after obtaining
59 permission -- and some good advice -- from Drs. Kachitvichyanukul
60 and Schmeiser to use their code as a starting point, and then doing
61 a little bit of tweaking.
63 Additional polishing for GSL coding standards by Brian Gough. */
#define SMALL_MEAN 14           /* If n*p < SMALL_MEAN then use BINV
                                   algorithm.  The ranlib
                                   implementation used cutoff=30; but
                                   on my computer 14 works better */

#define BINV_CUTOFF 110         /* In BINV, do not permit ix too large */

#define FAR_FROM_MEAN 20        /* If ix-n*p is larger than this, then
                                   use the "squeeze" algorithm.
                                   Ranlib used 20, and this seems to
                                   be the best choice on my machine as
                                   well */

#define LNFACT(x) gsl_sf_lnfact(x)

/* First terms of the asymptotic (Stirling) series for
 *   log Gamma(y) - (y-0.5)*log(y) + y - 0.5*log(2*pi)
 * (see Abramowitz and Stegun, eq 6.1.40); approximately 1/(12*y1)
 * for large y1.  Used as a tiny correction in the BTPE acceptance
 * test below. */
static double
Stirling (double y1)
{
  double y2 = y1 * y1;
  double s =
    (13860.0 -
     (462.0 - (132.0 - (99.0 - 140.0 / y2) / y2) / y2) / y2) / y1 / 166320.0;
  return s;
}
91 gsl_ran_binomial_tpe (const gsl_rng * rng, double p, unsigned int n)
93 return gsl_ran_binomial (rng, p, n);
97 gsl_ran_binomial (const gsl_rng * rng, double p, unsigned int n)
99 int ix; /* return value */
108 p = 1.0 - p; /* work with small p */
116 /* Inverse cdf logic for small mean (BINV in K+S) */
120 double f0 = gsl_pow_int (q, n); /* f(x), starting with x=0 */
124 /* This while(1) loop will almost certainly only loop once; but
125 * if u=1 to within a few epsilons of machine precision, then it
126 * is possible for roundoff to prevent the main loop over ix to
127 * achieve its proper value. following the ranlib implementation,
128 * we introduce a check for that situation, and when it occurs,
133 double u = gsl_rng_uniform (rng);
135 for (ix = 0; ix <= BINV_CUTOFF; ++ix)
140 /* Use recursion f(x+1) = f(x)*[(n-x)/(x+1)]*[p/(1-p)] */
141 f *= s * (n - ix) / (ix + 1);
144 /* It should be the case that the 'goto Finish' was encountered
145 * before this point was ever reached. But if we have reached
146 * this point, then roundoff has prevented u from decreasing
147 * all the way to zero. This can happen only if the initial u
148 * was very nearly equal to 1, which is a rare situation. In
149 * that rare situation, we just try again.
151 * Note, following the ranlib implementation, we loop ix only to
152 * a hardcoded value of SMALL_MEAN_LARGE_N=110; we could have
153 * looped to n, and 99.99...% of the time it won't matter. This
154 * choice, I think is a little more robust against the rare
155 * roundoff error. If n>LARGE_N, then it is technically
156 * possible for ix>LARGE_N, but it is astronomically rare, and
157 * if ix is that large, it is more likely due to roundoff than
158 * probability, so better to nip it at LARGE_N than to take a
159 * chance that roundoff will somehow conspire to produce an even
160 * larger (and more improbable) ix. If n<LARGE_N, then once
161 * ix=n, f=0, and the loop will continue until ix=LARGE_N.
167 /* For n >= SMALL_MEAN, we invoke the BTPE algorithm */
171 double ffm = np + p; /* ffm = n*p+p */
172 int m = (int) ffm; /* m = int floor[n*p+p] */
173 double fm = m; /* fm = double m; */
174 double xm = fm + 0.5; /* xm = half integer mean (tip of triangle) */
175 double npq = np * q; /* npq = n*p*q */
177 /* Compute cumulative area of tri, para, exp tails */
179 /* p1: radius of triangle region; since height=1, also: area of region */
180 /* p2: p1 + area of parallelogram region */
181 /* p3: p2 + area of left tail */
182 /* p4: p3 + area of right tail */
183 /* pi/p4: probability of i'th area (i=1,2,3,4) */
185 /* Note: magic numbers 2.195, 4.6, 0.134, 20.5, 15.3 */
186 /* These magic numbers are not adjustable...at least not easily! */
188 double p1 = floor (2.195 * sqrt (npq) - 4.6 * q) + 0.5;
190 /* xl, xr: left and right edges of triangle */
194 /* Parameter of exponential tails */
195 /* Left tail: t(x) = c*exp(-lambda_l*[xl - (x+0.5)]) */
196 /* Right tail: t(x) = c*exp(-lambda_r*[(x+0.5) - xr]) */
198 double c = 0.134 + 20.5 / (15.3 + fm);
199 double p2 = p1 * (1.0 + c + c);
201 double al = (ffm - xl) / (ffm - xl * p);
202 double lambda_l = al * (1.0 + 0.5 * al);
203 double ar = (xr - ffm) / (xr * q);
204 double lambda_r = ar * (1.0 + 0.5 * ar);
205 double p3 = p2 + c / lambda_l;
206 double p4 = p3 + c / lambda_r;
209 double u, v; /* random variates */
213 /* generate random variates, u specifies which region: Tri, Par, Tail */
214 u = gsl_rng_uniform (rng) * p4;
215 v = gsl_rng_uniform (rng);
219 /* Triangular region */
220 ix = (int) (xm - p1 * v + u);
225 /* Parallelogram region */
226 double x = xl + (u - p1) / c;
227 v = v * c + 1.0 - fabs (x - xm) / p1;
228 if (v > 1.0 || v <= 0.0)
235 ix = (int) (xl + log (v) / lambda_l);
238 v *= ((u - p2) * lambda_l);
243 ix = (int) (xr - log (v) / lambda_r);
246 v *= ((u - p3) * lambda_r);
249 /* At this point, the goal is to test whether v <= f(x)/f(m)
251 * v <= f(x)/f(m) = (m!(n-m)! / (x!(n-x)!)) * (p/q)^{x-m}
255 /* Here is a direct test using logarithms. It is a little
256 * slower than the various "squeezing" computations below, but
257 * if things are working, it should give exactly the same answer
258 * (given the same random number seed). */
264 LNFACT (m) + LNFACT (n - m) - LNFACT (ix) - LNFACT (n - ix)
265 + (ix - m) * log (p / q);
267 #else /* SQUEEZE METHOD */
269 /* More efficient determination of whether v < f(x)/f(M) */
273 if (k <= FAR_FROM_MEAN)
276 * If ix near m (ie, |ix-m|<FAR_FROM_MEAN), then do
277 * explicit evaluation using recursion relation for f(x)
279 double g = (n + 1) * s;
287 for (i = m + 1; i <= ix; i++)
295 for (i = ix + 1; i <= m; i++)
305 /* If ix is far from the mean m: k=ABS(ix-m) large */
311 /* "Squeeze" using upper and lower bounds on
312 * log(f(x)) The squeeze condition was derived
313 * under the condition k < npq/2-1 */
315 k / npq * ((k * (k / 3.0 + 0.625) + (1.0 / 6.0)) / npq + 0.5);
316 double ynorm = -(k * k / (2.0 * npq));
317 if (var < ynorm - amaxp)
319 if (var > ynorm + amaxp)
323 /* Now, again: do the test log(v) vs. log f(x)/f(M) */
326 /* This is equivalent to the above, but is a little (~20%) slower */
327 /* There are five log's vs three above, maybe that's it? */
329 accept = LNFACT (m) + LNFACT (n - m)
330 - LNFACT (ix) - LNFACT (n - ix) + (ix - m) * log (p / q);
332 #else /* USE STIRLING */
333 /* The "#define Stirling" above corresponds to the first five
334 * terms in asymptoic formula for
335 * log Gamma (y) - (y-0.5)log(y) + y - 0.5 log(2*pi);
336 * See Abramowitz and Stegun, eq 6.1.40
339 /* Note below: two Stirling's are added, and two are
340 * subtracted. In both K+S, and in the ranlib
341 * implementation, all four are added. I (jt) believe that
342 * is a mistake -- this has been confirmed by personal
343 * correspondence w/ Dr. Kachitvichyanukul. Note, however,
344 * the corrections are so small, that I couldn't find an
345 * example where it made a difference that could be
346 * observed, let alone tested. In fact, define'ing Stirling
347 * to be zero gave identical results!! In practice, alv is
348 * O(1), ranging 0 to -10 or so, while the Stirling
349 * correction is typically O(10^{-5}) ...setting the
350 * correction to zero gives about a 2% performance boost;
351 * might as well keep it just to be pendantic. */
354 double x1 = ix + 1.0;
355 double w1 = n - ix + 1.0;
356 double f1 = fm + 1.0;
357 double z1 = n + 1.0 - fm;
359 accept = xm * log (f1 / x1) + (n - m + 0.5) * log (z1 / w1)
360 + (ix - m) * log (w1 * p / (x1 * q))
361 + Stirling (f1) + Stirling (z1) - Stirling (x1) - Stirling (w1);
380 return (flipped) ? (n - ix) : (unsigned int)ix;