lcsim-math/src/main/java/org/lcsim/util/probability
diff -N BivariateDistribution.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ BivariateDistribution.java 30 Nov 2010 23:53:24 -0000 1.1
@@ -0,0 +1,260 @@
+/*
+ * Class BivariateDistribution
+ */
+package org.lcsim.util.probability;
+
+import org.lcsim.util.probability.Erf;
+
+/**
+ * Calculate the probability integral for a set of bins in the x-y plane
+ * of a bivariate normal distribution (i.e., a 2D Gaussian probability).
+ *<p>
+ * The evaluation of the probability integrals is described in:
+ *<p>
+ * Alan Genz, "Numerical Computation of Rectangular Bivariate and Trivariate
+ * Normal and t Probabilities" in Statistics and Computing 14, 151 (2004).
+ *<p>
+ * The integration code is adapted from the FORTRAN source at:
+ *<p>
+ * http://www.math.wsu.edu/faculty/genz/homepage
+ *<p>
+ * @author Richard Partridge
+ */
+public class BivariateDistribution {
+
+ private int _nx;
+ private int _ny;
+ private double _xmin;
+ private double _ymin;
+ private double _dx;
+ private double _dy;
+ private double[] _h;
+ private double[] _k;
+
+ // Weights and coordinates for 6 point Gauss-Legendre integration
+ private double[] _w6 = {0.1713244923791705, 0.3607615730481384, 0.4679139345726904};
+ private double[] _x6 = {0.9324695142031522, 0.6612093864662647, 0.2386191860831970};
+
+ // Weights and coordinates for 12 point Gauss-Legendre integration
+ private double[] _w12 = {.04717533638651177, 0.1069393259953183, 0.1600783285433464,
+ 0.2031674267230659, 0.2334925365383547, 0.2491470458134029};
+ private double[] _x12 = {0.9815606342467191, 0.9041172563704750, 0.7699026741943050,
+ 0.5873179542866171, 0.3678314989981802, 0.1252334085114692};
+
+ // Weights and coordinates for 20 point Gauss-Legendre integration
+ private double[] _w20 = {.01761400713915212, .04060142980038694, .06267204833410906,
+ .08327674157670475, 0.1019301198172404, 0.1181945319615184,
+ 0.1316886384491766, 0.1420961093183821, 0.1491729864726037,
+ 0.1527533871307259};
+ private double[] _x20 = {0.9931285991850949, 0.9639719272779138, 0.9122344282513259,
+ 0.8391169718222188, 0.7463319064601508, 0.6360536807265150,
+ 0.5108670019508271, 0.3737060887154196, 0.2277858511416451,
+ 0.07652652113349733};
+
+ /**
+ * Set the locations of the x-coordinate bins
+ *
+ * @param nx number of x coordinate bins
+ * @param xmin minimum x coordinate
+ * @param dx width of x coordinate bins
+ */
+ public void xBins(int nx, double xmin, double dx) {
+ _nx = nx;
+ _xmin = xmin;
+ _dx = dx;
+ _h = new double[_nx + 1];
+ }
+
+ /**
+ * Set the locations of the y-coordinate bins
+ *
+ * @param ny number of y coordinate bins
+ * @param ymin minimum y coordinate
+ * @param dy width of y coordinate bins
+ */
+ public void yBins(int ny, double ymin, double dy) {
+ _ny = ny;
+ _ymin = ymin;
+ _dy = dy;
+ _k = new double[_ny + 1];
+ }
+
+ /**
+ * Integrate the Gaussian probability distribution over each x-y bins,
+ * which must be defined before calling this method.
+ * <p>
+ * The output is a double array that gives the binned probability
+ * distribution. The first array index is used to indicate the bin in x
+ * and the second array index is used to indicate the bin in y.
+ * <p>
+ * @param x0 mean x coordinate of Gaussian distribution
+ * @param y0 mean y coordinate of Gaussian distribution
+ * @param sigx x coordinate standard deviation
+ * @param sigy y coordinate standard deviation
+ * @param rho x-y correlation coefficient
+ * @return probability distribution
+ */
+ public double[][] Calculate(double x0, double y0, double sigx, double sigy,
+ double rho) {
+
+ // Calculate the scaled x coordinate for each bin edge
+ for (int i = 0; i < _nx + 1; i++) {
+ _h[i] = (_xmin + i * _dx - x0) / sigx;
+ }
+
+ // Calculate the scaled y coordinate for each bin edge
+ for (int j = 0; j < _ny + 1; j++) {
+ _k[j] = (_ymin + j * _dy - y0) / sigy;
+ }
+
+ // Create the array that will hold the binned probabilities
+ double[][] bi = new double[_nx][_ny];
+
+ // Loop over the bin vertices
+ for (int i = 0; i < _nx + 1; i++) {
+ for (int j = 0; j < _ny + 1; j++) {
+
+ // Calculate the probability for x>h and y>k for this vertex
+ double prob = GenzCalc(_h[i], _k[j], rho);
+
+ // Add or subtract this probability from the affected bins.
+ // The bin probability for bin (0,0) is the sum of the Genz
+ // probabilities for the (0,0) and (1,1) vertices MINUS the
+ // sum of the probabilities for the (0,1) and (1,0) vertices
+ if (i > 0 && j > 0) {
+ bi[i - 1][j - 1] += prob;
+ }
+ if (i > 0 && j < _ny) {
+ bi[i - 1][j] -= prob;
+ }
+ if (i < _nx && j > 0) {
+ bi[i][j - 1] -= prob;
+ }
+ if (i < _nx && j < _ny) {
+ bi[i][j] += prob;
+ }
+ }
+ }
+
+ return bi;
+ }
+
+ private double GenzCalc(double dh, double dk, double rho) {
+
+ double twopi = 2. * Math.PI;
+
+ // Declare the Gauss-Legendre constants
+ int ng;
+ double[] w;
+ double[] x;
+
+ if (Math.abs(rho) < 0.3) {
+ // for rho < 0.3 use 6 point Gauss-Legendre integration
+ ng = 3;
+ w = _w6;
+ x = _x6;
+ } else if (Math.abs(rho) < 0.75) {
+ // for 0.3 < rho < 0.75 use 12 point Gauss-Legendre integration
+ ng = 6;
+ w = _w12;
+ x = _x12;
+ } else {
+ // for rho > 0.75 use 20 point Gauss-Legendre integration
+ ng = 10;
+ w = _w20;
+ x = _x20;
+ }
+
+ // Initialize the probability and some local variables
+ double bvn = 0.;
+ double h = dh;
+ double k = dk;
+ double hk = h * k;
+
+ // For rho < 0.925, integrate equation 3 in the Genz paper
+ if (Math.abs(rho) < 0.925) {
+
+ // More or less direct port of Genz code follows
+ // It is fairly easy to match this calculation against equation 3 of
+ // Genz's paper if you take into account that you need to change
+ // variables so the integration argument spans the range -1 to 1
+ double hs = (h * h + k * k) / 2.;
+ double asr = Math.asin(rho);
+ double sn;
+ for (int i = 0; i < ng; i++) {
+ sn = Math.sin(asr * (1 - x[i]) / 2.);
+ bvn += w[i] * Math.exp((sn * hk - hs) / (1 - sn * sn));
+ sn = Math.sin(asr * (1 + x[i]) / 2.);
+ bvn += w[i] * Math.exp((sn * hk - hs) / (1 - sn * sn));
+ }
+ // The factor of asr/2 comes from changing variables so the
+ // integration is over the range -1 to 1 instead of 0 - asin(rho)
+ bvn = bvn * asr / (2. * twopi) + Erf.phi(-h) * Erf.phi(-k);
+
+ } else {
+ // rho > 0.925 - integrate equation 6 in Genz paper with the
+ // extra term in the Taylor expansion given in equation 7.
+ // The rest of this code is pretty dense and is a pretty direct
+ // port of Genz's code.
+
+ if (rho < 0.) {
+ k = -k;
+ hk = -hk;
+ }
+
+ if (Math.abs(rho) < 1.) {
+
+ double as = (1 - rho) * (1 + rho);
+ double a = Math.sqrt(as);
+ double bs = (h - k) * (h - k);
+ double c = (4. - hk) / 8.;
+ double d = (12. - hk) / 16.;
+ double asr = -(bs / as + hk) / 2.;
+
+ if (asr > -100.) {
+ bvn = a * Math.exp(asr) *
+ (1. - c * (bs - as) * (1. - d * bs / 5.) / 3. +
+ c * d * as * as / 5.);
+ }
+
+ if (-hk < 100.) {
+ double b = Math.sqrt(bs);
+ bvn -= Math.exp(-hk / 2.) * Math.sqrt(twopi) * Erf.phi(-b / a) *
+ b * (1 - c * bs * (1 - d * bs / 5.) / 3.);
+ }
+
+ a = a / 2.;
+ for (int i = 0; i < ng; i++) {
+ for (int j = 0; j < 2; j++) {
+ int is = -1;
+ if (j > 0) {
+ is = 1;
+ }
+ double xs = Math.pow(a * (is * x[i] + 1), 2);
+ double rs = Math.sqrt(1 - xs);
+ asr = -(bs / xs + hk) / 2;
+
+ if (asr > -100) {
+ double sp = (1 + c * xs * (1 + d * xs));
+ double ep = Math.exp(-hk * (1 - rs) / (2 * (1 + rs))) / rs;
+ bvn += a * w[i] * Math.exp(asr) * (ep - sp);
+ }
+ }
+ }
+
+ bvn = -bvn / twopi;
+ }
+
+ if (rho > 0) {
+ bvn = bvn + Erf.phi(-Math.max(h, k));
+ } else {
+ bvn = -bvn;
+ if (k > h) {
+ bvn += Erf.phi(k) - Erf.phi(h);
+ }
+ }
+ }
+
+ return Math.max(0, Math.min(1, bvn));
+ }
+}
\ No newline at end of file
lcsim-math/src/main/java/org/lcsim/util/probability
diff -N Erf.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ Erf.java 30 Nov 2010 23:53:25 -0000 1.1
@@ -0,0 +1,283 @@
+/*
+ * Class Erf
+ *
+ */
+package org.lcsim.util.probability;
+
+/**
+ *
+ * Calculates the following probability integrals:
+ *<p>
+ * erf(x) <br>
+ * erfc(x) = 1 - erf(x) <br>
+ * phi(x) = 0.5 * erfc(-x/sqrt(2)) <br>
+ * phic(x) = 0.5 * erfc(x/sqrt(2))
+ *<p>
+ * Note that phi(x) gives the probability for an observation smaller than x for
+ * a Gaussian probability distribution with zero mean and unit standard
+ * deviation, while phic(x) gives the probability for an observation larger
+ * than x.
+ *<p>
+ * The algorithms for erf(x) and erfc(x) are based on Schonfelder's work.
+ * See J.L. Schonfelder, "Chebyshev Expansions for the Error and Related
+ * Functions", Math. Comp. 32, 1232 (1978). The calculations of phi(x)
+ * and phic(x) are trivially calculated using the erfc(x) algorithm.
+ *<p>
+ * Schonfelder's algorithms are advertised to provide "30 digit" accurracy.
+ * Since this level of accuracy exceeds the machine precision for doubles,
+ * summation terms whose relative weight is below machine precision are
+ * dropped.
+ *<p>
+ * In this algorithm, we calculate
+ *<p>
+ * erf(x) = x* y(t) for |x| < 2 <br>
+ * erf(x) = 1 - exp(-x*x) * y(t) / x for x >= 2 <br>
+ * erfc(|x|) = exp(-x*x)*y(|x|) <br>
+ *<p>
+ * The functions y(x) are expanded in terms of Chebyshev polynomials, where
+ * there is a different set of coefficients a[r] for each of the above 3 cases.
+ *<p>
+ * y(x) = Sum'( a[r] * T(r, t) )
+ *<p>
+ * The notation Sum' indicates that the r = 0 term is divided by 2.
+ *<p>
+ * The variable t is defined as
+ *<p>
+ * t = ( x*x - 2 ) / 2 for erf(x) with x < 2 <br>
+ * t = ( 21 - 2*x*x ) / (5 + 2*x*x) for erf(x) with x >= 2 <br>
+ * t = ( 4*|x| - 15 ) / ( 4*|x| + 15 ) for erfc(x)
+ *<p>
+ * The code and implementation are based on Alan Genz's FORTRAN source code
+ * that can be found at http://www.math.wsu.edu/faculty/genz/homepage.
+ *<p>
+ * Genz's code was a bit tricky to "reverse engineer", so we go through the
+ * way these calculations are performed in some detail. Rather than calculate
+ * y(x) directly, he calculates
+ *<p>
+ * bm = Sum( a[r] * U(r, t) ) r = 0 : N <br>
+ * bp = Sum( a[r] * U(r-2, t) ) r = 2 : N
+ *<p>
+ * where U(r, t) are Chebyshev polynomials of the second kind. The coefficients
+ * a[r] decrease with r, and the value of N is chosen where a[N] / a[0] is
+ * ~10^-16, reflecting the machine precision for doubles.
+ *<p>
+ * The Chebyshev polynomials of the second kind U(r, t) are calculated using the
+ * recursion relation:
+ *<p>
+ * U(r, t) = 2 * t * U(r-1, t) - U(r-2, t)
+ *<p>
+ * Genz uses the identity
+ *<p>
+ * T(r, t) = ( U(r, t) - U(r-2, t) ) / 2
+ *<p>
+ * to calculate y(x)
+ *<p>
+ * y(x) = ( bm - bp ) / 2.
+ *<p>
+ * Note that we get the correct contributions for the r = 0 and r = 1 terms by
+ * ignoring these terms in the bp sum, including getting the desired factor
+ * of 1/2 in the contribution from the r = 0 term.
+ *
+ * @author Richard Partridge
+ */
+public class Erf {
+
+ private static double rtwo = 1.414213562373095048801688724209e0;
+
+ // Coefficients for the erf(x) calculation with |x| < 2
+ private static double[] a1 = {
+ 1.483110564084803581889448079057e0,
+ -3.01071073386594942470731046311e-1,
+ 6.8994830689831566246603180718e-2,
+ -1.3916271264722187682546525687e-2,
+ 2.420799522433463662891678239e-3,
+ -3.65863968584808644649382577e-4,
+ 4.8620984432319048282887568e-5,
+ -5.749256558035684835054215e-6,
+ 6.11324357843476469706758e-7,
+ -5.8991015312958434390846e-8,
+ 5.207009092068648240455e-9,
+ -4.23297587996554326810e-10,
+ 3.1881135066491749748e-11,
+ -2.236155018832684273e-12,
+ 1.46732984799108492e-13,
+ -9.044001985381747e-15,
+ 5.25481371547092e-16};
+
+ // Coefficients for the err(x) calculation with x > 2
+ private static double[] a2 = {
+ 1.077977852072383151168335910348e0,
+ -2.6559890409148673372146500904e-2,
+ -1.487073146698099509605046333e-3,
+ -1.38040145414143859607708920e-4,
+ -1.1280303332287491498507366e-5,
+ -1.172869842743725224053739e-6,
+ -1.03476150393304615537382e-7,
+ -1.1899114085892438254447e-8,
+ -1.016222544989498640476e-9,
+ -1.37895716146965692169e-10,
+ -9.369613033737303335e-12,
+ -1.918809583959525349e-12,
+ -3.7573017201993707e-14,
+ -3.7053726026983357e-14,
+ 2.627565423490371e-15,
+ -1.121322876437933e-15,
+ 1.84136028922538e-16};
+
+ // Coefficients for the erfc(x) calculation
+ private static double[] a3 = {
+ 6.10143081923200417926465815756e-1,
+ -4.34841272712577471828182820888e-1,
+ 1.76351193643605501125840298123e-1,
+ -6.0710795609249414860051215825e-2,
+ 1.7712068995694114486147141191e-2,
+ -4.321119385567293818599864968e-3,
+ 8.54216676887098678819832055e-4,
+ -1.27155090609162742628893940e-4,
+ 1.1248167243671189468847072e-5,
+ 3.13063885421820972630152e-7,
+ -2.70988068537762022009086e-7,
+ 3.0737622701407688440959e-8,
+ 2.515620384817622937314e-9,
+ -1.028929921320319127590e-9,
+ 2.9944052119949939363e-11,
+ 2.6051789687266936290e-11,
+ -2.634839924171969386e-12,
+ -6.43404509890636443e-13,
+ 1.12457401801663447e-13,
+ 1.7281533389986098e-14,
+ -4.264101694942375e-15,
+ -5.45371977880191e-16,
+ 1.58697607761671e-16,
+ 2.0899837844334e-17,
+ -5.900526869409e-18};
+
+ /**
+ * Calculate the error function
+ *
+ * @param x argument
+ * @return error function
+ */
+ public static double erf(double x) {
+
+ // Initialize
+ double xa = Math.abs(x);
+ double erf;
+
+ // Case 1: |x| < 2
+ if (xa < 2.) {
+
+ // First calculate 2*t
+ double tt = x*x - 2.;
+
+ // Initialize the recursion variables.
+ double bm = 0.;
+ double b = 0.;
+ double bp = 0.;
+
+ // Calculate bm and bp as defined above
+ for (int i = 16; i >= 0; i--) {
+ bp = b;
+ b = bm;
+ bm = tt * b - bp + a1[i];
+ }
+
+ // Finally, calculate erfc using the Chebyshev polynomial identity
+ erf = x * (bm - bp) / 2.;
+
+ } else {
+
+ // Case 2: |x| >= 2
+
+ // First calculate 2*t
+ double tt = (42. - 4 * xa*xa) / (5. + 2 * xa*xa);
+
+ // Initialize the recursion variables.
+ double bm = 0.;
+ double b = 0.;
+ double bp = 0.;
+
+ // Calculate bm and bp as defined above
+ for (int i = 16; i >= 0; i--) {
+ bp = b;
+ b = bm;
+ bm = tt * b - bp + a2[i];
+ }
+
+ // Finally, calculate erfc using the Chebyshev polynomial identity
+ erf = 1. - Math.exp(-x * x) * (bm - bp) / (2. * xa);
+
+ // Take care of negative argument for case 2
+ if (x < 0.) erf = -erf;
+ }
+
+ // Finished both cases!
+ return erf;
+ }
+
+ /**
+ * Calculate the error function complement
+ * @param x argument
+ * @return error function complement
+ */
+ public static double erfc(double x) {
+
+ // Initialize
+ double xa = Math.abs(x);
+ double erfc;
+
+ // Set phi to 0 when the argument is too big
+ if (xa > 100.) {
+ erfc = 0.;
+ } else {
+
+ // First calculate 2*t
+ double tt = (8. * xa - 30.) / (4. * xa + 15.);
+
+ // Initialize the recursion variables.
+ double bm = 0.;
+ double b = 0.;
+ double bp = 0.;
+
+ // Calculate bm and bp as defined above
+ for (int i = 24; i >= 0; i--) {
+ bp = b;
+ b = bm;
+ bm = tt * b - bp + a3[i];
+ }
+
+ // Finally, calculate erfc using the Chebyshev polynomial identity
+ erfc = Math.exp(-x * x) * (bm - bp) / 2.;
+ }
+
+ // Cacluate erfc for negative arguments
+ if (x < 0.) erfc = 2. - erfc;
+
+ return erfc;
+ }
+
+ /**
+ * Calcualate the probability for an observation smaller than x for a
+ * Gaussian probability distribution with zero mean and unit standard
+ * deviation
+ *
+ * @param x argument
+ * @return probability integral
+ */
+ public static double phi(double x) {
+ return 0.5 * erfc( -x / rtwo);
+ }
+
+ /**
+ * Calculate the probability for an observation larger than x for a
+ * Gaussian probability distribution with zero mean and unit standard
+ * deviation
+ *
+ * @param x argument
+ * @return probability integral
+ */
+ public static double phic(double x) {
+ return 0.5 * erfc(x / rtwo);
+ }
+}
+