LISTSERV 16.5 - LCD-CVS Archives

lcsim-math/src/main/java/org/lcsim/util/probability

BivariateDistribution.java added at 1.1

diff -N BivariateDistribution.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ BivariateDistribution.java	30 Nov 2010 23:53:24 -0000	1.1
@@ -0,0 +1,260 @@

+/*
+ *  Class BivariateDistribution
+ */
+package org.lcsim.util.probability;
+
+import org.lcsim.util.probability.Erf;
+
+/**
+ * Calculate the probability integral for a set of bins in the x-y plane
+ * of a bivariate normal distribution (i.e., a 2D Gaussian probability).
+ *<p>
+ * The evaluation of the probability integrals is described in:
+ *<p>
+ * Alan Genz, "Numerical Computation of Rectangular Bivariate and Trivariate
+ * Normal and t Probabilities" in Statistics and Computing 14, 251 (2004).
+ *<p>
+ * The integration code is adapted from the FORTRAN source at:
+ *<p>
+ *   http://www.math.wsu.edu/faculty/genz/homepage
+ *<p>
+ * @author Richard Partridge
+ */
+public class BivariateDistribution {
+
+    private int _nx;
+    private int _ny;
+    private double _xmin;
+    private double _ymin;
+    private double _dx;
+    private double _dy;
+    private double[] _h;
+    private double[] _k;
+
+    //  Weights and coordinates for 6 point Gauss-Legendre integration
+    private double[] _w6 = {0.1713244923791705, 0.3607615730481384, 0.4679139345726904};
+    private double[] _x6 = {0.9324695142031522, 0.6612093864662647, 0.2386191860831970};
+
+    //  Weights and coordinates for 12 point Gauss-Legendre integration
+    private double[] _w12 = {.04717533638651177, 0.1069393259953183, 0.1600783285433464,
+        0.2031674267230659, 0.2334925365383547, 0.2491470458134029};
+    private double[] _x12 = {0.9815606342467191, 0.9041172563704750, 0.7699026741943050,
+        0.5873179542866171, 0.3678314989981802, 0.1252334085114692};
+
+    //  Weights and coordinates for 20 point Gauss-Legendre integration
+    private double[] _w20 = {.01761400713915212, .04060142980038694, .06267204833410906,
+        .08327674157670475, 0.1019301198172404, 0.1181945319615184,
+        0.1316886384491766, 0.1420961093183821, 0.1491729864726037,
+        0.1527533871307259};
+    private double[] _x20 = {0.9931285991850949, 0.9639719272779138, 0.9122344282513259,
+        0.8391169718222188, 0.7463319064601508, 0.6360536807265150,
+        0.5108670019508271, 0.3737060887154196, 0.2277858511416451,
+        0.07652652113349733};
+
+    /**
+     * Set the locations of the x-coordinate bins
+     *
+     * @param nx number of x coordinate bins
+     * @param xmin minimum x coordinate
+     * @param dx width of x coordinate bins
+     */
+    public void xBins(int nx, double xmin, double dx) {
+        _nx = nx;
+        _xmin = xmin;
+        _dx = dx;
+        _h = new double[_nx + 1];
+    }
+
+    /**
+     * Set the locations of the y-coordinate bins
+     *
+     * @param ny number of y coordinate bins
+     * @param ymin minimum y coordinate
+     * @param dy width of y coordinate bins
+     */
+    public void yBins(int ny, double ymin, double dy) {
+        _ny = ny;
+        _ymin = ymin;
+        _dy = dy;
+        _k = new double[_ny + 1];
+    }
+
+    /**
+     * Integrate the Gaussian probability distribution over each x-y bin.
+     * The bins must be defined with xBins and yBins before calling this method.
+     * <p>
+     * The output is a double array that gives the binned probability
+     * distribution.  The first array index is used to indicate the bin in x
+     * and the second array index is used to indicate the bin in y.
+     * <p>
+     * @param x0 mean x coordinate of Gaussian distribution
+     * @param y0 mean y coordinate of Gaussian distribution
+     * @param sigx x coordinate standard deviation
+     * @param sigy y coordinate standard deviation
+     * @param rho x-y correlation coefficient
+     * @return probability distribution
+     */
+    public double[][] Calculate(double x0, double y0, double sigx, double sigy,
+            double rho) {
+
+        //  Calculate the scaled x coordinate for each bin edge
+        for (int i = 0; i < _nx + 1; i++) {
+            _h[i] = (_xmin + i * _dx - x0) / sigx;
+        }
+
+        //  Calculate the scaled y coordinate for each bin edge
+        for (int j = 0; j < _ny + 1; j++) {
+            _k[j] = (_ymin + j * _dy - y0) / sigy;
+        }
+
+        //  Create the array that will hold the binned probabilities
+        double[][] bi = new double[_nx][_ny];
+
+        //  Loop over the bin vertices
+        for (int i = 0; i < _nx + 1; i++) {
+            for (int j = 0; j < _ny + 1; j++) {
+
+                //  Calculate the probability for x>h and y>k for this vertex
+                double prob = GenzCalc(_h[i], _k[j], rho);
+
+                //  Add or subtract this probability from the affected bins.
+                //  The bin probability for bin (0,0) is the sum of the Genz
+                //  probabilities for the (0,0) and (1,1) vertices MINUS the
+                //  sum of the probabilities for the (0,1) and (1,0) vertices
+                if (i > 0 && j > 0) {
+                    bi[i - 1][j - 1] += prob;
+                }
+                if (i > 0 && j < _ny) {
+                    bi[i - 1][j] -= prob;
+                }
+                if (i < _nx && j > 0) {
+                    bi[i][j - 1] -= prob;
+                }
+                if (i < _nx && j < _ny) {
+                    bi[i][j] += prob;
+                }
+            }
+        }
+
+        return bi;
+    }
+
+    private double GenzCalc(double dh, double dk, double rho) {
+
+        double twopi = 2. * Math.PI;
+
+        //  Declare the Gauss-Legendre constants
+        int ng;
+        double[] w;
+        double[] x;
+
+        if (Math.abs(rho) < 0.3) {
+            //  for |rho| < 0.3 use 6 point Gauss-Legendre integration
+            ng = 3;
+            w = _w6;
+            x = _x6;
+        } else if (Math.abs(rho) < 0.75) {
+            //  for 0.3 <= |rho| < 0.75 use 12 point Gauss-Legendre integration
+            ng = 6;
+            w = _w12;
+            x = _x12;
+        } else {
+            //  for |rho| >= 0.75 use 20 point Gauss-Legendre integration
+            ng = 10;
+            w = _w20;
+            x = _x20;
+        }
+
+        //  Initialize the probability and some local variables
+        double bvn = 0.;
+        double h = dh;
+        double k = dk;
+        double hk = h * k;
+
+        //  For |rho| < 0.925, integrate equation 3 in the Genz paper
+        if (Math.abs(rho) < 0.925) {
+
+            //  More or less direct port of Genz code follows
+            //  It is fairly easy to match this calculation against equation 3 of
+            //  Genz's paper if you take into account that you need to change
+            //  variables so the integration argument spans the range -1 to 1
+            double hs = (h * h + k * k) / 2.;
+            double asr = Math.asin(rho);
+            double sn;
+            for (int i = 0; i < ng; i++) {
+                sn = Math.sin(asr * (1 - x[i]) / 2.);
+                bvn += w[i] * Math.exp((sn * hk - hs) / (1 - sn * sn));
+                sn = Math.sin(asr * (1 + x[i]) / 2.);
+                bvn += w[i] * Math.exp((sn * hk - hs) / (1 - sn * sn));
+            }
+            //  The factor of asr/2 comes from changing variables so the
+            //  integration is over the range -1 to 1 instead of 0 to asin(rho)
+            bvn = bvn * asr / (2. * twopi) + Erf.phi(-h) * Erf.phi(-k);
+
+        } else {
+            //  |rho| >= 0.925 - integrate equation 6 in Genz paper with the
+            //  extra term in the Taylor expansion given in equation 7.
+            //  The rest of this code is pretty dense and is a pretty direct
+            //  port of Genz's code.
+
+            if (rho < 0.) {
+                k = -k;
+                hk = -hk;
+            }
+
+            if (Math.abs(rho) < 1.) {
+
+                double as = (1 - rho) * (1 + rho);
+                double a = Math.sqrt(as);
+                double bs = (h - k) * (h - k);
+                double c = (4. - hk) / 8.;
+                double d = (12. - hk) / 16.;
+                double asr = -(bs / as + hk) / 2.;
+
+                if (asr > -100.) {
+                    bvn = a * Math.exp(asr) *
+                            (1. - c * (bs - as) * (1. - d * bs / 5.) / 3. +
+                            c * d * as * as / 5.);
+                }
+
+                if (-hk < 100.) {
+                    double b = Math.sqrt(bs);
+                    bvn -= Math.exp(-hk / 2.) * Math.sqrt(twopi) * Erf.phi(-b / a) *
+                            b * (1 - c * bs * (1 - d * bs / 5.) / 3.);
+                }
+
+                a = a / 2.;
+                for (int i = 0; i < ng; i++) {
+                    for (int j = 0; j < 2; j++) {
+                        int is = -1;
+                        if (j > 0) {
+                            is = 1;
+                        }
+                        double xs = Math.pow(a * (is * x[i] + 1), 2);
+                        double rs = Math.sqrt(1 - xs);
+                        asr = -(bs / xs + hk) / 2;
+
+                        if (asr > -100) {
+                            double sp = (1 + c * xs * (1 + d * xs));
+                            double ep = Math.exp(-hk * (1 - rs) / (2 * (1 + rs))) / rs;
+                            bvn += a * w[i] * Math.exp(asr) * (ep - sp);
+                        }
+                    }
+                }
+
+                bvn = -bvn / twopi;
+            }
+
+            if (rho > 0) {
+                bvn = bvn + Erf.phi(-Math.max(h, k));
+            } else {
+                bvn = -bvn;
+                if (k > h) {
+                    bvn += Erf.phi(k) - Erf.phi(h);
+                }
+            }
+        }
+         
+        return Math.max(0, Math.min(1, bvn));
+    }
+}

\ No newline at end of file

lcsim-math/src/main/java/org/lcsim/util/probability

Erf.java added at 1.1

diff -N Erf.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ Erf.java	30 Nov 2010 23:53:25 -0000	1.1
@@ -0,0 +1,283 @@

+/*
+ *  Class Erf
+ * 
+ */
+package org.lcsim.util.probability;
+
+/**
+ *
+ * Calculates the following probability integrals:
+ *<p>
+ *   erf(x) <br>
+ *   erfc(x) = 1 - erf(x) <br>
+ *   phi(x) = 0.5 * erfc(-x/sqrt(2)) <br>
+ *   phic(x) = 0.5 * erfc(x/sqrt(2))
+ *<p>
+ * Note that phi(x) gives the probability for an observation smaller than x for
+ * a Gaussian probability distribution with zero mean and unit standard
+ * deviation, while phic(x) gives the probability for an observation larger
+ * than x.
+ *<p>
+ * The algorithms for erf(x) and erfc(x) are based on Schonfelder's work.
+ * See J.L. Schonfelder, "Chebyshev Expansions for the Error and Related
+ * Functions", Math. Comp. 32, 1232 (1978).  The calculations of phi(x)
+ * and phic(x) are trivially calculated using the erfc(x) algorithm.
+ *<p>
+ * Schonfelder's algorithms are advertised to provide "30 digit" accuracy.
+ * Since this level of accuracy exceeds the machine precision for doubles,
+ * summation terms whose relative weight is below machine precision are
+ * dropped.
+ *<p>
+ * In this algorithm, we calculate
+ *<p>
+ *   erf(x) = x* y(t) for |x| < 2 <br>
+ *   erf(x) = 1 - exp(-x*x) * y(t) / x for x >= 2 <br>
+ *   erfc(|x|) = exp(-x*x) * y(t) <br>
+ *<p>
+ * The functions y(x) are expanded in terms of Chebyshev polynomials, where
+ * there is a different set of coefficients a[r] for each of the above 3 cases.
+ *<p>
+ *   y(x) = Sum'( a[r] * T(r, t) )
+ *<p>
+ * The notation Sum' indicates that the r = 0 term is divided by 2.
+ *<p>
+ * The variable t is defined as
+ *<p>
+ *   t = ( x*x - 2 ) / 2   for erf(x) with x < 2 <br>
+ *   t = ( 21 - 2*x*x ) / (5 + 2*x*x)   for erf(x) with x >= 2 <br>
+ *   t = ( 4*|x| - 15 ) / ( 4*|x| + 15 )   for erfc(x)
+ *<p>
+ * The code and implementation are based on Alan Genz's FORTRAN source code
+ * that can be found at http://www.math.wsu.edu/faculty/genz/homepage.
+ *<p>
+ * Genz's code was a bit tricky to "reverse engineer", so we go through the
+ * way these calculations are performed in some detail.  Rather than calculate
+ * y(x) directly,  he calculates
+ *<p>
+ *   bm = Sum( a[r] * U(r, t) )    r = 0 : N <br>
+ *   bp = Sum( a[r] * U(r-2, t) )  r = 2 : N
+ *<p>
+ * where U(r, t) are Chebyshev polynomials of the second kind.  The coefficients
+ * a[r] decrease with r, and the value of N is chosen where a[N] / a[0] is
+ * ~10^-16, reflecting the machine precision for doubles.
+ *<p>
+ * The Chebyshev polynomials of the second kind U(r, t) are calculated using the
+ * recursion relation:
+ *<p>
+ *   U(r, t) = 2 * t * U(r-1, t) - U(r-2, t)
+ *<p>
+ * Genz uses the identity
+ *<p>
+ *   T(r, t) = ( U(r, t) - U(r-2, t) ) / 2
+ *<p>
+ * to calculate y(x)
+ *<p>
+ *   y(x) = ( bm - bp ) / 2.
+ *<p>
+ * Note that we get the correct contributions for the r = 0 and r = 1 terms by
+ * ignoring these terms in the bp sum, including getting the desired factor
+ * of 1/2 in the contribution from the r = 0 term.
+ *
+ * @author Richard Partridge
+ */
+public class Erf {
+
+    private static double rtwo = 1.414213562373095048801688724209e0;
+
+    //  Coefficients for the erf(x) calculation with |x| < 2
+    private static double[] a1 = {
+        1.483110564084803581889448079057e0,
+        -3.01071073386594942470731046311e-1,
+        6.8994830689831566246603180718e-2,
+        -1.3916271264722187682546525687e-2,
+        2.420799522433463662891678239e-3,
+        -3.65863968584808644649382577e-4,
+        4.8620984432319048282887568e-5,
+        -5.749256558035684835054215e-6,
+        6.11324357843476469706758e-7,
+        -5.8991015312958434390846e-8,
+        5.207009092068648240455e-9,
+        -4.23297587996554326810e-10,
+        3.1881135066491749748e-11,
+        -2.236155018832684273e-12,
+        1.46732984799108492e-13,
+        -9.044001985381747e-15,
+        5.25481371547092e-16};
+
+    //  Coefficients for the erf(x) calculation with |x| >= 2
+    private static double[] a2 = {
+      1.077977852072383151168335910348e0,
+      -2.6559890409148673372146500904e-2,
+      -1.487073146698099509605046333e-3,
+      -1.38040145414143859607708920e-4,
+      -1.1280303332287491498507366e-5,
+      -1.172869842743725224053739e-6,
+      -1.03476150393304615537382e-7,
+      -1.1899114085892438254447e-8,
+      -1.016222544989498640476e-9,
+      -1.37895716146965692169e-10,
+      -9.369613033737303335e-12,
+      -1.918809583959525349e-12,
+      -3.7573017201993707e-14,
+      -3.7053726026983357e-14,
+      2.627565423490371e-15,
+      -1.121322876437933e-15,
+      1.84136028922538e-16};
+
+    //  Coefficients for the erfc(x) calculation
+    private static double[] a3 = {
+        6.10143081923200417926465815756e-1,
+        -4.34841272712577471828182820888e-1,
+        1.76351193643605501125840298123e-1,
+        -6.0710795609249414860051215825e-2,
+        1.7712068995694114486147141191e-2,
+        -4.321119385567293818599864968e-3,
+        8.54216676887098678819832055e-4,
+        -1.27155090609162742628893940e-4,
+        1.1248167243671189468847072e-5,
+        3.13063885421820972630152e-7,
+        -2.70988068537762022009086e-7,
+        3.0737622701407688440959e-8,
+        2.515620384817622937314e-9,
+        -1.028929921320319127590e-9,
+        2.9944052119949939363e-11,
+        2.6051789687266936290e-11,
+        -2.634839924171969386e-12,
+        -6.43404509890636443e-13,
+        1.12457401801663447e-13,
+        1.7281533389986098e-14,
+        -4.264101694942375e-15,
+        -5.45371977880191e-16,
+        1.58697607761671e-16,
+        2.0899837844334e-17,
+        -5.900526869409e-18};
+
+    /**
+     * Calculate the error function
+     * 
+     * @param x argument
+     * @return error function
+     */
+    public static double erf(double x) {
+
+        //  Initialize
+        double xa = Math.abs(x);
+        double erf;
+
+        //  Case 1: |x| < 2
+        if (xa < 2.) {
+
+            //  First calculate 2*t
+            double tt = x*x - 2.;
+
+            //  Initialize the recursion variables.
+            double bm = 0.;
+            double b = 0.;
+            double bp = 0.;
+
+            //  Calculate bm and bp as defined above
+            for (int i = 16; i >= 0; i--) {
+                bp = b;
+                b = bm;
+                bm = tt * b - bp + a1[i];
+            }
+
+            //  Finally, calculate erf using the Chebyshev polynomial identity
+            erf = x * (bm - bp) / 2.;
+
+        } else {
+
+            //  Case 2: |x| >= 2
+
+            //  First calculate 2*t
+            double tt = (42. - 4 * xa*xa) / (5. + 2 * xa*xa);
+
+            //  Initialize the recursion variables.
+            double bm = 0.;
+            double b = 0.;
+            double bp = 0.;
+
+            //  Calculate bm and bp as defined above
+            for (int i = 16; i >= 0; i--) {
+                bp = b;
+                b = bm;
+                bm = tt * b - bp + a2[i];
+            }
+
+            //  Finally, calculate erf using the Chebyshev polynomial identity
+            erf = 1. - Math.exp(-x * x) * (bm - bp) / (2. * xa);
+
+            //  Take care of negative argument for case 2
+            if (x < 0.) erf = -erf;
+        }
+
+        //  Finished both cases!
+        return erf;
+    }
+
+    /**
+     * Calculate the error function complement
+     * @param x argument
+     * @return error function complement
+     */
+    public static double erfc(double x) {
+
+        //  Initialize
+        double xa = Math.abs(x);
+        double erfc;
+
+        //  Set erfc(|x|) to 0 when |x| is too big
+        if (xa > 100.) {
+            erfc = 0.;
+        } else {
+
+            //  First calculate 2*t
+            double tt = (8. * xa - 30.) / (4. * xa + 15.);
+
+            //  Initialize the recursion variables.
+            double bm = 0.;
+            double b = 0.;
+            double bp = 0.;
+
+            //  Calculate bm and bp as defined above
+            for (int i = 24; i >= 0; i--) {
+                bp = b;
+                b = bm;
+                bm = tt * b - bp + a3[i];
+            }
+
+            //  Finally, calculate erfc using the Chebyshev polynomial identity
+            erfc = Math.exp(-x * x) * (bm - bp) / 2.;
+        }
+
+        //  Calculate erfc for negative arguments
+        if (x < 0.) erfc = 2. - erfc;
+
+        return erfc;
+    }
+
+    /**
+     * Calculate the probability for an observation smaller than x for a
+     * Gaussian probability distribution with zero mean and unit standard
+     * deviation
+     * 
+     * @param x argument
+     * @return probability integral
+     */
+    public static double phi(double x) {
+        return 0.5 * erfc( -x / rtwo);
+    }
+
+    /**
+     * Calculate the probability for an observation larger than x for a
+     * Gaussian probability distribution with zero mean and unit standard
+     * deviation
+     *
+     * @param x argument
+     * @return probability integral
+     */
+    public static double phic(double x) {
+        return 0.5 * erfc(x / rtwo);
+    }
+}
+

Commit in `lcsim-math/src/main/java/org/lcsim/util/probability` on MAIN
`BivariateDistribution.java`	+260	added 1.1
`Erf.java`	+283	added 1.1
	+543